fp_types/
lib.rs

1#![forbid(unsafe_code)]
2#![warn(rustdoc::broken_intra_doc_links)]
3
4//! Foundational value-type abstractions for **frankenpandas** — the
5//! enums, structs, and free functions that every other crate
6//! (fp-columnar, fp-index, fp-frame, fp-io, ...) consumes when
7//! representing scalar data, dtypes, missing values, and time deltas.
8//!
9//! The types here intentionally stay tiny and dependency-light
10//! (`serde`, `thiserror`) so they can sit at the bottom of the
11//! workspace dep graph.
12//!
13//! ## Core value types
14//!
//! - [`DType`]: the dtype enum — `Null`, `Bool`, `BoolNullable`, `Int64`,
//!   `Int64Nullable`, `Float64`, `Utf8`, `Categorical`, `Timedelta64`,
//!   `Datetime64`, `Period`, `Interval`, `Sparse`. Drives column / series
//!   storage decisions across the workspace.
//! - [`Scalar`]: the per-cell value enum; [`Scalar::dtype`] reports the
//!   matching `DType`. Each variant holds the actual data (`Int64(i64)`,
//!   `Float64(f64)`, `Utf8(String)`, ...), plus the `Null(NullKind)`
//!   variant for missing values.
23//! - [`NullKind`]: distinguishes the three pandas missing-value
24//!   "flavors" — `Null` (Python `None` / SQL NULL), `NaN`
25//!   (floating-point not-a-number), `NaT` (timedelta / datetime
26//!   not-a-time). `Scalar::Null(...)` carries the kind so downstream
27//!   code can preserve pandas semantics.
28//! - [`SparseDType`]: descriptor for sparse-encoded dtypes (paired
29//!   value dtype + fill value).
30//!
31//! ## Time / duration types
32//!
33//! - [`Timedelta`]: nanosecond-precision duration with arithmetic
34//!   helpers ([`Timedelta::add`], [`Timedelta::sub`],
35//!   [`Timedelta::mul_scalar`], [`Timedelta::div_scalar`],
36//!   [`Timedelta::div_timedelta`]) that propagate `NaT` per pandas
37//!   semantics. [`TimedeltaComponents`] breaks a timedelta into
38//!   days/hours/minutes/seconds/nanos for display.
39//! - [`Timestamp`]: nanosecond-precision wall-clock timestamp with
40//!   optional timezone. Includes floor / ceil / round helpers and
41//!   `NaT` propagation.
42//!
43//! ## Dtype inference + casting
44//!
45//! - [`infer_dtype`]: derive a [`DType`] from a slice of scalars
46//!   (used during DataFrame construction).
47//! - [`common_dtype`]: pandas-style dtype promotion for binary ops.
48//! - [`cast_scalar`] / [`cast_scalar_owned`]: convert a scalar to a
49//!   target dtype with explicit error reporting on impossible casts.
50//!
51//! ## Missing-value helpers
52//!
53//! Free fns matching `pd.isna` / `pd.notna` / `pd.fillna` / `pd.dropna`
54//! plus the `nan*` aggregations ([`nansum`], [`nanmean`], [`nancount`],
55//! [`nanmin`], [`nanmax`], [`nanmedian`], [`nanvar`], [`nanstd`])
56//! that mirror pandas' missing-aware reductions.
57//!
58//! ## Error reporting
59//!
60//! Errors are explicit enums via `thiserror`: [`TypeError`] for
61//! dtype-related failures (incompatible-cast, no-common-dtype) and
62//! [`TimedeltaError`] for parse failures.
63
64use serde::{Deserialize, Serialize};
65use thiserror::Error;
66
/// Logical element dtype shared across the workspace.
///
/// Variants serialize under snake_case names unless a variant carries its
/// own `rename`. The declaration order is significant: the derived
/// `PartialOrd` / `Ord` compare variants in the order listed here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DType {
    /// Dtype reported by [`Scalar::Null`]; [`DType::name`] renders it as `"object"`.
    Null,
    /// Plain boolean dtype; [`DType::name`] is `"bool"`.
    Bool,
    /// Nullable boolean extension dtype. Matches pandas `BooleanDtype()`.
    #[serde(rename = "boolean")]
    BoolNullable,
    /// Plain 64-bit integer dtype; [`DType::name`] is `"int64"`.
    Int64,
    /// Nullable Int64 extension dtype. Matches pandas `Int64Dtype()` / `pd.NA`.
    #[serde(rename = "Int64")]
    Int64Nullable,
    /// 64-bit floating-point dtype; [`DType::name`] is `"float64"`.
    Float64,
    /// String dtype; [`DType::name`] is `"object"`. Also deserializes from the
    /// aliases `"string"` and `"str"`.
    #[serde(alias = "string", alias = "str")]
    Utf8,
    /// Categorical dtype; [`DType::name`] is `"category"`.
    Categorical,
    /// Timedelta dtype; [`DType::name`] is `"timedelta64[ns]"`.
    Timedelta64,
    /// Nanosecond-precision datetime since Unix epoch. Matches pandas `datetime64[ns]`.
    Datetime64,
    /// Period ordinal. Matches pandas `period[freq]`. Stores ordinal + frequency code.
    Period,
    /// Numeric interval value. Matches pandas `interval[float64]`.
    Interval,
    /// Sparse dtype; [`SparseDType`] carries the value dtype and fill value.
    Sparse,
}
92
93impl DType {
94    /// Returns true if the dtype is numeric (integer or floating point).
95    #[must_use]
96    pub const fn is_numeric(&self) -> bool {
97        matches!(self, Self::Int64 | Self::Int64Nullable | Self::Float64)
98    }
99
100    /// Returns true if the dtype is an integer type.
101    #[must_use]
102    pub const fn is_integer(&self) -> bool {
103        matches!(self, Self::Int64 | Self::Int64Nullable)
104    }
105
106    /// Returns true if the dtype is a floating point type.
107    #[must_use]
108    pub const fn is_floating(&self) -> bool {
109        matches!(self, Self::Float64)
110    }
111
112    /// Returns true if the dtype is boolean.
113    #[must_use]
114    pub const fn is_bool(&self) -> bool {
115        matches!(self, Self::Bool | Self::BoolNullable)
116    }
117
118    /// Returns true if the dtype is object/string type.
119    #[must_use]
120    pub const fn is_object(&self) -> bool {
121        matches!(self, Self::Utf8)
122    }
123
124    /// Returns true if the dtype is datetime.
125    #[must_use]
126    pub const fn is_datetime(&self) -> bool {
127        matches!(self, Self::Datetime64)
128    }
129
130    /// Returns true if the dtype is timedelta.
131    #[must_use]
132    pub const fn is_timedelta(&self) -> bool {
133        matches!(self, Self::Timedelta64)
134    }
135
136    /// Returns true if the dtype is categorical.
137    #[must_use]
138    pub const fn is_categorical(&self) -> bool {
139        matches!(self, Self::Categorical)
140    }
141
142    /// Returns true if the dtype is sparse.
143    #[must_use]
144    pub const fn is_sparse(&self) -> bool {
145        matches!(self, Self::Sparse)
146    }
147
148    /// Returns true if the dtype is period.
149    #[must_use]
150    pub const fn is_period(&self) -> bool {
151        matches!(self, Self::Period)
152    }
153
154    /// Returns true if the dtype is interval.
155    #[must_use]
156    pub const fn is_interval(&self) -> bool {
157        matches!(self, Self::Interval)
158    }
159
160    /// Return the dtype name as a string.
161    ///
162    /// Matches numpy dtype.name property.
163    #[must_use]
164    pub const fn name(&self) -> &'static str {
165        match self {
166            Self::Bool => "bool",
167            Self::BoolNullable => "boolean",
168            Self::Int64 => "int64",
169            Self::Int64Nullable => "Int64",
170            Self::Float64 => "float64",
171            Self::Utf8 => "object",
172            Self::Datetime64 => "datetime64[ns]",
173            Self::Timedelta64 => "timedelta64[ns]",
174            Self::Categorical => "category",
175            Self::Period => "period",
176            Self::Interval => "interval",
177            Self::Sparse => "Sparse",
178            Self::Null => "object",
179        }
180    }
181
182    /// Return the dtype kind character.
183    ///
184    /// Matches numpy dtype.kind property.
185    #[must_use]
186    pub const fn kind(&self) -> char {
187        match self {
188            Self::Bool | Self::BoolNullable => 'b',
189            Self::Int64 | Self::Int64Nullable => 'i',
190            Self::Float64 => 'f',
191            Self::Utf8 => 'O',
192            Self::Datetime64 => 'M',
193            Self::Timedelta64 => 'm',
194            Self::Categorical => 'O',
195            Self::Period => 'O',
196            Self::Interval => 'O',
197            Self::Sparse => 'O',
198            Self::Null => 'O',
199        }
200    }
201
202    /// Return the dtype itemsize in bytes.
203    ///
204    /// Matches numpy dtype.itemsize property.
205    #[must_use]
206    pub const fn itemsize(&self) -> usize {
207        match self {
208            Self::Bool | Self::BoolNullable => 1,
209            Self::Int64
210            | Self::Int64Nullable
211            | Self::Float64
212            | Self::Datetime64
213            | Self::Timedelta64
214            | Self::Period => 8,
215            Self::Utf8 | Self::Categorical | Self::Interval | Self::Sparse | Self::Null => 8,
216        }
217    }
218
219    /// Returns true if this is an extension dtype (categorical, sparse, period, interval, nullable).
220    ///
221    /// Matches `pd.api.types.is_extension_array_dtype()`.
222    #[must_use]
223    pub const fn is_extension(&self) -> bool {
224        matches!(
225            self,
226            Self::Categorical
227                | Self::Sparse
228                | Self::Period
229                | Self::Interval
230                | Self::Int64Nullable
231                | Self::BoolNullable
232        )
233    }
234
235    /// Returns true if this is a nullable extension dtype (Int64, boolean).
236    ///
237    /// Nullable extension dtypes preserve their dtype when nulls are introduced,
238    /// unlike numpy dtypes which promote to float64.
239    #[must_use]
240    pub const fn is_nullable(&self) -> bool {
241        matches!(self, Self::Int64Nullable | Self::BoolNullable)
242    }
243
244    /// Returns the non-nullable equivalent dtype.
245    ///
246    /// For nullable extension dtypes, returns the numpy equivalent.
247    /// For non-nullable dtypes, returns self.
248    #[must_use]
249    pub const fn to_non_nullable(&self) -> Self {
250        match self {
251            Self::Int64Nullable => Self::Int64,
252            Self::BoolNullable => Self::Bool,
253            other => *other,
254        }
255    }
256
257    /// Returns the nullable equivalent dtype.
258    ///
259    /// For numpy int64/bool, returns the nullable extension dtype.
260    /// For already-nullable or other dtypes, returns self.
261    #[must_use]
262    pub const fn to_nullable(&self) -> Self {
263        match self {
264            Self::Int64 => Self::Int64Nullable,
265            Self::Bool => Self::BoolNullable,
266            other => *other,
267        }
268    }
269
270    /// Returns true if this is a signed integer type.
271    ///
272    /// Matches `pd.api.types.is_signed_integer_dtype()`.
273    #[must_use]
274    pub const fn is_signed_integer(&self) -> bool {
275        matches!(self, Self::Int64 | Self::Int64Nullable)
276    }
277
278    /// Returns true if this is a string/object dtype.
279    ///
280    /// Matches `pd.api.types.is_string_dtype()`.
281    #[must_use]
282    pub const fn is_string_dtype(&self) -> bool {
283        matches!(self, Self::Utf8)
284    }
285
286    /// Returns true for any real numeric dtype (integer or float).
287    ///
288    /// Matches `pd.api.types.is_any_real_numeric_dtype()`.
289    #[must_use]
290    pub const fn is_any_real_numeric(&self) -> bool {
291        self.is_numeric()
292    }
293
294    /// Returns true for datetime-like dtypes (datetime, timedelta, period).
295    ///
296    /// Matches `pd.api.types.is_datetime64_any_dtype()` family.
297    #[must_use]
298    pub const fn is_datetime_like(&self) -> bool {
299        matches!(self, Self::Datetime64 | Self::Timedelta64 | Self::Period)
300    }
301
302    /// Return the numpy dtype character code.
303    ///
304    /// Matches numpy dtype.char property.
305    #[must_use]
306    pub const fn char(&self) -> char {
307        match self {
308            Self::Bool | Self::BoolNullable => '?',
309            Self::Int64 | Self::Int64Nullable => 'l',
310            Self::Float64 => 'd',
311            Self::Utf8 => 'O',
312            Self::Datetime64 => 'M',
313            Self::Timedelta64 => 'm',
314            Self::Categorical | Self::Period | Self::Interval | Self::Sparse | Self::Null => 'O',
315        }
316    }
317
318    /// Return the numpy type number.
319    ///
320    /// Matches numpy dtype.num property.
321    #[must_use]
322    pub const fn num(&self) -> i32 {
323        match self {
324            Self::Bool | Self::BoolNullable => 0,
325            Self::Int64 | Self::Int64Nullable => 7,
326            Self::Float64 => 12,
327            Self::Utf8 => 17,
328            Self::Datetime64 => 21,
329            Self::Timedelta64 => 22,
330            Self::Categorical | Self::Period | Self::Interval | Self::Sparse | Self::Null => 17,
331        }
332    }
333
334    /// Return the byte order character.
335    ///
336    /// Matches numpy dtype.byteorder property. Returns '=' (native) for all types.
337    #[must_use]
338    pub const fn byteorder(&self) -> char {
339        '='
340    }
341
342    /// Return the numpy dtype string representation.
343    ///
344    /// Matches numpy dtype.str property.
345    #[must_use]
346    pub const fn str_repr(&self) -> &'static str {
347        match self {
348            Self::Bool | Self::BoolNullable => "|b1",
349            Self::Int64 | Self::Int64Nullable => "<i8",
350            Self::Float64 => "<f8",
351            Self::Utf8 => "|O8",
352            Self::Datetime64 => "<M8[ns]",
353            Self::Timedelta64 => "<m8[ns]",
354            Self::Categorical | Self::Period | Self::Interval | Self::Sparse | Self::Null => "|O8",
355        }
356    }
357}
358
/// Descriptor for a sparse dtype: a dense value dtype paired with a fill value.
///
/// [`SparseDType::new`] validates `value_dtype` and normalizes `fill_value`.
/// Both fields are public, so building the struct directly skips those checks.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SparseDType {
    /// Logical dtype of the dense values.
    pub value_dtype: DType,
    /// Scalar value that is elided from storage.
    pub fill_value: Scalar,
}
364
365impl SparseDType {
366    /// Construct a pandas-style sparse dtype descriptor.
367    ///
368    /// This records the logical dense value dtype plus the scalar value that is
369    /// elided from storage. The concrete sparse column representation lives in
370    /// fp-columnar; this descriptor is the shared public contract.
371    pub fn new(value_dtype: DType, fill_value: Scalar) -> Result<Self, TypeError> {
372        if matches!(value_dtype, DType::Null | DType::Sparse) {
373            return Err(TypeError::InvalidSparseValueDType { dtype: value_dtype });
374        }
375
376        let fill_value = if fill_value.is_missing() {
377            Scalar::missing_for_dtype(value_dtype)
378        } else {
379            cast_scalar_owned(fill_value, value_dtype)?
380        };
381
382        Ok(Self {
383            value_dtype,
384            fill_value,
385        })
386    }
387}
388
/// Flavor of a missing value carried by [`Scalar::Null`].
///
/// The declaration order is significant: the derived `PartialOrd` / `Ord`
/// compare kinds in the order listed here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NullKind {
    /// Generic missing value (Python `None` / SQL NULL); displayed as `None`.
    Null,
    /// Floating-point not-a-number; displayed as `NaN`.
    NaN,
    /// Timedelta / datetime not-a-time; displayed as `NaT`.
    NaT,
}
396
/// A single cell value.
///
/// Serialized adjacently tagged: the variant name goes under `"kind"` and the
/// payload under `"value"`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", content = "value", rename_all = "snake_case")]
pub enum Scalar {
    /// Missing value, tagged with its [`NullKind`].
    Null(NullKind),
    /// Boolean value.
    Bool(bool),
    /// 64-bit signed integer value.
    Int64(i64),
    /// 64-bit float value. A NaN payload counts as missing.
    Float64(f64),
    /// Owned string value. Also deserializes from the aliases `"string"` and `"str"`.
    #[serde(alias = "string", alias = "str")]
    Utf8(String),
    /// Timedelta payload. `Timedelta::NAT` marks a missing value.
    Timedelta64(i64),
    /// Nanoseconds since Unix epoch. Matches pandas `datetime64[ns]`.
    /// Uses `Timestamp::NAT` (i64::MIN) for missing values.
    Datetime64(i64),
    /// Period ordinal. Uses i64::MIN for NaT (missing value).
    Period(i64),
    /// Numeric interval value. Missing values remain `Scalar::Null`.
    Interval(Interval),
}
415
416impl std::fmt::Display for Scalar {
417    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
418        match self {
419            Self::Null(NullKind::NaN) => write!(f, "NaN"),
420            Self::Null(NullKind::NaT) => write!(f, "NaT"),
421            Self::Null(NullKind::Null) => write!(f, "None"),
422            Self::Bool(b) => write!(f, "{}", if *b { "True" } else { "False" }),
423            Self::Int64(v) => write!(f, "{v}"),
424            Self::Float64(v) => write!(f, "{v}"),
425            Self::Utf8(s) => write!(f, "{s}"),
426            Self::Timedelta64(nanos) => write!(f, "{}", Timedelta::format(*nanos)),
427            Self::Datetime64(nanos) => {
428                if *nanos == Timestamp::NAT {
429                    write!(f, "NaT")
430                } else {
431                    write!(f, "Timestamp[{nanos}]")
432                }
433            }
434            Self::Period(ordinal) => {
435                if *ordinal == i64::MIN {
436                    write!(f, "NaT")
437                } else {
438                    write!(f, "Period[{ordinal}]")
439                }
440            }
441            Self::Interval(interval) => write!(f, "{interval}"),
442        }
443    }
444}
445
446// Ergonomic From impls (br-frankenpandas-esjjy / fd90.182). Mirrors
447// IndexLabel's From<i64>/From<&str>/From<String> so users can write
448//   let v: Vec<Scalar> = vec![1i64.into(), 2.0.into(), "three".into()];
449// instead of the explicit Scalar::Int64(...)/Scalar::Float64(...) form.
450//
451// i64 maps to Int64 (more common than Timedelta64 in pandas-style code).
452// Users wanting Timedelta64 should construct it explicitly with
453// Scalar::Timedelta64(nanos) or via Timedelta::parse / to_timedelta.
454
455impl From<bool> for Scalar {
456    fn from(value: bool) -> Self {
457        Self::Bool(value)
458    }
459}
460
461impl From<i64> for Scalar {
462    fn from(value: i64) -> Self {
463        Self::Int64(value)
464    }
465}
466
467impl From<f64> for Scalar {
468    fn from(value: f64) -> Self {
469        Self::Float64(value)
470    }
471}
472
473impl From<&str> for Scalar {
474    fn from(value: &str) -> Self {
475        Self::Utf8(value.to_owned())
476    }
477}
478
479impl From<String> for Scalar {
480    fn from(value: String) -> Self {
481        Self::Utf8(value)
482    }
483}
484
485impl Scalar {
486    #[must_use]
487    pub fn dtype(&self) -> DType {
488        match self {
489            Self::Null(_) => DType::Null,
490            Self::Bool(_) => DType::Bool,
491            Self::Int64(_) => DType::Int64,
492            Self::Float64(_) => DType::Float64,
493            Self::Utf8(_) => DType::Utf8,
494            Self::Timedelta64(_) => DType::Timedelta64,
495            Self::Datetime64(_) => DType::Datetime64,
496            Self::Period(_) => DType::Period,
497            Self::Interval(_) => DType::Interval,
498        }
499    }
500
501    #[must_use]
502    pub fn is_missing(&self) -> bool {
503        match self {
504            Self::Null(_) => true,
505            Self::Float64(v) => v.is_nan(),
506            Self::Timedelta64(v) => *v == Timedelta::NAT,
507            Self::Datetime64(v) => *v == Timestamp::NAT,
508            Self::Period(v) => *v == i64::MIN,
509            _ => false,
510        }
511    }
512
513    #[must_use]
514    pub fn is_nan(&self) -> bool {
515        matches!(self, Self::Null(NullKind::NaN)) || matches!(self, Self::Float64(v) if v.is_nan())
516    }
517
518    /// Returns true if this is a Bool scalar.
519    #[must_use]
520    pub const fn is_bool(&self) -> bool {
521        matches!(self, Self::Bool(_))
522    }
523
524    /// Returns true if this is an Int64 scalar.
525    #[must_use]
526    pub const fn is_integer(&self) -> bool {
527        matches!(self, Self::Int64(_))
528    }
529
530    /// Returns true if this is a Float64 scalar.
531    #[must_use]
532    pub const fn is_float(&self) -> bool {
533        matches!(self, Self::Float64(_))
534    }
535
536    /// Returns true if this is a numeric scalar (Int64 or Float64).
537    #[must_use]
538    pub const fn is_numeric(&self) -> bool {
539        matches!(self, Self::Int64(_) | Self::Float64(_))
540    }
541
542    /// Returns true if this is a Utf8 (string) scalar.
543    #[must_use]
544    pub const fn is_string(&self) -> bool {
545        matches!(self, Self::Utf8(_))
546    }
547
548    /// Returns true if this is a Datetime64 scalar.
549    #[must_use]
550    pub const fn is_datetime(&self) -> bool {
551        matches!(self, Self::Datetime64(_))
552    }
553
554    /// Returns true if this is a Timedelta64 scalar.
555    #[must_use]
556    pub const fn is_timedelta(&self) -> bool {
557        matches!(self, Self::Timedelta64(_))
558    }
559
560    /// Returns true if this is a Period scalar.
561    #[must_use]
562    pub const fn is_period(&self) -> bool {
563        matches!(self, Self::Period(_))
564    }
565
566    /// Returns true if this is an Interval scalar.
567    #[must_use]
568    pub const fn is_interval(&self) -> bool {
569        matches!(self, Self::Interval(_))
570    }
571
572    #[must_use]
573    pub fn missing_for_dtype(dtype: DType) -> Self {
574        match dtype {
575            DType::Float64 => Self::Null(NullKind::NaN),
576            DType::Timedelta64 => Self::Timedelta64(Timedelta::NAT),
577            DType::Datetime64 => Self::Datetime64(Timestamp::NAT),
578            DType::Period => Self::Period(i64::MIN),
579            DType::Null => Self::Null(NullKind::Null),
580            DType::Bool
581            | DType::BoolNullable
582            | DType::Int64
583            | DType::Int64Nullable
584            | DType::Utf8
585            | DType::Categorical
586            | DType::Interval
587            | DType::Sparse => Self::Null(NullKind::Null),
588        }
589    }
590
591    #[must_use]
592    pub fn semantic_eq(&self, other: &Self) -> bool {
593        match (self, other) {
594            (Self::Float64(a), Self::Float64(b)) => {
595                if a.is_nan() && b.is_nan() {
596                    return true;
597                }
598                if *a == *b {
599                    return true;
600                }
601                let diff = (*a - *b).abs();
602                let max_abs = a.abs().max(b.abs());
603                if max_abs == 0.0 {
604                    diff < f64::EPSILON
605                } else {
606                    diff / max_abs < 1e-14
607                }
608            }
609            (Self::Null(_), Self::Float64(v)) | (Self::Float64(v), Self::Null(_)) => v.is_nan(),
610            // All Null kinds (Null / NaN / NaT) mark missingness; they are
611            // semantically indistinguishable for oracle-parity checks even
612            // though derived PartialEq would reject a cross-kind pair.
613            // fp-frame normalizes Float64 column missing cells to
614            // Null(NaN) at Column::new time, while fixture oracles encode
615            // the canonical missing marker as Null(Null).
616            (Self::Null(_), Self::Null(_)) => true,
617            _ => self == other,
618        }
619    }
620
621    #[must_use]
622    pub fn semantic_le(&self, other: &Self) -> bool {
623        match self.semantic_cmp(other) {
624            std::cmp::Ordering::Less | std::cmp::Ordering::Equal => true,
625            std::cmp::Ordering::Greater => false,
626        }
627    }
628
629    #[must_use]
630    pub fn semantic_ge(&self, other: &Self) -> bool {
631        match self.semantic_cmp(other) {
632            std::cmp::Ordering::Greater | std::cmp::Ordering::Equal => true,
633            std::cmp::Ordering::Less => false,
634        }
635    }
636
637    #[must_use]
638    pub fn is_null(&self) -> bool {
639        matches!(self, Self::Null(_))
640    }
641
642    #[must_use]
643    pub fn is_na(&self) -> bool {
644        self.is_missing()
645    }
646
647    #[must_use]
648    pub fn coalesce(&self, other: &Self) -> Self {
649        if self.is_missing() {
650            other.clone()
651        } else {
652            self.clone()
653        }
654    }
655
656    #[must_use]
657    pub fn semantic_cmp(&self, other: &Self) -> std::cmp::Ordering {
658        match (self, other) {
659            (Self::Int64(a), Self::Int64(b)) => a.cmp(b),
660            (Self::Float64(a), Self::Float64(b)) => {
661                a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
662            }
663            (Self::Utf8(a), Self::Utf8(b)) => a.cmp(b),
664            (Self::Bool(a), Self::Bool(b)) => a.cmp(b),
665            (Self::Null(a), Self::Null(b)) => a.cmp(b),
666            (Self::Timedelta64(a), Self::Timedelta64(b)) => {
667                if *a == Timedelta::NAT || *b == Timedelta::NAT {
668                    std::cmp::Ordering::Equal
669                } else {
670                    a.cmp(b)
671                }
672            }
673            (Self::Datetime64(a), Self::Datetime64(b)) => {
674                if *a == Timestamp::NAT || *b == Timestamp::NAT {
675                    std::cmp::Ordering::Equal
676                } else {
677                    a.cmp(b)
678                }
679            }
680            (Self::Period(a), Self::Period(b)) => {
681                if *a == i64::MIN || *b == i64::MIN {
682                    std::cmp::Ordering::Equal
683                } else {
684                    a.cmp(b)
685                }
686            }
687            (Self::Interval(a), Self::Interval(b)) => a
688                .left
689                .partial_cmp(&b.left)
690                .unwrap_or(std::cmp::Ordering::Equal)
691                .then_with(|| {
692                    a.right
693                        .partial_cmp(&b.right)
694                        .unwrap_or(std::cmp::Ordering::Equal)
695                })
696                .then_with(|| a.closed.cmp(&b.closed)),
697            // Cross-numeric comparison
698            (Self::Int64(a), Self::Float64(b)) => (*a as f64)
699                .partial_cmp(b)
700                .unwrap_or(std::cmp::Ordering::Equal),
701            (Self::Float64(a), Self::Int64(b)) => a
702                .partial_cmp(&(*b as f64))
703                .unwrap_or(std::cmp::Ordering::Equal),
704            // Fallback to debug representation for inconsistent types
705            (a, b) => format!("{a:?}").cmp(&format!("{b:?}")),
706        }
707    }
708
709    pub fn to_f64(&self) -> Result<f64, TypeError> {
710        match self {
711            Self::Bool(v) => Ok(if *v { 1.0 } else { 0.0 }),
712            Self::Int64(v) => Ok(*v as f64),
713            Self::Float64(v) => Ok(*v),
714            Self::Null(kind) => Err(TypeError::ValueIsMissing { kind: *kind }),
715            Self::Utf8(v) => Err(TypeError::NonNumericValue {
716                value: v.clone(),
717                dtype: DType::Utf8,
718            }),
719            Self::Timedelta64(v) if *v == Timedelta::NAT => Err(TypeError::ValueIsMissing {
720                kind: NullKind::NaT,
721            }),
722            Self::Timedelta64(v) => Err(TypeError::NonNumericValue {
723                value: Timedelta::format(*v),
724                dtype: DType::Timedelta64,
725            }),
726            Self::Datetime64(v) if *v == Timestamp::NAT => Err(TypeError::ValueIsMissing {
727                kind: NullKind::NaT,
728            }),
729            Self::Datetime64(v) => Err(TypeError::NonNumericValue {
730                value: format!("Timestamp[{v}]"),
731                dtype: DType::Datetime64,
732            }),
733            Self::Period(v) if *v == i64::MIN => Err(TypeError::ValueIsMissing {
734                kind: NullKind::NaT,
735            }),
736            Self::Period(v) => Err(TypeError::NonNumericValue {
737                value: format!("Period[{v}]"),
738                dtype: DType::Period,
739            }),
740            Self::Interval(v) => Err(TypeError::NonNumericValue {
741                value: v.to_string(),
742                dtype: DType::Interval,
743            }),
744        }
745    }
746
747    /// Try to convert to i64. Returns error for missing or non-numeric values.
748    pub fn to_i64(&self) -> Result<i64, TypeError> {
749        match self {
750            Self::Bool(v) => Ok(if *v { 1 } else { 0 }),
751            Self::Int64(v) => Ok(*v),
752            Self::Float64(v) => Ok(*v as i64),
753            Self::Null(kind) => Err(TypeError::ValueIsMissing { kind: *kind }),
754            Self::Utf8(v) => Err(TypeError::NonNumericValue {
755                value: v.clone(),
756                dtype: DType::Utf8,
757            }),
758            Self::Timedelta64(v) if *v == Timedelta::NAT => Err(TypeError::ValueIsMissing {
759                kind: NullKind::NaT,
760            }),
761            Self::Timedelta64(v) => Ok(*v),
762            Self::Datetime64(v) if *v == Timestamp::NAT => Err(TypeError::ValueIsMissing {
763                kind: NullKind::NaT,
764            }),
765            Self::Datetime64(v) => Ok(*v),
766            Self::Period(v) if *v == i64::MIN => Err(TypeError::ValueIsMissing {
767                kind: NullKind::NaT,
768            }),
769            Self::Period(v) => Ok(*v),
770            Self::Interval(v) => Err(TypeError::NonNumericValue {
771                value: v.to_string(),
772                dtype: DType::Interval,
773            }),
774        }
775    }
776
777    /// Try to convert to bool. Returns error for missing values.
778    pub fn to_bool(&self) -> Result<bool, TypeError> {
779        match self {
780            Self::Bool(v) => Ok(*v),
781            Self::Int64(v) => Ok(*v != 0),
782            Self::Float64(v) => Ok(*v != 0.0 && !v.is_nan()),
783            Self::Null(kind) => Err(TypeError::ValueIsMissing { kind: *kind }),
784            Self::Utf8(v) => Ok(!v.is_empty()),
785            Self::Timedelta64(v) if *v == Timedelta::NAT => Err(TypeError::ValueIsMissing {
786                kind: NullKind::NaT,
787            }),
788            Self::Timedelta64(v) => Ok(*v != 0),
789            Self::Datetime64(v) if *v == Timestamp::NAT => Err(TypeError::ValueIsMissing {
790                kind: NullKind::NaT,
791            }),
792            Self::Datetime64(v) => Ok(*v != 0),
793            Self::Period(v) if *v == i64::MIN => Err(TypeError::ValueIsMissing {
794                kind: NullKind::NaT,
795            }),
796            Self::Period(v) => Ok(*v != 0),
797            Self::Interval(_) => Ok(true),
798        }
799    }
800
801    /// Try to convert to string representation.
802    pub fn to_str(&self) -> String {
803        match self {
804            Self::Bool(v) => if *v { "True" } else { "False" }.to_string(),
805            Self::Int64(v) => v.to_string(),
806            Self::Float64(v) => {
807                if v.is_nan() {
808                    "nan".to_string()
809                } else if v.is_infinite() {
810                    if *v > 0.0 { "inf" } else { "-inf" }.to_string()
811                } else {
812                    v.to_string()
813                }
814            }
815            Self::Null(_) => "NaN".to_string(),
816            Self::Utf8(v) => v.clone(),
817            Self::Timedelta64(v) => Timedelta::format(*v),
818            Self::Datetime64(v) if *v == Timestamp::NAT => "NaT".to_string(),
819            Self::Datetime64(v) => Timestamp::from_nanos(*v).isoformat(),
820            Self::Period(v) if *v == i64::MIN => "NaT".to_string(),
821            Self::Period(v) => format!("Period[{}]", v),
822            Self::Interval(v) => v.to_string(),
823        }
824    }
825}
826
/// Errors raised by dtype promotion, scalar conversion, and related validation.
#[derive(Debug, Error, Clone, PartialEq)]
pub enum TypeError {
    /// Two dtypes have no common promoted dtype (see [`common_dtype`]).
    #[error("dtype coercion from {left:?} to {right:?} has no compatible common type")]
    IncompatibleDtypes { left: DType, right: DType },
    /// A scalar of dtype `from` cannot be cast to dtype `to`.
    #[error("cannot cast scalar of dtype {from:?} to {to:?}")]
    InvalidCast { from: DType, to: DType },
    /// A float cannot be cast to int64 without loss.
    #[error("cannot cast float {value} to int64 without loss")]
    LossyFloatToInt { value: f64 },
    /// An int64 being cast to bool was neither 0 nor 1.
    #[error("expected 0/1 for bool cast from int64 but found {value}")]
    InvalidBoolInt { value: i64 },
    /// A float64 being cast to bool was neither 0.0 nor 1.0.
    #[error("expected 0.0/1.0 for bool cast from float64 but found {value}")]
    InvalidBoolFloat { value: f64 },
    /// A value of a non-numeric dtype was used where a number is required;
    /// `value` holds its rendered text.
    #[error("value {value:?} has non-numeric dtype {dtype:?}")]
    NonNumericValue { value: String, dtype: DType },
    /// A missing value was used where a concrete value is required.
    #[error("value is missing ({kind:?})")]
    ValueIsMissing { kind: NullKind },
    /// The dtype is not allowed as a sparse value dtype (see [`SparseDType::new`]).
    #[error("sparse value dtype cannot be {dtype:?}")]
    InvalidSparseValueDType { dtype: DType },
    /// An `interval_range` step was not finite and positive.
    #[error("interval_range step must be finite, positive, and not NaN (got {step})")]
    InvalidIntervalStep { step: f64 },
    /// An `interval_range` step does not evenly divide the span `end - start`.
    #[error("interval_range step {step} does not evenly divide range end-start={span}")]
    IntervalStepDoesNotDivide { step: f64, span: f64 },
    /// The text `value` could not be parsed as `target`.
    #[error("cannot parse '{value}' as {target}")]
    ValueNotParseable { value: String, target: String },
}
852
853pub fn common_dtype(left: DType, right: DType) -> Result<DType, TypeError> {
854    use DType::{
855        Bool, BoolNullable, Categorical, Datetime64, Float64, Int64, Int64Nullable, Null, Sparse,
856        Timedelta64,
857    };
858
859    let out = match (left, right) {
860        (a, b) if a == b => a,
861        (Null, other) | (other, Null) => other,
862        (Categorical, Categorical) => Categorical,
863
864        // Bool promotions (nullable absorbs non-nullable)
865        (Bool, Int64) | (Int64, Bool) => Int64,
866        (Bool, Int64Nullable) | (Int64Nullable, Bool) => Int64Nullable,
867        (BoolNullable, Int64) | (Int64, BoolNullable) => Int64Nullable,
868        (BoolNullable, Int64Nullable) | (Int64Nullable, BoolNullable) => Int64Nullable,
869        (Bool, BoolNullable) | (BoolNullable, Bool) => BoolNullable,
870        (Bool, Float64) | (Float64, Bool) => Float64,
871        (BoolNullable, Float64) | (Float64, BoolNullable) => Float64,
872
873        // Int64 promotions (nullable absorbs non-nullable)
874        (Int64, Float64) | (Float64, Int64) => Float64,
875        (Int64Nullable, Float64) | (Float64, Int64Nullable) => Float64,
876        (Int64, Int64Nullable) | (Int64Nullable, Int64) => Int64Nullable,
877
878        // Datetime/Timedelta
879        (Timedelta64, Timedelta64) => Timedelta64,
880        (Datetime64, Datetime64) => Datetime64,
881
882        (Sparse, _) | (_, Sparse) => return Err(TypeError::IncompatibleDtypes { left, right }),
883        _ => return Err(TypeError::IncompatibleDtypes { left, right }),
884    };
885
886    Ok(out)
887}
888
889pub fn infer_dtype(values: &[Scalar]) -> Result<DType, TypeError> {
890    let mut current = DType::Null;
891    let mut saw_utf8 = false;
892    let mut saw_timedelta = false;
893    let mut saw_datetime = false;
894    let mut saw_non_utf8_non_null = false;
895
896    for value in values {
897        match value.dtype() {
898            DType::Null => {}
899            DType::Utf8 => saw_utf8 = true,
900            DType::Timedelta64 => {
901                saw_timedelta = true;
902                if current == DType::Null {
903                    current = DType::Timedelta64;
904                } else if current != DType::Timedelta64 {
905                    return Err(TypeError::IncompatibleDtypes {
906                        left: current,
907                        right: DType::Timedelta64,
908                    });
909                }
910            }
911            DType::Datetime64 => {
912                saw_datetime = true;
913                if current == DType::Null {
914                    current = DType::Datetime64;
915                } else if current != DType::Datetime64 {
916                    return Err(TypeError::IncompatibleDtypes {
917                        left: current,
918                        right: DType::Datetime64,
919                    });
920                }
921            }
922            other => {
923                saw_non_utf8_non_null = true;
924                current = common_dtype(current, other)?;
925            }
926        }
927
928        if saw_utf8 && saw_non_utf8_non_null {
929            // Constructor inference follows pandas object-dtype behavior for
930            // heterogeneous string/scalar payloads while arithmetic coercion
931            // remains governed by the stricter common_dtype lattice.
932            return Ok(DType::Utf8);
933        }
934        if saw_timedelta && saw_non_utf8_non_null {
935            return Err(TypeError::IncompatibleDtypes {
936                left: DType::Timedelta64,
937                right: current,
938            });
939        }
940        if saw_datetime && saw_non_utf8_non_null {
941            return Err(TypeError::IncompatibleDtypes {
942                left: DType::Datetime64,
943                right: current,
944            });
945        }
946    }
947
948    if saw_utf8 {
949        Ok(DType::Utf8)
950    } else {
951        Ok(current)
952    }
953}
954
955/// Cast a scalar to a target dtype, taking ownership to avoid redundant clones
956/// when the value already has the correct type (AG-03: identity-cast skip).
957pub fn cast_scalar_owned(value: Scalar, target: DType) -> Result<Scalar, TypeError> {
958    let from = value.dtype();
959    if from == target {
960        return Ok(value);
961    }
962    // Int64 <-> Int64Nullable: same representation, just different dtype tracking
963    if (from == DType::Int64 && target == DType::Int64Nullable)
964        || (from == DType::Int64Nullable && target == DType::Int64)
965    {
966        return Ok(value);
967    }
968    // Bool <-> BoolNullable: same representation
969    if (from == DType::Bool && target == DType::BoolNullable)
970        || (from == DType::BoolNullable && target == DType::Bool)
971    {
972        return Ok(value);
973    }
974    if target == DType::Utf8 {
975        return Ok(Scalar::Utf8(scalar_to_string_for_astype(value)));
976    }
977    // Per br-frankenpandas-cyi4h: pandas astype(bool) (the numpy bool dtype)
978    // treats a float NaN as truthy -> True (bool(nan) is True), unlike the
979    // nullable 'boolean' dtype which keeps NA. FP's NaN=missing model would
980    // otherwise fall through to the missing branch below and yield a null.
981    // Verified vs live pandas 2.2.3.
982    if target == DType::Bool
983        && let Scalar::Float64(v) = &value
984        && v.is_nan()
985    {
986        return Ok(Scalar::Bool(true));
987    }
988    if value.is_missing() {
989        return Ok(Scalar::missing_for_dtype(target));
990    }
991
992    // Note: identity casts (from == target) are handled above, so same-type
993    // arms are omitted from the match below.
994    match target {
995        DType::Null => Ok(Scalar::Null(NullKind::Null)),
996        DType::Bool => match &value {
997            // numpy bool: zero -> False, ANY nonzero -> True (it does not
998            // restrict to 0/1). e.g. bool of -3 / 2.5 is True.
999            Scalar::Int64(v) => Ok(Scalar::Bool(*v != 0)),
1000            // 0.0 and -0.0 -> False; every other value, INCLUDING NaN, -> True
1001            // (NaN != 0.0 is true), matching numpy/pandas truthiness.
1002            Scalar::Float64(v) => Ok(Scalar::Bool(*v != 0.0)),
1003            _ => Err(TypeError::InvalidCast { from, to: target }),
1004        },
1005        DType::BoolNullable => match &value {
1006            // pandas nullable Boolean ('boolean') is STRICT: only 0/1 (or
1007            // True/False) are accepted — any other value raises "Need to pass
1008            // bool-like values", UNLIKE numpy bool which is nonzero-truthy.
1009            // (br-frankenpandas-tjomg)
1010            Scalar::Bool(b) => Ok(Scalar::Bool(*b)),
1011            Scalar::Int64(0) => Ok(Scalar::Bool(false)),
1012            Scalar::Int64(1) => Ok(Scalar::Bool(true)),
1013            Scalar::Int64(v) => Err(TypeError::InvalidBoolInt { value: *v }),
1014            Scalar::Float64(v) if *v == 0.0 => Ok(Scalar::Bool(false)),
1015            Scalar::Float64(v) if *v == 1.0 => Ok(Scalar::Bool(true)),
1016            Scalar::Float64(v) => Err(TypeError::InvalidBoolFloat { value: *v }),
1017            _ => Err(TypeError::InvalidCast { from, to: target }),
1018        },
1019        DType::Int64 | DType::Int64Nullable => match &value {
1020            Scalar::Bool(v) => Ok(Scalar::Int64(i64::from(*v))),
1021            Scalar::Float64(v) => {
1022                // pandas astype(int64) truncates a finite float toward zero
1023                // (1.9 -> 1, -1.9 -> -1, 2.5 -> 2); only non-finite (NaN/±inf)
1024                // or out-of-i64-range values raise. Verified vs pandas 2.2.3
1025                // (br-frankenpandas-qcutc). NaN is handled as missing above, so
1026                // here non-finite means ±inf. `as i64` performs the toward-zero
1027                // truncation for in-range finite values.
1028                if !v.is_finite() {
1029                    return Err(TypeError::LossyFloatToInt { value: *v });
1030                }
1031                if *v < i64::MIN as f64 || *v >= 9223372036854775808.0 {
1032                    return Err(TypeError::LossyFloatToInt { value: *v });
1033                }
1034                Ok(Scalar::Int64(*v as i64))
1035            }
1036            Scalar::Utf8(s) => {
1037                // Try direct int parse first, then try float parse + truncate
1038                // (pandas accepts "1.0" as valid int via float intermediate)
1039                if let Ok(v) = s.parse::<i64>() {
1040                    return Ok(Scalar::Int64(v));
1041                }
1042                if let Ok(f) = s.parse::<f64>()
1043                    && f.is_finite()
1044                    && f.fract() == 0.0
1045                    && f >= i64::MIN as f64
1046                    && f < 9223372036854775808.0
1047                {
1048                    return Ok(Scalar::Int64(f as i64));
1049                }
1050                Err(TypeError::InvalidCast { from, to: target })
1051            }
1052            _ => Err(TypeError::InvalidCast { from, to: target }),
1053        },
1054        DType::Float64 => match &value {
1055            Scalar::Bool(v) => Ok(Scalar::Float64(if *v { 1.0 } else { 0.0 })),
1056            Scalar::Int64(v) => Ok(Scalar::Float64(*v as f64)),
1057            Scalar::Utf8(s) => s
1058                .parse::<f64>()
1059                .map(Scalar::Float64)
1060                .map_err(|_| TypeError::InvalidCast { from, to: target }),
1061            _ => Err(TypeError::InvalidCast { from, to: target }),
1062        },
1063        DType::Utf8 => Ok(Scalar::Utf8(scalar_to_string_for_astype(value))),
1064        DType::Categorical => Err(TypeError::InvalidCast { from, to: target }),
1065        DType::Timedelta64 => match &value {
1066            Scalar::Int64(v) => Ok(Scalar::Timedelta64(*v)),
1067            Scalar::Utf8(s) => Timedelta::parse(s)
1068                .map(Scalar::Timedelta64)
1069                .map_err(|_| TypeError::InvalidCast { from, to: target }),
1070            _ => Err(TypeError::InvalidCast { from, to: target }),
1071        },
1072        DType::Datetime64 => match &value {
1073            Scalar::Int64(v) => Ok(Scalar::Datetime64(*v)),
1074            Scalar::Utf8(s) => Timestamp::parse(s)
1075                .map(|timestamp| Scalar::Datetime64(timestamp.nanos))
1076                .map_err(|_| TypeError::InvalidCast { from, to: target }),
1077            _ => Err(TypeError::InvalidCast { from, to: target }),
1078        },
1079        DType::Period => match &value {
1080            Scalar::Int64(v) => Ok(Scalar::Period(*v)),
1081            Scalar::Utf8(s) => Period::parse(s)
1082                .map(|period| Scalar::Period(period.ordinal))
1083                .map_err(|_| TypeError::InvalidCast { from, to: target }),
1084            _ => Err(TypeError::InvalidCast { from, to: target }),
1085        },
1086        DType::Interval => match &value {
1087            Scalar::Utf8(s) => Interval::parse(s)
1088                .map(Scalar::Interval)
1089                .map_err(|_| TypeError::InvalidCast { from, to: target }),
1090            _ => Err(TypeError::InvalidCast { from, to: target }),
1091        },
1092        DType::Sparse => Err(TypeError::InvalidCast { from, to: target }),
1093    }
1094}
1095
1096fn scalar_to_string_for_astype(value: Scalar) -> String {
1097    match value {
1098        Scalar::Null(NullKind::Null) => "None".to_owned(),
1099        Scalar::Null(NullKind::NaN) => "nan".to_owned(),
1100        Scalar::Null(NullKind::NaT) => "NaT".to_owned(),
1101        Scalar::Bool(true) => "True".to_owned(),
1102        Scalar::Bool(false) => "False".to_owned(),
1103        Scalar::Int64(v) => v.to_string(),
1104        Scalar::Float64(v) => float_to_string_for_astype(v),
1105        Scalar::Utf8(s) => s,
1106        Scalar::Timedelta64(v) if v == Timedelta::NAT => "NaT".to_owned(),
1107        Scalar::Timedelta64(v) => Timedelta::format(v),
1108        Scalar::Datetime64(v) if v == Timestamp::NAT => "NaT".to_owned(),
1109        Scalar::Datetime64(v) => format!("Timestamp[{v}]"),
1110        Scalar::Period(v) if v == i64::MIN => "NaT".to_owned(),
1111        Scalar::Period(v) => format!("Period[{v}]"),
1112        Scalar::Interval(v) => v.to_string(),
1113    }
1114}
1115
/// Render a float the way Python's `str(float)` does, which is what pandas
/// `astype(str)` produces.
///
/// NaN becomes `"nan"` and the infinities `"inf"` / `"-inf"`. Finite values
/// start from Rust's `Debug` rendering (shortest round-trip digits, `.0` on
/// whole numbers, scientific notation for extreme magnitudes); only the
/// exponent spelling is adjusted, because Rust prints `1e16` / `1e-5` where
/// Python prints `1e+16` / `1e-05`.
fn float_to_string_for_astype(value: f64) -> String {
    if value.is_nan() {
        return String::from("nan");
    }
    if value.is_infinite() {
        // Display yields "inf" / "-inf".
        return value.to_string();
    }

    let rendered = format!("{value:?}");
    match rendered.find('e') {
        // Plain decimal notation already matches Python.
        None => rendered,
        Some(e_at) => {
            let mantissa = &rendered[..e_at];
            let exponent = &rendered[e_at + 1..];
            // Python always writes an explicit sign and at least two digits.
            let (sign, magnitude) = if let Some(unsigned) = exponent.strip_prefix('-') {
                ('-', unsigned)
            } else {
                match exponent.strip_prefix('+') {
                    Some(unsigned) => ('+', unsigned),
                    None => ('+', exponent),
                }
            };
            format!("{mantissa}e{sign}{magnitude:0>2}")
        }
    }
}
1142
1143/// Cast a scalar reference to a target dtype (clones only when conversion is needed).
1144pub fn cast_scalar(value: &Scalar, target: DType) -> Result<Scalar, TypeError> {
1145    cast_scalar_owned(value.clone(), target)
1146}
1147
1148// ── Timedelta support ──────────────────────────────────────────────────
1149
/// Errors reported by the `Timedelta` parsing and construction helpers.
#[derive(Debug, Error, Clone, PartialEq)]
pub enum TimedeltaError {
    /// The input could not be interpreted; the payload is the offending text
    /// (the whole string for parse failures, the unit alias for
    /// `Timedelta::from_unit`).
    #[error("invalid timedelta string: {0}")]
    InvalidFormat(String),
    /// The resulting nanosecond count does not fit in an `i64`.
    #[error("overflow in timedelta computation")]
    Overflow,
}
1157
/// Field-by-field breakdown of a timedelta, as produced by
/// `Timedelta::components`.
///
/// The breakdown is floor-normalized: `days` may be negative, while every
/// other field is the non-negative remainder within the next larger unit.
/// The `Default` value (all zeros) is what `Timedelta::components` returns
/// for NaT.
#[derive(Debug, Clone, Copy, Default)]
pub struct TimedeltaComponents {
    // Whole days (floor division, so negative for negative durations).
    pub days: i64,
    // Whole hours left after removing the days.
    pub hours: i64,
    // Whole minutes left after removing the hours.
    pub minutes: i64,
    // Whole seconds left after removing the minutes.
    pub seconds: i64,
    // Whole milliseconds left after removing the seconds.
    pub milliseconds: i64,
    // Whole microseconds left after removing the milliseconds.
    pub microseconds: i64,
    // Nanoseconds left after removing the microseconds.
    pub nanoseconds: i64,
}
1168
/// Namespace for timedelta helpers.
///
/// The type carries no data: every associated function works on a plain
/// `i64` count of nanoseconds, with `Timedelta::NAT` (`i64::MIN`) as the
/// not-a-time sentinel.
pub struct Timedelta;
1170
1171impl Timedelta {
    /// Nanoseconds in one microsecond.
    pub const NANOS_PER_MICRO: i64 = 1_000;
    /// Nanoseconds in one millisecond.
    pub const NANOS_PER_MILLI: i64 = 1_000_000;
    /// Nanoseconds in one second.
    pub const NANOS_PER_SEC: i64 = 1_000_000_000;
    /// Nanoseconds in one minute.
    pub const NANOS_PER_MIN: i64 = 60 * Self::NANOS_PER_SEC;
    /// Nanoseconds in one hour.
    pub const NANOS_PER_HOUR: i64 = 60 * Self::NANOS_PER_MIN;
    /// Nanoseconds in one day.
    pub const NANOS_PER_DAY: i64 = 24 * Self::NANOS_PER_HOUR;
    /// Nanoseconds in one week.
    pub const NANOS_PER_WEEK: i64 = 7 * Self::NANOS_PER_DAY;

    /// Not-a-time sentinel: the smallest `i64` is reserved to mean "missing".
    pub const NAT: i64 = i64::MIN;
1181
1182    pub fn parse(s: &str) -> Result<i64, TimedeltaError> {
1183        let s = s.trim();
1184
1185        if s.eq_ignore_ascii_case("nat") {
1186            return Ok(Self::NAT);
1187        }
1188
1189        let (negative, s) = if let Some(rest) = s.strip_prefix('-') {
1190            (true, rest.trim())
1191        } else {
1192            (false, s)
1193        };
1194
1195        if let Some(nanos) = Self::try_parse_time_format(s) {
1196            return Ok(if negative { -nanos } else { nanos });
1197        }
1198
1199        if let Some(nanos) = Self::try_parse_iso8601_duration(s) {
1200            return Ok(if negative { -nanos } else { nanos });
1201        }
1202
1203        let nanos = Self::parse_compound(s)?;
1204        Ok(if negative { -nanos } else { nanos })
1205    }
1206
1207    /// Parse an ISO-8601 duration the way pandas `Timedelta` accepts it:
1208    /// a leading uppercase `P`, an optional `T` separator that is otherwise
1209    /// ignored, and unit letters `W`/`D`/`H`/`M`/`S` in any position. `M` is
1210    /// always MINUTES (never months), and only seconds may be fractional —
1211    /// years/months and lowercase units are rejected, matching pandas.
1212    /// (pandas mis-handles fractional non-second components; those are rejected
1213    /// here rather than reproducing the buggy value.) (br-frankenpandas-c3p8b)
1214    fn try_parse_iso8601_duration(s: &str) -> Option<i64> {
1215        let mut rest = s.strip_prefix('P')?;
1216        if rest.is_empty() {
1217            return None;
1218        }
1219        let mut total: i64 = 0;
1220        let mut saw_component = false;
1221        while !rest.is_empty() {
1222            if let Some(after_t) = rest.strip_prefix('T') {
1223                rest = after_t;
1224                continue;
1225            }
1226            let num_end = rest.find(|c: char| !c.is_ascii_digit() && c != '.')?;
1227            if num_end == 0 {
1228                return None;
1229            }
1230            let num_str = &rest[..num_end];
1231            let unit = rest.as_bytes()[num_end];
1232            let is_fractional = num_str.contains('.');
1233            rest = &rest[num_end + 1..];
1234
1235            let (multiplier, frac_ok) = match unit {
1236                b'W' => (Self::NANOS_PER_WEEK, false),
1237                b'D' => (Self::NANOS_PER_DAY, false),
1238                b'H' => (Self::NANOS_PER_HOUR, false),
1239                b'M' => (Self::NANOS_PER_MIN, false),
1240                b'S' => (Self::NANOS_PER_SEC, true),
1241                _ => return None,
1242            };
1243            if is_fractional {
1244                if !frac_ok {
1245                    return None;
1246                }
1247                let value: f64 = num_str.parse().ok()?;
1248                let product = value * multiplier as f64;
1249                if !product.is_finite() || product.abs() >= 9223372036854775808.0 {
1250                    return None;
1251                }
1252                total = total.checked_add(product.round() as i64)?;
1253            } else {
1254                let value: i64 = num_str.parse().ok()?;
1255                total = total.checked_add(value.checked_mul(multiplier)?)?;
1256            }
1257            saw_component = true;
1258        }
1259        saw_component.then_some(total)
1260    }
1261
1262    fn try_parse_time_format(s: &str) -> Option<i64> {
1263        let parts: Vec<&str> = s.split(':').collect();
1264        if parts.len() < 2 || parts.len() > 3 {
1265            return None;
1266        }
1267
1268        let hours: i64 = parts[0].parse().ok()?;
1269        let minutes: i64 = parts[1].parse().ok()?;
1270
1271        let (seconds, frac_nanos) = if parts.len() == 3 {
1272            if let Some((sec_str, frac_str)) = parts[2].split_once('.') {
1273                let sec: i64 = sec_str.parse().ok()?;
1274                if !frac_str.bytes().all(|byte| byte.is_ascii_digit()) {
1275                    return None;
1276                }
1277                let mut frac = 0_i64;
1278                let taken = frac_str.len().min(9);
1279                for byte in frac_str.bytes().take(9) {
1280                    frac = frac * 10 + i64::from(byte - b'0');
1281                }
1282                for _ in taken..9 {
1283                    frac *= 10;
1284                }
1285                (sec, frac)
1286            } else {
1287                let sec: i64 = parts[2].parse().ok()?;
1288                (sec, 0)
1289            }
1290        } else {
1291            (0, 0)
1292        };
1293
1294        hours
1295            .checked_mul(Self::NANOS_PER_HOUR)?
1296            .checked_add(minutes.checked_mul(Self::NANOS_PER_MIN)?)?
1297            .checked_add(seconds.checked_mul(Self::NANOS_PER_SEC)?)?
1298            .checked_add(frac_nanos)
1299    }
1300
1301    fn parse_compound(s: &str) -> Result<i64, TimedeltaError> {
1302        let mut total: i64 = 0;
1303        let mut remaining = s;
1304
1305        while !remaining.is_empty() {
1306            remaining = remaining.trim_start();
1307            if remaining.is_empty() {
1308                break;
1309            }
1310
1311            // Per br-frankenpandas-i9bah: check if remaining is a time format
1312            // (HH:MM:SS) which can appear after "N days " in pandas timedelta strings.
1313            if remaining.contains(':')
1314                && let Some(time_nanos) = Self::try_parse_time_format(remaining)
1315            {
1316                total = total
1317                    .checked_add(time_nanos)
1318                    .ok_or(TimedeltaError::Overflow)?;
1319                break;
1320            }
1321
1322            let num_end = remaining
1323                .find(|c: char| !c.is_ascii_digit() && c != '.' && c != '-')
1324                .unwrap_or(remaining.len());
1325
1326            if num_end == 0 {
1327                return Err(TimedeltaError::InvalidFormat(s.to_string()));
1328            }
1329
1330            let num_str = &remaining[..num_end];
1331            let num: f64 = num_str
1332                .parse()
1333                .map_err(|_| TimedeltaError::InvalidFormat(s.to_string()))?;
1334
1335            remaining = remaining[num_end..].trim_start();
1336
1337            let unit_end = remaining
1338                .find(|c: char| c.is_ascii_digit() || c.is_whitespace())
1339                .unwrap_or(remaining.len());
1340
1341            let unit = &remaining[..unit_end];
1342            remaining = &remaining[unit_end..];
1343
1344            let multiplier = Self::unit_to_nanos(unit)
1345                .ok_or_else(|| TimedeltaError::InvalidFormat(s.to_string()))?;
1346
1347            // Per br-frankenpandas-zw3mg: pandas raises OverflowError on
1348            // huge scientific-notation Timedeltas like "1e100 days". The
1349            // raw `as i64` cast silently saturated to i64::MAX, masking
1350            // overflow before checked_add could catch it.
1351            let product = num * multiplier as f64;
1352            if !product.is_finite() || product.abs() >= 9223372036854775808.0 {
1353                return Err(TimedeltaError::Overflow);
1354            }
1355            let nanos = product.round() as i64;
1356            total = total.checked_add(nanos).ok_or(TimedeltaError::Overflow)?;
1357        }
1358
1359        if total == 0 && !s.trim().is_empty() && s.trim() != "0" {
1360            return Err(TimedeltaError::InvalidFormat(s.to_string()));
1361        }
1362
1363        Ok(total)
1364    }
1365
1366    /// Map a pandas-style frequency-alias string to a nanosecond-count.
1367    ///
1368    /// Recognizes pandas's offset alias core set plus common word forms:
1369    /// W/week(s), D/day(s), H/hr/hour(s), m/T/min/minute(s), s/sec/second(s),
1370    /// ms/milli/millisecond(s)/L, us/µs/micro/microsecond(s)/U, ns/nano/
1371    /// nanosecond(s)/N. Empty string maps to days (matches pandas default).
1372    /// Returns `None` for unrecognized aliases — callers can choose to map
1373    /// that to NaT (consistent with the rest of fp-types) or surface as a
1374    /// typed error.
1375    ///
1376    /// Per br-frankenpandas-lbsx (9p0u Phase 2.6): public surface so
1377    /// downstream crates can consume the same alias map fp-types uses for
1378    /// `Timedelta::from_unit` / `Timestamp::*_to_unit`.
1379    #[must_use]
1380    pub fn unit_to_nanos(unit: &str) -> Option<i64> {
1381        match unit.to_lowercase().as_str() {
1382            "w" | "week" | "weeks" => Some(Self::NANOS_PER_WEEK),
1383            "d" | "day" | "days" => Some(Self::NANOS_PER_DAY),
1384            "h" | "hr" | "hour" | "hours" => Some(Self::NANOS_PER_HOUR),
1385            "m" | "min" | "minute" | "minutes" | "t" => Some(Self::NANOS_PER_MIN),
1386            "s" | "sec" | "second" | "seconds" => Some(Self::NANOS_PER_SEC),
1387            "ms" | "milli" | "millis" | "millisecond" | "milliseconds" | "l" => {
1388                Some(Self::NANOS_PER_MILLI)
1389            }
1390            "us" | "µs" | "micro" | "micros" | "microsecond" | "microseconds" | "u" => {
1391                Some(Self::NANOS_PER_MICRO)
1392            }
1393            "ns" | "nano" | "nanos" | "nanosecond" | "nanoseconds" | "n" => Some(1),
1394            "" => Some(Self::NANOS_PER_DAY),
1395            _ => None,
1396        }
1397    }
1398
1399    pub fn components(nanos: i64) -> TimedeltaComponents {
1400        if nanos == Self::NAT {
1401            return TimedeltaComponents::default();
1402        }
1403
1404        // pandas/Python normalize via FLOOR division (like format()): the days
1405        // component can be negative while the time-of-day remainder is always in
1406        // [0, 1 day). So pd.Timedelta(-1,'s').components == (-1, 23, 59, 59, 0, 0, 0),
1407        // NOT the abs-based (0, 0, 0, 1, 0, 0, 0).
1408        let days = nanos.div_euclid(Self::NANOS_PER_DAY);
1409        let rem = nanos.rem_euclid(Self::NANOS_PER_DAY);
1410
1411        let hours = rem / Self::NANOS_PER_HOUR;
1412        let rem = rem % Self::NANOS_PER_HOUR;
1413
1414        let minutes = rem / Self::NANOS_PER_MIN;
1415        let rem = rem % Self::NANOS_PER_MIN;
1416
1417        let seconds = rem / Self::NANOS_PER_SEC;
1418        let rem = rem % Self::NANOS_PER_SEC;
1419
1420        let milliseconds = rem / Self::NANOS_PER_MILLI;
1421        let rem = rem % Self::NANOS_PER_MILLI;
1422
1423        let microseconds = rem / Self::NANOS_PER_MICRO;
1424        let nanoseconds = rem % Self::NANOS_PER_MICRO;
1425
1426        TimedeltaComponents {
1427            days,
1428            hours,
1429            minutes,
1430            seconds,
1431            milliseconds,
1432            microseconds,
1433            nanoseconds,
1434        }
1435    }
1436
1437    pub fn total_seconds(nanos: i64) -> f64 {
1438        if nanos == Self::NAT {
1439            f64::NAN
1440        } else {
1441            nanos as f64 / Self::NANOS_PER_SEC as f64
1442        }
1443    }
1444
1445    /// Convert to specified time unit.
1446    ///
1447    /// Matches pd.Timedelta.as_unit(). Supported units: ns, us, ms, s, m, h, D.
1448    #[must_use]
1449    pub fn as_unit(nanos: i64, unit: &str) -> f64 {
1450        if nanos == Self::NAT {
1451            return f64::NAN;
1452        }
1453        let nanos_f = nanos as f64;
1454        match unit {
1455            "ns" | "nanoseconds" => nanos_f,
1456            "us" | "microseconds" => nanos_f / Self::NANOS_PER_MICRO as f64,
1457            "ms" | "milliseconds" => nanos_f / Self::NANOS_PER_MILLI as f64,
1458            "s" | "seconds" => nanos_f / Self::NANOS_PER_SEC as f64,
1459            "m" | "minutes" => nanos_f / Self::NANOS_PER_MIN as f64,
1460            "h" | "hours" => nanos_f / Self::NANOS_PER_HOUR as f64,
1461            "D" | "days" => nanos_f / Self::NANOS_PER_DAY as f64,
1462            _ => f64::NAN,
1463        }
1464    }
1465
1466    /// Return the days component. Matches `pd.Timedelta.days`.
1467    #[must_use]
1468    pub fn days(nanos: i64) -> i64 {
1469        if nanos == Self::NAT {
1470            return 0; // pandas returns 0 for NaT.days (no error)
1471        }
1472        // FLOOR division like pandas: pd.Timedelta(-1,'s').days == -1, not 0.
1473        nanos.div_euclid(Self::NANOS_PER_DAY)
1474    }
1475
1476    /// Return the seconds component (0-86399). Matches `pd.Timedelta.seconds`.
1477    #[must_use]
1478    pub fn seconds(nanos: i64) -> i64 {
1479        if nanos == Self::NAT {
1480            return 0;
1481        }
1482        // Floor-normalized time-of-day remainder: pd.Timedelta(-1,'s').seconds == 86399.
1483        nanos.rem_euclid(Self::NANOS_PER_DAY) / Self::NANOS_PER_SEC
1484    }
1485
1486    /// Return the microseconds component (0-999999). Matches `pd.Timedelta.microseconds`.
1487    #[must_use]
1488    pub fn microseconds(nanos: i64) -> i64 {
1489        if nanos == Self::NAT {
1490            return 0;
1491        }
1492        nanos.rem_euclid(Self::NANOS_PER_SEC) / Self::NANOS_PER_MICRO
1493    }
1494
1495    /// Return the nanoseconds component (0-999). Matches `pd.Timedelta.nanoseconds`.
1496    #[must_use]
1497    pub fn nanoseconds(nanos: i64) -> i64 {
1498        if nanos == Self::NAT {
1499            return 0;
1500        }
1501        nanos.rem_euclid(Self::NANOS_PER_MICRO)
1502    }
1503
1504    pub fn format(nanos: i64) -> String {
1505        if nanos == Self::NAT {
1506            return "NaT".to_string();
1507        }
1508
1509        // pandas / Python timedelta normalize via FLOOR division: the days
1510        // component can be negative while the time-of-day remainder is always
1511        // non-negative, and a negative-days value prints a '+' before the time
1512        // (e.g. -1s -> "-1 days +23:59:59", not "-0 days 00:00:01"). Compute the
1513        // components with Euclidean div/rem so the remainder is in [0, 1 day).
1514        let days = nanos.div_euclid(Self::NANOS_PER_DAY);
1515        let rem = nanos.rem_euclid(Self::NANOS_PER_DAY);
1516        let hours = rem / Self::NANOS_PER_HOUR;
1517        let minutes = (rem % Self::NANOS_PER_HOUR) / Self::NANOS_PER_MIN;
1518        let seconds = (rem % Self::NANOS_PER_MIN) / Self::NANOS_PER_SEC;
1519        let frac = rem % Self::NANOS_PER_SEC;
1520
1521        let time_part = format!("{hours:02}:{minutes:02}:{seconds:02}");
1522        // '+' joins the negative day count to the positive time remainder.
1523        let sep = if days < 0 { "+" } else { "" };
1524
1525        if frac > 0 {
1526            // pandas renders the sub-second part with microsecond precision
1527            // (6 digits) unless a sub-microsecond (nanosecond) component is
1528            // present, in which case it widens to 9 digits.
1529            if frac % 1_000 == 0 {
1530                format!("{days} days {sep}{time_part}.{:06}", frac / 1_000)
1531            } else {
1532                format!("{days} days {sep}{time_part}.{frac:09}")
1533            }
1534        } else {
1535            format!("{days} days {sep}{time_part}")
1536        }
1537    }
1538
1539    pub fn from_unit(value: f64, unit: &str) -> Result<i64, TimedeltaError> {
1540        let multiplier = Self::unit_to_nanos(unit)
1541            .ok_or_else(|| TimedeltaError::InvalidFormat(unit.to_string()))?;
1542        Ok((value * multiplier as f64).round() as i64)
1543    }
1544
1545    // ── Arithmetic (br-frankenpandas-4r56 Phase 1) ──────────────────────
1546    //
1547    // NaT propagation: any arithmetic with `NAT` returns `NAT`. Matches
1548    // pandas `pd.NaT + anything == NaT`, `pd.NaT - anything == NaT`, etc.
1549    // Saturation: i64 overflow clamps to i64::MAX/MIN (never wraps). Matches
1550    // pandas's OverflowError surface at the type-system boundary.
1551
1552    /// Add two Timedelta nanosecond values. NaT propagates; saturates on overflow.
1553    #[must_use]
1554    pub fn add(a: i64, b: i64) -> i64 {
1555        if a == Self::NAT || b == Self::NAT {
1556            return Self::NAT;
1557        }
1558        a.saturating_add(b)
1559    }
1560
1561    /// Subtract two Timedelta nanosecond values. NaT propagates; saturates on overflow.
1562    #[must_use]
1563    pub fn sub(a: i64, b: i64) -> i64 {
1564        if a == Self::NAT || b == Self::NAT {
1565            return Self::NAT;
1566        }
1567        a.saturating_sub(b)
1568    }
1569
1570    /// Negate a Timedelta value. NaT stays NaT. Saturates on overflow
1571    /// (pandas: `-pd.Timedelta.min` is NaT since min == -max - 1 cannot be negated).
1572    #[must_use]
1573    pub fn neg(a: i64) -> i64 {
1574        if a == Self::NAT {
1575            return Self::NAT;
1576        }
1577        a.saturating_neg()
1578    }
1579
1580    /// Absolute value of a Timedelta. NaT stays NaT. Saturates on overflow.
1581    #[must_use]
1582    pub fn abs(a: i64) -> i64 {
1583        if a == Self::NAT {
1584            return Self::NAT;
1585        }
1586        a.saturating_abs()
1587    }
1588
1589    /// Multiply a Timedelta value by an integer factor. NaT propagates;
1590    /// saturates on overflow.
1591    ///
1592    /// Matches pandas `pd.Timedelta(...) * int`.
1593    #[must_use]
1594    pub fn mul_scalar(a: i64, factor: i64) -> i64 {
1595        if a == Self::NAT {
1596            return Self::NAT;
1597        }
1598        a.saturating_mul(factor)
1599    }
1600
1601    /// Floor-divide a Timedelta value by an integer divisor. NaT propagates.
1602    /// Returns NaT on divide-by-zero (matches pandas, which raises, but we
1603    /// surface as NaT to avoid panics at the type-system boundary).
1604    ///
1605    /// Matches pandas / Python `pd.Timedelta(...) // int`: floor division,
1606    /// not truncation toward zero. `-100 // 3 == -34`, and `100 // -3 ==
1607    /// -34`. Rust's `/` truncates toward zero and `div_euclid` keeps the
1608    /// remainder non-negative — neither matches pandas when the divisor is
1609    /// negative. This helper adjusts trunc-toward-zero into floor.
1610    #[must_use]
1611    pub fn div_scalar(a: i64, divisor: i64) -> i64 {
1612        if a == Self::NAT || divisor == 0 {
1613            return Self::NAT;
1614        }
1615        // NAT == i64::MIN so the classic `i64::MIN / -1` overflow path is
1616        // already handled by the NAT check above. `(i64::MIN + 1) / -1`
1617        // equals `i64::MAX` with no overflow, so we never need a
1618        // saturation branch here.
1619        let q = a / divisor;
1620        let r = a % divisor;
1621        // If remainder is non-zero and has opposite sign from divisor,
1622        // Rust's trunc-toward-zero `/` is one step above the floor. Adjust
1623        // down by 1 to match Python/pandas floor division.
1624        if r != 0 && (r < 0) != (divisor < 0) {
1625            q - 1
1626        } else {
1627            q
1628        }
1629    }
1630
1631    /// Divide two Timedelta values, returning the ratio as f64.
1632    /// Matches pandas `pd.Timedelta(...) / pd.Timedelta(...)` → float.
1633    /// NaT in either operand → NaN. Zero divisor → ±Inf (per IEEE 754).
1634    #[must_use]
1635    pub fn div_timedelta(a: i64, b: i64) -> f64 {
1636        if a == Self::NAT || b == Self::NAT {
1637            return f64::NAN;
1638        }
1639        (a as f64) / (b as f64)
1640    }
1641
1642    /// Returns ISO 8601 duration format string.
1643    ///
1644    /// Matches pandas `pd.Timedelta.isoformat()`. Returns format like
1645    /// "P1DT2H3M4.567890123S" for 1 day, 2 hours, 3 minutes, 4.567890123 seconds.
1646    /// NaT returns "NaT".
1647    #[must_use]
1648    pub fn isoformat(nanos: i64) -> String {
1649        if nanos == Self::NAT {
1650            return "NaT".to_string();
1651        }
1652
1653        let negative = nanos < 0;
1654        let abs_nanos = nanos.saturating_abs();
1655
1656        let days = abs_nanos / Self::NANOS_PER_DAY;
1657        let remaining = abs_nanos % Self::NANOS_PER_DAY;
1658
1659        let hours = remaining / Self::NANOS_PER_HOUR;
1660        let remaining = remaining % Self::NANOS_PER_HOUR;
1661
1662        let minutes = remaining / Self::NANOS_PER_MIN;
1663        let remaining = remaining % Self::NANOS_PER_MIN;
1664
1665        let seconds = remaining / Self::NANOS_PER_SEC;
1666        let sub_sec_nanos = remaining % Self::NANOS_PER_SEC;
1667
1668        let mut result = String::new();
1669        if negative {
1670            result.push('-');
1671        }
1672
1673        result.push_str(&format!("P{days}DT{hours}H{minutes}M"));
1674
1675        if sub_sec_nanos == 0 {
1676            result.push_str(&format!("{seconds}S"));
1677        } else {
1678            let frac = format!("{:09}", sub_sec_nanos);
1679            let trimmed = frac.trim_end_matches('0');
1680            result.push_str(&format!("{seconds}.{trimmed}S"));
1681        }
1682
1683        result
1684    }
1685
1686    /// Rounds down to the nearest frequency unit.
1687    ///
1688    /// Matches pandas `pd.Timedelta.floor(freq)`. NaT is preserved.
1689    #[must_use]
1690    pub fn floor(nanos: i64, freq: &str) -> i64 {
1691        if nanos == Self::NAT {
1692            return Self::NAT;
1693        }
1694        let Some(unit_nanos) = Self::unit_to_nanos(freq) else {
1695            return Self::NAT;
1696        };
1697        if unit_nanos == 0 {
1698            return Self::NAT;
1699        }
1700        let negative = nanos < 0;
1701        let abs_nanos = nanos.saturating_abs();
1702        let floored = (abs_nanos / unit_nanos) * unit_nanos;
1703        if negative { -floored } else { floored }
1704    }
1705
1706    /// Rounds up to the nearest frequency unit.
1707    ///
1708    /// Matches pandas `pd.Timedelta.ceil(freq)`. NaT is preserved.
1709    #[must_use]
1710    pub fn ceil(nanos: i64, freq: &str) -> i64 {
1711        if nanos == Self::NAT {
1712            return Self::NAT;
1713        }
1714        let Some(unit_nanos) = Self::unit_to_nanos(freq) else {
1715            return Self::NAT;
1716        };
1717        if unit_nanos == 0 {
1718            return Self::NAT;
1719        }
1720        let negative = nanos < 0;
1721        let abs_nanos = nanos.saturating_abs();
1722        let ceiled = ((abs_nanos + unit_nanos - 1) / unit_nanos) * unit_nanos;
1723        if negative { -ceiled } else { ceiled }
1724    }
1725
1726    /// Rounds to the nearest frequency unit.
1727    ///
1728    /// Matches pandas `pd.Timedelta.round(freq)`. Uses banker's rounding
1729    /// (round half to even). NaT is preserved.
1730    #[must_use]
1731    pub fn round(nanos: i64, freq: &str) -> i64 {
1732        if nanos == Self::NAT {
1733            return Self::NAT;
1734        }
1735        let Some(unit_nanos) = Self::unit_to_nanos(freq) else {
1736            return Self::NAT;
1737        };
1738        if unit_nanos == 0 {
1739            return Self::NAT;
1740        }
1741        let negative = nanos < 0;
1742        let abs_nanos = nanos.saturating_abs();
1743
1744        let quotient = abs_nanos / unit_nanos;
1745        let remainder = abs_nanos % unit_nanos;
1746        let half = unit_nanos / 2;
1747
1748        let rounded = if remainder > half {
1749            (quotient + 1) * unit_nanos
1750        } else if remainder < half {
1751            quotient * unit_nanos
1752        } else {
1753            // Exactly half: round to even
1754            if quotient % 2 == 0 {
1755                quotient * unit_nanos
1756            } else {
1757                (quotient + 1) * unit_nanos
1758            }
1759        };
1760
1761        if negative { -rounded } else { rounded }
1762    }
1763}
1764
1765// ── Timestamp types (br-frankenpandas-9p0u — 4r56 Phase 2) ─────────────
1766//
1767// Nanosecond-precision i64 since Unix epoch + optional IANA tz name.
1768// TZ-dependent arithmetic (DST transitions, tz conversion) is deferred
1769// to Phase 3 which pulls chrono_tz into fp-types; Phase 2 stores the
1770// tz name as opaque metadata and performs arithmetic on the absolute
1771// nanos axis only.
1772
/// Length in days of `month` (1 = January … 12 = December) in `year`.
///
/// February is 29 days in Gregorian leap years (divisible by 4 and not by
/// 100, or divisible by 400) and 28 otherwise. Returns `None` when `month`
/// is outside `1..=12`.
fn days_in_month(year: i64, month: u32) -> Option<u32> {
    let leap_year = year % 400 == 0 || (year % 4 == 0 && year % 100 != 0);
    match month {
        2 => Some(if leap_year { 29 } else { 28 }),
        4 | 6 | 9 | 11 => Some(30),
        1..=12 => Some(31),
        _ => None,
    }
}
1795
/// How many ISO-8601 weeks `year` contains: 52 or 53.
///
/// A year is "long" (53 weeks) when it starts on a Thursday, or when it is a
/// leap year that starts on a Wednesday. That is tested with the closed form
/// `p(year) == 4 || p(year - 1) == 3`, where
/// `p(y) = (y + ⌊y/4⌋ − ⌊y/100⌋ + ⌊y/400⌋) mod 7` gives the weekday of
/// Dec 31 of year `y`. Euclidean division keeps the formula well-defined for
/// non-positive years.
fn iso_weeks_in_year(year: i64) -> i64 {
    let dec31_weekday = |y: i64| -> i64 {
        let leap_terms = y.div_euclid(4) - y.div_euclid(100) + y.div_euclid(400);
        (y + leap_terms).rem_euclid(7)
    };
    let long_year = dec31_weekday(year) == 4 || dec31_weekday(year - 1) == 3;
    if long_year { 53 } else { 52 }
}
1812
/// A nanosecond-precision point in time, anchored at the Unix epoch.
///
/// The instant is stored as a signed nanosecond offset plus an optional
/// time-zone name. Phase 2 scope: construction, arithmetic, equality,
/// ordering, serde. TZ semantics (IANA tz lookup, DST-aware shift) are
/// deferred to Phase 3 — see br-frankenpandas-4r56.
///
/// Note that the derived `PartialEq` / `Hash` compare the raw fields, so two
/// NaT values with the same `tz` compare equal under `==`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Timestamp {
    /// Nanoseconds since Unix epoch. `i64::MIN` is the NaT sentinel.
    pub nanos: i64,
    /// Optional IANA time-zone name (e.g. `"US/Eastern"`). `None` means
    /// naive / UTC-anchored. Phase 2 treats this as opaque metadata;
    /// Phase 3 wires chrono_tz interpretation.
    ///
    /// Omitted from serialized output when `None`, and defaults to `None`
    /// when absent on deserialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tz: Option<String>,
}
1828
1829impl Timestamp {
    /// Raw `nanos` value that marks a missing timestamp (NaT). It is
    /// `i64::MIN`, parallel to `Timedelta::NAT`.
    pub const NAT: i64 = i64::MIN;
1832
1833    /// Construct a UTC-anchored (tz=None) Timestamp from nanoseconds
1834    /// since Unix epoch.
1835    #[must_use]
1836    pub const fn from_nanos(nanos: i64) -> Self {
1837        Self { nanos, tz: None }
1838    }
1839
1840    /// Construct a Timestamp tagged with an IANA tz name.
1841    ///
1842    /// Phase 2 doesn't interpret the tz — it only carries the name
1843    /// through arithmetic + serde. Phase 3 wires chrono_tz conversion.
1844    #[must_use]
1845    pub fn from_nanos_tz(nanos: i64, tz_name: impl Into<String>) -> Self {
1846        Self {
1847            nanos,
1848            tz: Some(tz_name.into()),
1849        }
1850    }
1851
1852    /// Returns the current UTC timestamp.
1853    ///
1854    /// Matches `pd.Timestamp.now()` / `pd.Timestamp.utcnow()`.
1855    #[must_use]
1856    pub fn now() -> Self {
1857        use std::time::{SystemTime, UNIX_EPOCH};
1858        let duration = SystemTime::now()
1859            .duration_since(UNIX_EPOCH)
1860            .unwrap_or_default();
1861        let nanos = duration.as_nanos() as i64;
1862        Self { nanos, tz: None }
1863    }
1864
1865    /// Alias for `now()`. Matches `pd.Timestamp.utcnow()`.
1866    #[must_use]
1867    pub fn utcnow() -> Self {
1868        Self::now()
1869    }
1870
1871    /// Returns today's date at midnight UTC.
1872    ///
1873    /// Matches `pd.Timestamp.today()`.
1874    #[must_use]
1875    pub fn today() -> Self {
1876        let now = Self::now();
1877        now.normalize()
1878    }
1879
1880    /// The NaT sentinel value for a Timestamp.
1881    #[must_use]
1882    pub const fn nat() -> Self {
1883        Self {
1884            nanos: Self::NAT,
1885            tz: None,
1886        }
1887    }
1888
1889    /// True iff this Timestamp is NaT.
1890    #[must_use]
1891    pub const fn is_nat(&self) -> bool {
1892        self.nanos == Self::NAT
1893    }
1894
1895    /// Nanoseconds since Unix epoch, matching `pd.Timestamp.value`.
1896    #[must_use]
1897    pub const fn value(&self) -> i64 {
1898        self.nanos
1899    }
1900
1901    /// Stored resolution unit, matching `pd.Timestamp.unit`.
1902    ///
1903    /// FrankenPandas `Timestamp` stores nanoseconds internally, so non-NaT
1904    /// values report `ns`. `NaT` has no unit.
1905    #[must_use]
1906    pub const fn unit(&self) -> Option<&'static str> {
1907        if self.is_nat() { None } else { Some("ns") }
1908    }
1909
1910    /// Return the resolution of the timestamp (always "ns" for nanoseconds).
1911    ///
1912    /// Matches `pd.Timestamp.resolution`. Returns None for NaT.
1913    #[must_use]
1914    pub const fn resolution(&self) -> Option<&'static str> {
1915        if self.is_nat() { None } else { Some("ns") }
1916    }
1917
1918    /// Numpy datetime64 scalar payload, matching `pd.Timestamp.asm8`.
1919    #[must_use]
1920    pub const fn asm8(&self) -> i64 {
1921        self.value()
1922    }
1923
1924    /// Convert to a datetime64 payload, matching `pd.Timestamp.to_datetime64()`.
1925    #[must_use]
1926    pub const fn to_datetime64(&self) -> i64 {
1927        self.value()
1928    }
1929
1930    /// Convert to a numpy scalar payload, matching `pd.Timestamp.to_numpy()`.
1931    #[must_use]
1932    pub const fn to_numpy(&self) -> i64 {
1933        self.value()
1934    }
1935
1936    /// POSIX timestamp in seconds, matching `pd.Timestamp.timestamp()`.
1937    ///
1938    /// Pandas exposes this through Python's datetime surface, so sub-microsecond
1939    /// nanoseconds are rounded to six decimal places. `NaT` raises in pandas;
1940    /// fp-types surfaces the same condition as a missing-value error.
1941    pub fn timestamp(&self) -> Result<f64, TypeError> {
1942        if self.is_nat() {
1943            return Err(TypeError::ValueIsMissing {
1944                kind: NullKind::NaT,
1945            });
1946        }
1947        let seconds = self.nanos as f64 / 1_000_000_000.0;
1948        let rounded = format!("{seconds:.6}").parse().unwrap_or(seconds);
1949        Ok(rounded)
1950    }
1951
1952    /// Add a Timedelta. NaT in either operand → NaT; saturates on overflow.
1953    /// TZ is preserved from `self`.
1954    #[must_use]
1955    pub fn add_timedelta(&self, td_nanos: i64) -> Self {
1956        if self.is_nat() || td_nanos == Timedelta::NAT {
1957            return Self::nat();
1958        }
1959        Self {
1960            nanos: self.nanos.saturating_add(td_nanos),
1961            tz: self.tz.clone(),
1962        }
1963    }
1964
1965    /// Subtract a Timedelta. NaT propagation + saturation; TZ preserved.
1966    #[must_use]
1967    pub fn sub_timedelta(&self, td_nanos: i64) -> Self {
1968        if self.is_nat() || td_nanos == Timedelta::NAT {
1969            return Self::nat();
1970        }
1971        Self {
1972            nanos: self.nanos.saturating_sub(td_nanos),
1973            tz: self.tz.clone(),
1974        }
1975    }
1976
1977    /// Subtract another Timestamp. Returns a Timedelta (i64 nanos).
1978    /// NaT in either → `Timedelta::NAT`; saturates on overflow.
1979    #[must_use]
1980    pub fn sub_timestamp(&self, other: &Self) -> i64 {
1981        if self.is_nat() || other.is_nat() {
1982            return Timedelta::NAT;
1983        }
1984        self.nanos.saturating_sub(other.nanos)
1985    }
1986
1987    /// NaT-aware semantic equality: two NaT Timestamps are equal to each
1988    /// other (matches pandas `pd.NaT == pd.NaT` under `equals()`, though
1989    /// pandas's `==` operator returns False for NaT==NaT — we follow the
1990    /// `semantic_eq` convention used elsewhere in fp-types).
1991    #[must_use]
1992    pub fn semantic_eq(&self, other: &Self) -> bool {
1993        if self.is_nat() && other.is_nat() {
1994            return true;
1995        }
1996        if self.is_nat() || other.is_nat() {
1997            return false;
1998        }
1999        self.nanos == other.nanos && self.tz == other.tz
2000    }
2001
2002    // ── Rounding to a Timedelta unit (br-frankenpandas-5h6n) ────────────
2003    //
2004    // Pure i64 arithmetic on the nanos axis. tz is preserved. Phase 3
2005    // chrono_tz integration will add a tz-aware variant that handles DST
2006    // boundaries correctly; these methods operate on the absolute time
2007    // axis, matching pandas's tz-naive `.floor` / `.ceil` / `.round`
2008    // semantics for unit values smaller than a day.
2009
2010    /// Round down to the nearest multiple of `unit_nanos`.
2011    ///
2012    /// Matches `pd.Timestamp(...).floor(unit)`. NaT in `self` or a
2013    /// non-positive `unit_nanos` returns NaT.
2014    #[must_use]
2015    pub fn floor_to(&self, unit_nanos: i64) -> Self {
2016        if self.is_nat() || unit_nanos <= 0 {
2017            return Self::nat();
2018        }
2019        Self {
2020            nanos: self.nanos.div_euclid(unit_nanos) * unit_nanos,
2021            tz: self.tz.clone(),
2022        }
2023    }
2024
2025    /// Round up to the nearest multiple of `unit_nanos`.
2026    ///
2027    /// Matches `pd.Timestamp(...).ceil(unit)`. NaT or non-positive
2028    /// `unit_nanos` returns NaT. Already-multiple inputs return self.
2029    #[must_use]
2030    pub fn ceil_to(&self, unit_nanos: i64) -> Self {
2031        if self.is_nat() || unit_nanos <= 0 {
2032            return Self::nat();
2033        }
2034        let rem = self.nanos.rem_euclid(unit_nanos);
2035        let nanos = if rem == 0 {
2036            self.nanos
2037        } else {
2038            self.nanos.saturating_add(unit_nanos - rem)
2039        };
2040        Self {
2041            nanos,
2042            tz: self.tz.clone(),
2043        }
2044    }
2045
2046    /// Round to the nearest multiple of `unit_nanos`, banker's rounding
2047    /// (half-to-even) on ties.
2048    ///
2049    /// Matches `pd.Timestamp(...).round(unit)`. NaT or non-positive
2050    /// `unit_nanos` returns NaT.
2051    #[must_use]
2052    pub fn round_to(&self, unit_nanos: i64) -> Self {
2053        if self.is_nat() || unit_nanos <= 0 {
2054            return Self::nat();
2055        }
2056        let floor = self.nanos.div_euclid(unit_nanos);
2057        let rem = self.nanos.rem_euclid(unit_nanos);
2058        let half = unit_nanos / 2;
2059        let chosen_floor = if rem < half {
2060            floor
2061        } else if rem > half {
2062            floor + 1
2063        } else if unit_nanos % 2 != 0 {
2064            // Odd unit can't have a true half; treat as round-up.
2065            floor + 1
2066        } else {
2067            // Tie: pick the even multiple.
2068            if floor % 2 == 0 { floor } else { floor + 1 }
2069        };
2070        Self {
2071            nanos: chosen_floor.saturating_mul(unit_nanos),
2072            tz: self.tz.clone(),
2073        }
2074    }
2075
2076    // ── String-unit rounding (br-frankenpandas-lbsx) ────────────────────
2077    //
2078    // Pandas convenience: `.floor('H')` / `.ceil('1D')` / `.round('s')`.
2079    // These delegate to `Timedelta::unit_to_nanos` for unit lookup, then to
2080    // the nanos-based `floor_to`/`ceil_to`/`round_to`. Unknown unit strings
2081    // return NaT, matching the rest of fp-types' "missing-input → missing-
2082    // output" convention.
2083
2084    /// Round down to the nearest multiple of the named unit.
2085    ///
2086    /// Matches `pd.Timestamp(...).floor(unit)`. Unknown unit → NaT.
2087    #[must_use]
2088    pub fn floor_to_unit(&self, unit: &str) -> Self {
2089        match Timedelta::unit_to_nanos(unit) {
2090            Some(unit_nanos) => self.floor_to(unit_nanos),
2091            None => Self::nat(),
2092        }
2093    }
2094
2095    /// Round up to the nearest multiple of the named unit.
2096    ///
2097    /// Matches `pd.Timestamp(...).ceil(unit)`. Unknown unit → NaT.
2098    #[must_use]
2099    pub fn ceil_to_unit(&self, unit: &str) -> Self {
2100        match Timedelta::unit_to_nanos(unit) {
2101            Some(unit_nanos) => self.ceil_to(unit_nanos),
2102            None => Self::nat(),
2103        }
2104    }
2105
2106    /// Round to the nearest multiple of the named unit, banker's rounding.
2107    ///
2108    /// Matches `pd.Timestamp(...).round(unit)`. Unknown unit → NaT.
2109    #[must_use]
2110    pub fn round_to_unit(&self, unit: &str) -> Self {
2111        match Timedelta::unit_to_nanos(unit) {
2112            Some(unit_nanos) => self.round_to(unit_nanos),
2113            None => Self::nat(),
2114        }
2115    }
2116
2117    /// Pandas-named alias for [`floor_to_unit`](Self::floor_to_unit).
2118    #[must_use]
2119    pub fn floor(&self, freq: &str) -> Self {
2120        self.floor_to_unit(freq)
2121    }
2122
2123    /// Pandas-named alias for [`ceil_to_unit`](Self::ceil_to_unit).
2124    #[must_use]
2125    pub fn ceil(&self, freq: &str) -> Self {
2126        self.ceil_to_unit(freq)
2127    }
2128
2129    /// Pandas-named alias for [`round_to_unit`](Self::round_to_unit).
2130    #[must_use]
2131    pub fn round(&self, freq: &str) -> Self {
2132        self.round_to_unit(freq)
2133    }
2134
2135    /// Extract the year component from the timestamp.
2136    ///
2137    /// Matches `pd.Timestamp.year`. Returns None for NaT.
2138    #[must_use]
2139    pub fn year(&self) -> Option<i64> {
2140        if self.is_nat() {
2141            return None;
2142        }
2143        let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
2144        let days_since_epoch = total_secs / 86400;
2145        let days = days_since_epoch + 719_468;
2146        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2147        let doe = days - era * 146_097;
2148        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2149        let y = yoe + era * 400;
2150        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2151        let mp = (5 * doy + 2) / 153;
2152        let m = if mp < 10 { mp + 3 } else { mp - 9 };
2153        Some(if m <= 2 { y + 1 } else { y })
2154    }
2155
2156    /// Extract the month component (1-12) from the timestamp.
2157    ///
2158    /// Matches `pd.Timestamp.month`. Returns None for NaT.
2159    #[must_use]
2160    pub fn month(&self) -> Option<i64> {
2161        if self.is_nat() {
2162            return None;
2163        }
2164        let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
2165        let days_since_epoch = total_secs / 86400;
2166        let days = days_since_epoch + 719_468;
2167        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2168        let doe = days - era * 146_097;
2169        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2170        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2171        let mp = (5 * doy + 2) / 153;
2172        Some(if mp < 10 { mp + 3 } else { mp - 9 })
2173    }
2174
2175    /// Extract the day component (1-31) from the timestamp.
2176    ///
2177    /// Matches `pd.Timestamp.day`. Returns None for NaT.
2178    #[must_use]
2179    pub fn day(&self) -> Option<i64> {
2180        if self.is_nat() {
2181            return None;
2182        }
2183        let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
2184        let days_since_epoch = total_secs / 86400;
2185        let days = days_since_epoch + 719_468;
2186        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2187        let doe = days - era * 146_097;
2188        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2189        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2190        let mp = (5 * doy + 2) / 153;
2191        Some(doy - (153 * mp + 2) / 5 + 1)
2192    }
2193
2194    /// Extract the hour component (0-23) from the timestamp.
2195    ///
2196    /// Matches `pd.Timestamp.hour`. Returns None for NaT.
2197    #[must_use]
2198    pub fn hour(&self) -> Option<i64> {
2199        if self.is_nat() {
2200            return None;
2201        }
2202        let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
2203        let secs_of_day = (total_secs % 86400 + 86400) % 86400;
2204        Some(secs_of_day / 3600)
2205    }
2206
2207    /// Extract the minute component (0-59) from the timestamp.
2208    ///
2209    /// Matches `pd.Timestamp.minute`. Returns None for NaT.
2210    #[must_use]
2211    pub fn minute(&self) -> Option<i64> {
2212        if self.is_nat() {
2213            return None;
2214        }
2215        let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
2216        let secs_of_day = (total_secs % 86400 + 86400) % 86400;
2217        Some((secs_of_day % 3600) / 60)
2218    }
2219
2220    /// Extract the second component (0-59) from the timestamp.
2221    ///
2222    /// Matches `pd.Timestamp.second`. Returns None for NaT.
2223    #[must_use]
2224    pub fn second(&self) -> Option<i64> {
2225        if self.is_nat() {
2226            return None;
2227        }
2228        let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
2229        let secs_of_day = (total_secs % 86400 + 86400) % 86400;
2230        Some(secs_of_day % 60)
2231    }
2232
2233    /// Extract the microsecond component (0-999999) from the timestamp.
2234    ///
2235    /// Matches `pd.Timestamp.microsecond`. Returns None for NaT.
2236    #[must_use]
2237    pub fn microsecond(&self) -> Option<i64> {
2238        if self.is_nat() {
2239            return None;
2240        }
2241        let sub_nanos = (self.nanos % Timedelta::NANOS_PER_SEC).unsigned_abs();
2242        Some((sub_nanos / 1000) as i64)
2243    }
2244
2245    /// Extract the nanosecond component (0-999) from the timestamp.
2246    ///
2247    /// Matches `pd.Timestamp.nanosecond`. Returns None for NaT.
2248    #[must_use]
2249    pub fn nanosecond(&self) -> Option<i64> {
2250        if self.is_nat() {
2251            return None;
2252        }
2253        let sub_nanos = (self.nanos % Timedelta::NANOS_PER_SEC).unsigned_abs();
2254        Some((sub_nanos % 1000) as i64)
2255    }
2256
2257    /// Return the day of the week (Monday=0, Sunday=6).
2258    ///
2259    /// Matches `pd.Timestamp.dayofweek`. Returns None for NaT.
2260    #[must_use]
2261    pub fn dayofweek(&self) -> Option<i64> {
2262        if self.is_nat() {
2263            return None;
2264        }
2265        let days_since_epoch = self.nanos / Timedelta::NANOS_PER_DAY;
2266        let dow = ((days_since_epoch + 3) % 7 + 7) % 7;
2267        Some(dow)
2268    }
2269
2270    /// Alias for dayofweek(). Matches `pd.Timestamp.weekday`.
2271    #[must_use]
2272    pub fn weekday(&self) -> Option<i64> {
2273        self.dayofweek()
2274    }
2275
2276    /// Alias for dayofweek(). Matches `pd.Timestamp.day_of_week`.
2277    #[must_use]
2278    pub fn day_of_week(&self) -> Option<i64> {
2279        self.dayofweek()
2280    }
2281
2282    /// Return the day of the year (1-366).
2283    ///
2284    /// Matches `pd.Timestamp.dayofyear`. Returns None for NaT.
2285    #[must_use]
2286    pub fn dayofyear(&self) -> Option<i64> {
2287        if self.is_nat() {
2288            return None;
2289        }
2290        let m = self.month()?;
2291        let d = self.day()?;
2292        let y = self.year()?;
2293        let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
2294        let days_before: [i64; 12] = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];
2295        let base = days_before[(m - 1) as usize] + d;
2296        if is_leap && m > 2 {
2297            Some(base + 1)
2298        } else {
2299            Some(base)
2300        }
2301    }
2302
2303    /// Alias for dayofyear(). Matches `pd.Timestamp.day_of_year`.
2304    #[must_use]
2305    pub fn day_of_year(&self) -> Option<i64> {
2306        self.dayofyear()
2307    }
2308
2309    /// Return the proleptic Gregorian ordinal (number of days since Jan 1, year 1).
2310    ///
2311    /// Matches `pd.Timestamp.toordinal()`. Returns None for NaT.
2312    #[must_use]
2313    pub fn toordinal(&self) -> Option<i64> {
2314        if self.is_nat() {
2315            return None;
2316        }
2317        let y = self.year()?;
2318        let m = self.month()?;
2319        let d = self.day()?;
2320        // Algorithm: count days from year 1 to the start of the given year,
2321        // add days in the months before the given month, add the day of month.
2322        // Account for leap years.
2323        let y_minus_1 = y - 1;
2324        let mut ordinal = y_minus_1 * 365 + y_minus_1 / 4 - y_minus_1 / 100 + y_minus_1 / 400;
2325        let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
2326        let days_before: [i64; 12] = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];
2327        ordinal += days_before[(m - 1) as usize];
2328        if is_leap && m > 2 {
2329            ordinal += 1;
2330        }
2331        ordinal += d;
2332        Some(ordinal)
2333    }
2334
2335    /// Construct a Timestamp from a proleptic Gregorian ordinal.
2336    ///
2337    /// Matches `pd.Timestamp.fromordinal(ordinal)`. Returns NaT for invalid ordinals.
2338    #[must_use]
2339    pub fn fromordinal(ordinal: i64) -> Self {
2340        if ordinal <= 0 {
2341            return Self {
2342                nanos: Self::NAT,
2343                tz: None,
2344            };
2345        }
2346        // Convert y/m/d to days since Unix epoch, then to nanos
2347        // Unix epoch is 1970-01-01, which is ordinal 719163
2348        let days_since_epoch = ordinal - 719163;
2349        let nanos = days_since_epoch * 24 * 60 * 60 * 1_000_000_000_i64;
2350        Self { nanos, tz: None }
2351    }
2352
2353    /// Return the Julian Date (astronomical day number).
2354    ///
2355    /// Matches `pd.Timestamp.to_julian_date()`. Returns NaN for NaT.
2356    /// The Julian Date is the continuous count of days since the beginning
2357    /// of the Julian Period (January 1, 4713 BC in the proleptic Julian calendar).
2358    #[must_use]
2359    pub fn to_julian_date(&self) -> f64 {
2360        if self.is_nat() {
2361            return f64::NAN;
2362        }
2363        // Gregorian ordinal 1 (Jan 1, year 1) corresponds to Julian Day 1721425.5
2364        // (at noon, since JD starts at noon)
2365        // For a timestamp at midnight, we subtract 0.5
2366        let ordinal = match self.toordinal() {
2367            Some(o) => o,
2368            None => return f64::NAN,
2369        };
2370        // Fractional day from time components
2371        let h = self.hour().unwrap_or(0) as f64;
2372        let m = self.minute().unwrap_or(0) as f64;
2373        let s = self.second().unwrap_or(0) as f64;
2374        let us = self.microsecond().unwrap_or(0) as f64;
2375        let ns = self.nanosecond().unwrap_or(0) as f64;
2376        let frac_day =
2377            (h + m / 60.0 + s / 3600.0 + us / 3_600_000_000.0 + ns / 3_600_000_000_000.0) / 24.0;
2378        // Julian day at midnight of ordinal 1 is 1721424.5
2379        1721424.5 + ordinal as f64 + frac_day
2380    }
2381
2382    /// Return the quarter (1-4) of the year.
2383    ///
2384    /// Matches `pd.Timestamp.quarter`. Returns None for NaT.
2385    #[must_use]
2386    pub fn quarter(&self) -> Option<i64> {
2387        self.month().map(|m| (m - 1) / 3 + 1)
2388    }
2389
2390    /// Return the ISO week number (1-53).
2391    ///
2392    /// Matches `pd.Timestamp.week`. Returns None for NaT.
2393    #[must_use]
2394    pub fn weekofyear(&self) -> Option<i64> {
2395        if self.is_nat() {
2396            return None;
2397        }
2398        let doy = self.dayofyear()?;
2399        let dow = self.dayofweek()?;
2400        let year = self.year()?;
2401        let iso_dow = if dow == 6 { 7 } else { dow + 1 };
2402        let week = (doy - iso_dow + 10) / 7;
2403        // ISO-8601 has 53-week years, so the clamps must consult the actual
2404        // week count, not hardcode 52/1: a week<1 belongs to the LAST week of
2405        // the previous year (52 OR 53), and a week beyond this year's count
2406        // wraps to week 1 of the next year. pandas isocalendar().week agrees:
2407        // 2021-01-01 -> 53 (2020 is a 53-week year), 2026-12-31 -> 53.
2408        if week < 1 {
2409            Some(iso_weeks_in_year(year - 1))
2410        } else if week > iso_weeks_in_year(year) {
2411            Some(1)
2412        } else {
2413            Some(week)
2414        }
2415    }
2416
2417    /// Alias for weekofyear(). Matches `pd.Timestamp.week`.
2418    #[must_use]
2419    pub fn week(&self) -> Option<i64> {
2420        self.weekofyear()
2421    }
2422
2423    /// Return the timestamp value in the specified unit.
2424    ///
2425    /// Matches `pd.Timestamp.value` when unit is nanoseconds.
2426    /// Supported units: "ns", "us", "ms", "s".
2427    #[must_use]
2428    pub fn to_unit(&self, unit: &str) -> Option<i64> {
2429        if self.is_nat() {
2430            return None;
2431        }
2432        match unit {
2433            "ns" | "nanosecond" | "nanoseconds" => Some(self.nanos),
2434            "us" | "microsecond" | "microseconds" => Some(self.nanos / 1_000),
2435            "ms" | "millisecond" | "milliseconds" => Some(self.nanos / 1_000_000),
2436            "s" | "second" | "seconds" => Some(self.nanos / 1_000_000_000),
2437            _ => None,
2438        }
2439    }
2440
2441    /// Whether the year is a leap year.
2442    ///
2443    /// Matches `pd.Timestamp.is_leap_year`. Returns None for NaT.
2444    #[must_use]
2445    pub fn is_leap_year(&self) -> Option<bool> {
2446        self.year()
2447            .map(|y| (y % 4 == 0 && y % 100 != 0) || y % 400 == 0)
2448    }
2449
2450    /// Whether the day is the first day of the month.
2451    ///
2452    /// Matches `pd.Timestamp.is_month_start`. Returns None for NaT.
2453    #[must_use]
2454    pub fn is_month_start(&self) -> Option<bool> {
2455        self.day().map(|d| d == 1)
2456    }
2457
2458    /// Whether the day is the last day of the month.
2459    ///
2460    /// Matches `pd.Timestamp.is_month_end`. Returns None for NaT.
2461    #[must_use]
2462    pub fn is_month_end(&self) -> Option<bool> {
2463        let y = self.year()?;
2464        let m = self.month()?;
2465        let d = self.day()?;
2466        let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
2467        let days_in_month: [i64; 12] = [
2468            31,
2469            if is_leap { 29 } else { 28 },
2470            31,
2471            30,
2472            31,
2473            30,
2474            31,
2475            31,
2476            30,
2477            31,
2478            30,
2479            31,
2480        ];
2481        Some(d == days_in_month[(m - 1) as usize])
2482    }
2483
2484    /// Whether the day is the first day of a quarter.
2485    ///
2486    /// Matches `pd.Timestamp.is_quarter_start`. Returns None for NaT.
2487    #[must_use]
2488    pub fn is_quarter_start(&self) -> Option<bool> {
2489        let m = self.month()?;
2490        let d = self.day()?;
2491        Some(d == 1 && (m == 1 || m == 4 || m == 7 || m == 10))
2492    }
2493
2494    /// Whether the day is the last day of a quarter.
2495    ///
2496    /// Matches `pd.Timestamp.is_quarter_end`. Returns None for NaT.
2497    #[must_use]
2498    pub fn is_quarter_end(&self) -> Option<bool> {
2499        let m = self.month()?;
2500        let d = self.day()?;
2501        Some(
2502            (m == 3 && d == 31)
2503                || (m == 6 && d == 30)
2504                || (m == 9 && d == 30)
2505                || (m == 12 && d == 31),
2506        )
2507    }
2508
2509    /// Whether the day is the first day of the year (Jan 1).
2510    ///
2511    /// Matches `pd.Timestamp.is_year_start`. Returns None for NaT.
2512    #[must_use]
2513    pub fn is_year_start(&self) -> Option<bool> {
2514        let m = self.month()?;
2515        let d = self.day()?;
2516        Some(m == 1 && d == 1)
2517    }
2518
2519    /// Whether the day is the last day of the year (Dec 31).
2520    ///
2521    /// Matches `pd.Timestamp.is_year_end`. Returns None for NaT.
2522    #[must_use]
2523    pub fn is_year_end(&self) -> Option<bool> {
2524        let m = self.month()?;
2525        let d = self.day()?;
2526        Some(m == 12 && d == 31)
2527    }
2528
2529    /// Return the number of days in the month of this timestamp.
2530    ///
2531    /// Matches `pd.Timestamp.days_in_month`. Returns None for NaT.
2532    #[must_use]
2533    pub fn days_in_month(&self) -> Option<i64> {
2534        let y = self.year()?;
2535        let m = self.month()?;
2536        let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
2537        let days: [i64; 12] = [
2538            31,
2539            if is_leap { 29 } else { 28 },
2540            31,
2541            30,
2542            31,
2543            30,
2544            31,
2545            31,
2546            30,
2547            31,
2548            30,
2549            31,
2550        ];
2551        Some(days[(m - 1) as usize])
2552    }
2553
2554    /// Alias for days_in_month(). Matches `pd.Timestamp.daysinmonth`.
2555    #[must_use]
2556    pub fn daysinmonth(&self) -> Option<i64> {
2557        self.days_in_month()
2558    }
2559
2560    /// Normalize to midnight/day boundary, matching `pd.Timestamp.normalize()`.
2561    #[must_use]
2562    pub fn normalize(&self) -> Self {
2563        self.floor_to_unit("D")
2564    }
2565
2566    /// Replace timestamp components with new values.
2567    ///
2568    /// Matches pd.Timestamp.replace(). None values keep the existing component.
2569    #[must_use]
2570    #[allow(clippy::too_many_arguments)]
2571    pub fn replace(
2572        &self,
2573        year: Option<i64>,
2574        month: Option<i64>,
2575        day: Option<i64>,
2576        hour: Option<i64>,
2577        minute: Option<i64>,
2578        second: Option<i64>,
2579        microsecond: Option<i64>,
2580        nanosecond: Option<i64>,
2581    ) -> Self {
2582        if self.is_nat() {
2583            return self.clone();
2584        }
2585        let cur_year = self.year().unwrap_or(1970);
2586        let cur_month = self.month().unwrap_or(1);
2587        let cur_day = self.day().unwrap_or(1);
2588        let cur_hour = self.hour().unwrap_or(0);
2589        let cur_minute = self.minute().unwrap_or(0);
2590        let cur_second = self.second().unwrap_or(0);
2591        let cur_micro = self.microsecond().unwrap_or(0);
2592        let cur_nano = self.nanosecond().unwrap_or(0);
2593
2594        let y = year.unwrap_or(cur_year);
2595        let mo = month.unwrap_or(cur_month);
2596        let d = day.unwrap_or(cur_day);
2597        let h = hour.unwrap_or(cur_hour);
2598        let mi = minute.unwrap_or(cur_minute);
2599        let s = second.unwrap_or(cur_second);
2600        let us = microsecond.unwrap_or(cur_micro);
2601        let ns = nanosecond.unwrap_or(cur_nano);
2602
2603        let days_from_epoch = Self::days_from_ymd(y, mo, d);
2604        let secs = h * 3600 + mi * 60 + s;
2605        let total_nanos = days_from_epoch * Timedelta::NANOS_PER_DAY
2606            + secs * Timedelta::NANOS_PER_SEC
2607            + us * Timedelta::NANOS_PER_MICRO
2608            + ns;
2609
2610        Self {
2611            nanos: total_nanos,
2612            tz: self.tz.clone(),
2613        }
2614    }
2615
2616    fn days_from_ymd(year: i64, month: i64, day: i64) -> i64 {
2617        let y = if month <= 2 { year - 1 } else { year };
2618        let era = if y >= 0 { y } else { y - 399 } / 400;
2619        let yoe = y - era * 400;
2620        let doy = (153 * (if month > 2 { month - 3 } else { month + 9 }) + 2) / 5 + day - 1;
2621        let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
2622        era * 146097 + doe - 719468
2623    }
2624
2625    /// Return an ISO 8601 string representation of the timestamp.
2626    ///
2627    /// Matches `pd.Timestamp.isoformat()`. NaT returns "NaT".
2628    #[must_use]
2629    pub fn isoformat(&self) -> String {
2630        if self.is_nat() {
2631            return "NaT".to_string();
2632        }
2633        let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
2634        let sub_nanos = (self.nanos % Timedelta::NANOS_PER_SEC).unsigned_abs();
2635        let days_since_epoch = total_secs / 86400;
2636        let secs_of_day = (total_secs % 86400 + 86400) % 86400;
2637
2638        let days = days_since_epoch + 719_468;
2639        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2640        let doe = days - era * 146_097;
2641        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2642        let y = yoe + era * 400;
2643        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2644        let mp = (5 * doy + 2) / 153;
2645        let d = doy - (153 * mp + 2) / 5 + 1;
2646        let m = if mp < 10 { mp + 3 } else { mp - 9 };
2647        let year = if m <= 2 { y + 1 } else { y };
2648
2649        let hour = secs_of_day / 3600;
2650        let minute = (secs_of_day % 3600) / 60;
2651        let second = secs_of_day % 60;
2652
2653        let base = if sub_nanos == 0 {
2654            format!("{year:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}")
2655        } else {
2656            let micros = sub_nanos / 1000;
2657            format!("{year:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}.{micros:06}")
2658        };
2659        match &self.tz {
2660            Some(tz) if tz == "UTC" => format!("{base}+00:00"),
2661            Some(tz) => format!("{base}[{tz}]"),
2662            None => base,
2663        }
2664    }
2665
2666    /// Alias for isoformat.
2667    #[must_use]
2668    pub fn to_iso8601(&self) -> String {
2669        self.isoformat()
2670    }
2671
2672    /// Parse a datetime string into a Timestamp.
2673    ///
2674    /// Supports ISO 8601 formats:
2675    /// - "2024-01-15" (date only, time defaults to 00:00:00)
2676    /// - "2024-01-15T10:30:00" (datetime)
2677    /// - "2024-01-15 10:30:00" (space separator)
2678    /// - "2024-01-15T10:30:00.123456" (with fractional seconds)
2679    /// - "2024-01-15T10:30:00Z" (UTC timezone)
2680    /// - "2024-01-15T10:30:00+05:30" (offset timezone)
2681    /// - "NaT" (Not a Timestamp)
2682    ///
2683    /// Matches `pd.Timestamp()` constructor behavior.
2684    pub fn parse(s: &str) -> Result<Self, TypeError> {
2685        let s = s.trim();
2686
2687        if s.eq_ignore_ascii_case("nat") {
2688            return Ok(Self::nat());
2689        }
2690
2691        let (datetime_part, tz) = Self::split_timezone(s);
2692
2693        let (date_part, time_part) = if datetime_part.contains('T') {
2694            datetime_part
2695                .split_once('T')
2696                .ok_or_else(|| TypeError::ValueNotParseable {
2697                    value: s.to_string(),
2698                    target: "Timestamp".to_string(),
2699                })?
2700        } else if datetime_part.contains(' ')
2701            && datetime_part.chars().filter(|&c| c == ' ').count() == 1
2702        {
2703            datetime_part
2704                .split_once(' ')
2705                .ok_or_else(|| TypeError::ValueNotParseable {
2706                    value: s.to_string(),
2707                    target: "Timestamp".to_string(),
2708                })?
2709        } else {
2710            (datetime_part, "00:00:00")
2711        };
2712
2713        let (year, month, day) =
2714            Self::parse_date(date_part).ok_or_else(|| TypeError::ValueNotParseable {
2715                value: s.to_string(),
2716                target: "Timestamp".to_string(),
2717            })?;
2718
2719        let (hour, minute, second, nanos) =
2720            Self::parse_time(time_part).ok_or_else(|| TypeError::ValueNotParseable {
2721                value: s.to_string(),
2722                target: "Timestamp".to_string(),
2723            })?;
2724
2725        let total_nanos = Self::ymd_hms_to_nanos(year, month, day, hour, minute, second, nanos);
2726
2727        Ok(if let Some(tz_name) = tz {
2728            Self::from_nanos_tz(total_nanos, tz_name)
2729        } else {
2730            Self::from_nanos(total_nanos)
2731        })
2732    }
2733
2734    fn split_timezone(s: &str) -> (&str, Option<String>) {
2735        if let Some(stripped) = s.strip_suffix('Z') {
2736            (stripped, Some("UTC".to_string()))
2737        } else if let Some(idx) = s.rfind('+') {
2738            if idx > 10 {
2739                (&s[..idx], Some(s[idx..].to_string()))
2740            } else {
2741                (s, None)
2742            }
2743        } else if let Some(idx) = s.rfind('-') {
2744            if idx > 10 && s[idx..].contains(':') {
2745                (&s[..idx], Some(s[idx..].to_string()))
2746            } else {
2747                (s, None)
2748            }
2749        } else {
2750            (s, None)
2751        }
2752    }
2753
2754    fn parse_date(s: &str) -> Option<(i64, u32, u32)> {
2755        let parts: Vec<&str> = s.split('-').collect();
2756        if parts.len() != 3 {
2757            return None;
2758        }
2759        let year: i64 = parts[0].parse().ok()?;
2760        let month: u32 = parts[1].parse().ok()?;
2761        let day: u32 = parts[2].parse().ok()?;
2762        if !(1..=days_in_month(year, month)?).contains(&day) {
2763            return None;
2764        }
2765        Some((year, month, day))
2766    }
2767
2768    fn parse_time(s: &str) -> Option<(u32, u32, u32, u64)> {
2769        let (time_str, frac_str) = s.split_once('.').unwrap_or((s, ""));
2770        let parts: Vec<&str> = time_str.split(':').collect();
2771        if parts.is_empty() || parts.len() > 3 {
2772            return None;
2773        }
2774        let hour: u32 = parts.first().and_then(|p| p.parse().ok())?;
2775        let minute: u32 = parts.get(1).and_then(|p| p.parse().ok()).unwrap_or(0);
2776        let second: u32 = parts.get(2).and_then(|p| p.parse().ok()).unwrap_or(0);
2777
2778        if hour > 23 || minute > 59 || second > 59 {
2779            return None;
2780        }
2781
2782        let nanos = if frac_str.is_empty() {
2783            0
2784        } else {
2785            let padded = format!("{:0<9}", &frac_str[..frac_str.len().min(9)]);
2786            padded.parse::<u64>().unwrap_or(0)
2787        };
2788
2789        Some((hour, minute, second, nanos))
2790    }
2791
2792    fn ymd_hms_to_nanos(
2793        year: i64,
2794        month: u32,
2795        day: u32,
2796        hour: u32,
2797        minute: u32,
2798        second: u32,
2799        sub_nanos: u64,
2800    ) -> i64 {
2801        let m = month as i64;
2802        let d = day as i64;
2803
2804        let y = if m <= 2 { year - 1 } else { year };
2805        let era = if y >= 0 { y } else { y - 399 } / 400;
2806        let yoe = y - era * 400;
2807        let doy = (153 * (if m > 2 { m - 3 } else { m + 9 }) + 2) / 5 + d - 1;
2808        let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
2809        let days_since_epoch = era * 146_097 + doe - 719_468;
2810
2811        let total_seconds = days_since_epoch * 86400
2812            + (hour as i64) * 3600
2813            + (minute as i64) * 60
2814            + (second as i64);
2815        total_seconds * Timedelta::NANOS_PER_SEC + sub_nanos as i64
2816    }
2817
2818    /// Format timestamp using strftime directives.
2819    ///
2820    /// Matches `pd.Timestamp.strftime(format)`. Supports: %Y (year), %m (month),
2821    /// %d (day), %H (hour), %M (minute), %S (second), %f (microsecond).
2822    /// NaT returns "NaT".
2823    #[must_use]
2824    pub fn strftime(&self, format: &str) -> String {
2825        if self.is_nat() {
2826            return "NaT".to_string();
2827        }
2828        let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
2829        let sub_nanos = (self.nanos % Timedelta::NANOS_PER_SEC).unsigned_abs();
2830
2831        let days_since_epoch = total_secs / 86400;
2832        let secs_of_day = (total_secs % 86400 + 86400) % 86400;
2833
2834        let days = days_since_epoch + 719_468;
2835        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2836        let doe = days - era * 146_097;
2837        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2838        let y = yoe + era * 400;
2839        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2840        let mp = (5 * doy + 2) / 153;
2841        let d = doy - (153 * mp + 2) / 5 + 1;
2842        let m = if mp < 10 { mp + 3 } else { mp - 9 };
2843        let year = if m <= 2 { y + 1 } else { y };
2844
2845        let hour = secs_of_day / 3600;
2846        let minute = (secs_of_day % 3600) / 60;
2847        let second = secs_of_day % 60;
2848        let micros = sub_nanos / 1000;
2849
2850        format
2851            .replace("%Y", &format!("{year:04}"))
2852            .replace("%m", &format!("{m:02}"))
2853            .replace("%d", &format!("{d:02}"))
2854            .replace("%H", &format!("{hour:02}"))
2855            .replace("%M", &format!("{minute:02}"))
2856            .replace("%S", &format!("{second:02}"))
2857            .replace("%f", &format!("{micros:06}"))
2858    }
2859
2860    /// Return the day of the week as a string (e.g., "Monday").
2861    ///
2862    /// Matches `pd.Timestamp.day_name()`. NaT returns "NaT".
2863    #[must_use]
2864    pub fn day_name(&self) -> String {
2865        const NAMES: [&str; 7] = [
2866            "Thursday",
2867            "Friday",
2868            "Saturday",
2869            "Sunday",
2870            "Monday",
2871            "Tuesday",
2872            "Wednesday",
2873        ];
2874        if self.is_nat() {
2875            return "NaT".to_string();
2876        }
2877        let days_since_epoch = self.nanos / Timedelta::NANOS_PER_DAY;
2878        let dow = ((days_since_epoch % 7) + 7) % 7;
2879        NAMES[dow as usize].to_string()
2880    }
2881
2882    /// Return the month name as a string (e.g., "January").
2883    ///
2884    /// Matches `pd.Timestamp.month_name()`. NaT returns "NaT".
2885    #[must_use]
2886    pub fn month_name(&self) -> String {
2887        const NAMES: [&str; 12] = [
2888            "January",
2889            "February",
2890            "March",
2891            "April",
2892            "May",
2893            "June",
2894            "July",
2895            "August",
2896            "September",
2897            "October",
2898            "November",
2899            "December",
2900        ];
2901        if self.is_nat() {
2902            return "NaT".to_string();
2903        }
2904        let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
2905        let days_since_epoch = total_secs / 86400;
2906        let days = days_since_epoch + 719_468;
2907        let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
2908        let doe = days - era * 146_097;
2909        let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
2910        let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
2911        let mp = (5 * doy + 2) / 153;
2912        let m = if mp < 10 { mp + 3 } else { mp - 9 };
2913        NAMES[(m - 1) as usize].to_string()
2914    }
2915
2916    /// Localize a naive timestamp to a timezone.
2917    ///
2918    /// Matches `pd.Timestamp.tz_localize(tz)`. If `tz` is None, removes the
2919    /// timezone (makes timestamp naive). NaT propagates.
2920    #[must_use]
2921    pub fn tz_localize(&self, tz: Option<&str>) -> Self {
2922        if self.is_nat() {
2923            return Self::nat();
2924        }
2925        Self {
2926            nanos: self.nanos,
2927            tz: tz.map(String::from),
2928        }
2929    }
2930
2931    /// Convert timezone-aware timestamp to another timezone.
2932    ///
2933    /// Matches `pd.Timestamp.tz_convert(tz)`. If timestamp is naive (no tz),
2934    /// the timezone is simply attached without conversion. NaT propagates.
2935    /// Note: actual UTC offset conversion requires chrono-tz (Phase 3).
2936    #[must_use]
2937    pub fn tz_convert(&self, tz: &str) -> Self {
2938        if self.is_nat() {
2939            return Self::nat();
2940        }
2941        Self {
2942            nanos: self.nanos,
2943            tz: Some(tz.to_string()),
2944        }
2945    }
2946
2947    /// Create a Timestamp from a Unix timestamp (seconds since epoch).
2948    ///
2949    /// Matches `pd.Timestamp.fromtimestamp(ts)`. The optional `tz` parameter
2950    /// specifies the timezone to localize to.
2951    #[must_use]
2952    pub fn fromtimestamp(ts: f64, tz: Option<&str>) -> Self {
2953        if ts.is_nan() || ts.is_infinite() {
2954            return Self::nat();
2955        }
2956        let nanos_f64 = ts * 1_000_000_000.0;
2957        // Check for overflow before casting - i64 range is roughly ±9.2e18
2958        const MAX_NANOS: f64 = i64::MAX as f64;
2959        const MIN_NANOS: f64 = i64::MIN as f64;
2960        if !(MIN_NANOS..=MAX_NANOS).contains(&nanos_f64) {
2961            return Self::nat();
2962        }
2963        Self {
2964            nanos: nanos_f64 as i64,
2965            tz: tz.map(String::from),
2966        }
2967    }
2968
2969    /// Create a Timestamp from milliseconds since epoch.
2970    ///
2971    /// Convenience constructor complementing fromtimestamp.
2972    #[must_use]
2973    pub fn from_millis(ms: i64, tz: Option<&str>) -> Self {
2974        Self {
2975            nanos: ms.saturating_mul(1_000_000),
2976            tz: tz.map(String::from),
2977        }
2978    }
2979
2980    /// Create a Timestamp from microseconds since epoch.
2981    ///
2982    /// Convenience constructor complementing fromtimestamp.
2983    #[must_use]
2984    pub fn from_micros(us: i64, tz: Option<&str>) -> Self {
2985        Self {
2986            nanos: us.saturating_mul(1_000),
2987            tz: tz.map(String::from),
2988        }
2989    }
2990
2991    /// Return the timezone string, or None if naive.
2992    #[must_use]
2993    pub fn tzinfo(&self) -> Option<&str> {
2994        self.tz.as_deref()
2995    }
2996
2997    /// Return the timezone name, or None if naive.
2998    ///
2999    /// Alias for tzinfo() matching pandas Timestamp.tzname().
3000    #[must_use]
3001    pub fn tzname(&self) -> Option<&str> {
3002        self.tzinfo()
3003    }
3004}
3005
3006impl std::fmt::Display for Timestamp {
3007    /// Phase 2 debug-style format; Phase 3 replaces with pandas ISO-8601
3008    /// notation once chrono interpretation lands.
3009    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
3010        if self.is_nat() {
3011            return f.write_str("NaT");
3012        }
3013        match &self.tz {
3014            Some(tz) => write!(f, "Timestamp[{}, {}]", self.nanos, tz),
3015            None => write!(f, "Timestamp[{}, UTC]", self.nanos),
3016        }
3017    }
3018}
3019
3020impl PartialOrd for Timestamp {
3021    /// Orders by nanos axis; NaT is incomparable (`None`). Tz difference
3022    /// does not affect ordering — two Timestamps at the same absolute
3023    /// nanos compare equal regardless of tz label (Phase 3 will revisit
3024    /// whether tz affects ordering semantics).
3025    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
3026        if self.is_nat() || other.is_nat() {
3027            return None;
3028        }
3029        Some(self.nanos.cmp(&other.nanos))
3030    }
3031}
3032
3033// ── Missingness utilities ──────────────────────────────────────────────
3034
3035pub fn isna(values: &[Scalar]) -> Vec<bool> {
3036    values.iter().map(Scalar::is_missing).collect()
3037}
3038
3039pub fn isnull(values: &[Scalar]) -> Vec<bool> {
3040    isna(values)
3041}
3042
3043pub fn notna(values: &[Scalar]) -> Vec<bool> {
3044    values.iter().map(|v| !v.is_missing()).collect()
3045}
3046
3047pub fn notnull(values: &[Scalar]) -> Vec<bool> {
3048    notna(values)
3049}
3050
3051pub fn count_na(values: &[Scalar]) -> usize {
3052    values.iter().filter(|v| v.is_missing()).count()
3053}
3054
3055pub fn fill_na(values: &[Scalar], fill: &Scalar) -> Vec<Scalar> {
3056    values
3057        .iter()
3058        .map(|v| {
3059            if v.is_missing() {
3060                fill.clone()
3061            } else {
3062                v.clone()
3063            }
3064        })
3065        .collect()
3066}
3067
3068pub fn dropna(values: &[Scalar]) -> Vec<Scalar> {
3069    values.iter().filter(|v| !v.is_missing()).cloned().collect()
3070}
3071
3072// ── Nanops: null-skipping numeric reductions ───────────────────────────
3073
3074fn collect_finite(values: &[Scalar]) -> Vec<f64> {
3075    values
3076        .iter()
3077        .filter(|v| !v.is_missing())
3078        .filter_map(|v| v.to_f64().ok())
3079        .collect()
3080}
3081
3082/// Per br-frankenpandas-620mj: if a column is uniformly Timedelta64
3083/// (with optional NAT/Null missing), sum/mean preserve Timedelta dtype
3084/// matching pandas — instead of silently coercing to Float64(0.0) via
3085/// the collect_finite path (which drops Timedelta64 because to_f64
3086/// errors). Returns Some(sum_in_ns, observed_count) when applicable.
3087fn collect_timedelta_ns(values: &[Scalar]) -> Option<(i128, usize)> {
3088    let mut sum: i128 = 0;
3089    let mut count: usize = 0;
3090    let mut saw_timedelta = false;
3091    for v in values {
3092        if v.is_missing() {
3093            continue;
3094        }
3095        match v {
3096            Scalar::Timedelta64(ns) => {
3097                saw_timedelta = true;
3098                sum += i128::from(*ns);
3099                count += 1;
3100            }
3101            // Any non-Timedelta non-missing value bails out to the
3102            // existing Float64 path, preserving cross-type behavior.
3103            _ => return None,
3104        }
3105    }
3106    if saw_timedelta {
3107        Some((sum, count))
3108    } else {
3109        None
3110    }
3111}
3112
3113pub fn nansum(values: &[Scalar]) -> Scalar {
3114    if let Some((sum, _)) = collect_timedelta_ns(values) {
3115        let clamped = sum.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
3116        return Scalar::Timedelta64(clamped as i64);
3117    }
3118    // Fused single-pass fold: filter missing / non-f64-coercible and accumulate
3119    // in one scan, avoiding the intermediate `collect_finite` Vec<f64> and its
3120    // second pass. Bit-identical to `collect_finite(..).iter().sum()`: same
3121    // finite values in the same order, same left-fold f64 `+` (empty -> 0.0).
3122    let mut sum = 0.0_f64;
3123    for v in values {
3124        if v.is_missing() {
3125            continue;
3126        }
3127        if let Ok(x) = v.to_f64() {
3128            sum += x;
3129        }
3130    }
3131    Scalar::Float64(sum)
3132}
3133
3134pub fn nanmean(values: &[Scalar]) -> Scalar {
3135    if let Some((sum, count)) = collect_timedelta_ns(values) {
3136        if count == 0 {
3137            return Scalar::Timedelta64(Timedelta::NAT);
3138        }
3139        let mean = sum / count as i128;
3140        let clamped = mean.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
3141        return Scalar::Timedelta64(clamped as i64);
3142    }
3143    // Fused single-pass fold (see `nansum`): accumulate sum + count of finite
3144    // values in one scan. Bit-identical to the prior `collect_finite` two-pass:
3145    // count == nums.len(), sum folds the same values in the same order.
3146    let mut sum = 0.0_f64;
3147    let mut count = 0usize;
3148    for v in values {
3149        if v.is_missing() {
3150            continue;
3151        }
3152        if let Ok(x) = v.to_f64() {
3153            sum += x;
3154            count += 1;
3155        }
3156    }
3157    if count == 0 {
3158        return Scalar::Null(NullKind::NaN);
3159    }
3160    Scalar::Float64(sum / count as f64)
3161}
3162
3163pub fn nanany(values: &[Scalar]) -> Scalar {
3164    for v in values {
3165        if v.is_missing() {
3166            continue;
3167        }
3168        match v {
3169            Scalar::Bool(flag) if *flag => return Scalar::Bool(true),
3170            Scalar::Int64(val) if *val != 0 => return Scalar::Bool(true),
3171            Scalar::Float64(val) if !val.is_nan() && *val != 0.0 => return Scalar::Bool(true),
3172            Scalar::Utf8(val) if !val.is_empty() => return Scalar::Bool(true),
3173            // pandas Series([td]).any() returns True for any non-zero
3174            // Timedelta. NaT is already filtered by is_missing() above.
3175            Scalar::Timedelta64(ns) if *ns != 0 => return Scalar::Bool(true),
3176            _ => continue,
3177        }
3178    }
3179    Scalar::Bool(false)
3180}
3181
3182pub fn nanall(values: &[Scalar]) -> Scalar {
3183    for v in values {
3184        if v.is_missing() {
3185            continue;
3186        }
3187        match v {
3188            Scalar::Bool(flag) if !*flag => return Scalar::Bool(false),
3189            Scalar::Int64(val) if *val == 0 => return Scalar::Bool(false),
3190            Scalar::Float64(val) if val.is_nan() || *val == 0.0 => return Scalar::Bool(false),
3191            Scalar::Utf8(val) if val.is_empty() => return Scalar::Bool(false),
3192            // pandas Series([td(0)]).all() returns False; any non-zero
3193            // Timedelta is truthy. NaT is already filtered by is_missing.
3194            Scalar::Timedelta64(ns) if *ns == 0 => return Scalar::Bool(false),
3195            _ => continue,
3196        }
3197    }
3198    Scalar::Bool(true)
3199}
3200
3201pub fn nancount(values: &[Scalar]) -> Scalar {
3202    let n = values.iter().filter(|v| !v.is_missing()).count();
3203    Scalar::Int64(n as i64)
3204}
3205
3206pub fn nanmin(values: &[Scalar]) -> Scalar {
3207    let mut min: Option<&Scalar> = None;
3208    for v in values {
3209        if v.is_missing() {
3210            continue;
3211        }
3212        match (min, v) {
3213            (None, _) => min = Some(v),
3214            (Some(Scalar::Int64(a)), Scalar::Int64(b)) => {
3215                if b < a {
3216                    min = Some(v)
3217                }
3218            }
3219            (Some(Scalar::Float64(a)), Scalar::Float64(b)) => {
3220                if *b < *a {
3221                    min = Some(v)
3222                }
3223            }
3224            (Some(Scalar::Utf8(a)), Scalar::Utf8(b)) => {
3225                if b < a {
3226                    min = Some(v)
3227                }
3228            }
3229            (Some(Scalar::Bool(a)), Scalar::Bool(b)) => {
3230                if b < a {
3231                    min = Some(v)
3232                }
3233            }
3234            // Per br-frankenpandas-yic5m: Timedelta64.to_f64() errors, so
3235            // the catch-all below would silently return NaN. Compare ns
3236            // representations directly; NAT is already filtered by
3237            // is_missing() above.
3238            (Some(Scalar::Timedelta64(a)), Scalar::Timedelta64(b)) => {
3239                if b < a {
3240                    min = Some(v)
3241                }
3242            }
3243            (Some(a), b) => match (a.to_f64(), b.to_f64()) {
3244                (Ok(af), Ok(bf)) if bf < af => min = Some(v),
3245                (Ok(_), Ok(_)) => {}
3246                _ => return Scalar::Null(NullKind::NaN),
3247            },
3248        }
3249    }
3250    match min {
3251        Some(v) => v.clone(),
3252        None => Scalar::Null(NullKind::NaN),
3253    }
3254}
3255
3256pub fn nanmax(values: &[Scalar]) -> Scalar {
3257    let mut max: Option<&Scalar> = None;
3258    for v in values {
3259        if v.is_missing() {
3260            continue;
3261        }
3262        match (max, v) {
3263            (None, _) => max = Some(v),
3264            (Some(Scalar::Int64(a)), Scalar::Int64(b)) => {
3265                if b > a {
3266                    max = Some(v)
3267                }
3268            }
3269            (Some(Scalar::Float64(a)), Scalar::Float64(b)) => {
3270                if *b > *a {
3271                    max = Some(v)
3272                }
3273            }
3274            (Some(Scalar::Utf8(a)), Scalar::Utf8(b)) => {
3275                if b > a {
3276                    max = Some(v)
3277                }
3278            }
3279            (Some(Scalar::Bool(a)), Scalar::Bool(b)) => {
3280                if b > a {
3281                    max = Some(v)
3282                }
3283            }
3284            // Per br-frankenpandas-yic5m: Timedelta64.to_f64() errors, so
3285            // the catch-all below would silently return NaN. Compare ns
3286            // representations directly; NAT is already filtered above.
3287            (Some(Scalar::Timedelta64(a)), Scalar::Timedelta64(b)) => {
3288                if b > a {
3289                    max = Some(v)
3290                }
3291            }
3292            (Some(a), b) => match (a.to_f64(), b.to_f64()) {
3293                (Ok(af), Ok(bf)) if bf > af => max = Some(v),
3294                (Ok(_), Ok(_)) => {}
3295                _ => return Scalar::Null(NullKind::NaN),
3296            },
3297        }
3298    }
3299    match max {
3300        Some(v) => v.clone(),
3301        None => Scalar::Null(NullKind::NaN),
3302    }
3303}
3304
3305/// Per br-frankenpandas-j8ntk: harvest ns values from a uniformly-Timedelta64
3306/// input as f64 (the f64 representation has 53 bits of mantissa, sufficient
3307/// for ns spans up to ~104 days exactly; beyond that pandas itself loses
3308/// precision the same way). Returns None if any non-missing value is not
3309/// Timedelta64.
3310fn collect_timedelta_ns_f64(values: &[Scalar]) -> Option<Vec<f64>> {
3311    let mut out = Vec::with_capacity(values.len());
3312    let mut saw_td = false;
3313    for v in values {
3314        if v.is_missing() {
3315            continue;
3316        }
3317        match v {
3318            Scalar::Timedelta64(ns) => {
3319                saw_td = true;
3320                out.push(*ns as f64);
3321            }
3322            _ => return None,
3323        }
3324    }
3325    if saw_td { Some(out) } else { None }
3326}
3327
3328/// Clamp an f64 result into i64 range and wrap as Scalar::Timedelta64.
3329fn float_ns_to_timedelta(value: f64) -> Scalar {
3330    if !value.is_finite() {
3331        return Scalar::Timedelta64(Timedelta::NAT);
3332    }
3333    let clamped = value.clamp(i64::MIN as f64, i64::MAX as f64);
3334    Scalar::Timedelta64(clamped as i64)
3335}
3336
3337pub fn nanmedian(values: &[Scalar]) -> Scalar {
3338    // Per br-frankenpandas-j8ntk: Timedelta64 median preserves dtype.
3339    if let Some(mut td) = collect_timedelta_ns_f64(values) {
3340        if td.is_empty() {
3341            return Scalar::Timedelta64(Timedelta::NAT);
3342        }
3343        // O(n) selection instead of a full sort (see the numeric arm below):
3344        // collect_timedelta_ns_f64 yields finite ns (NaT excluded), so the
3345        // comparator is a total order; order statistics depend only on values,
3346        // so the unstable partition yields the same td[mid-1]/td[mid].
3347        let n = td.len();
3348        let mid = n / 2;
3349        let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
3350        let (left, mid_ref, _right) = td.select_nth_unstable_by(mid, cmp);
3351        let mid_val = *mid_ref;
3352        let median_ns = if n.is_multiple_of(2) {
3353            let lower = left.iter().copied().fold(f64::NEG_INFINITY, f64::max);
3354            (lower + mid_val) / 2.0
3355        } else {
3356            mid_val
3357        };
3358        return float_ns_to_timedelta(median_ns);
3359    }
3360    let mut nums = collect_finite(values);
3361    if nums.is_empty() {
3362        return Scalar::Null(NullKind::NaN);
3363    }
3364    // O(n) selection instead of an O(n log n) full sort: select_nth_unstable_by
3365    // places the `mid`-th smallest at index `mid` with all smaller elements
3366    // (unordered) in the left partition. For even n the (mid-1)-th smallest is
3367    // the MAX of that left partition. Bit-identical to the sort path: order
3368    // statistics depend only on VALUES, and ties share a value, so the
3369    // unstable partition yields the same nums[mid-1]/nums[mid] the sort did.
3370    let n = nums.len();
3371    let mid = n / 2;
3372    let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
3373    let (left, mid_ref, _right) = nums.select_nth_unstable_by(mid, cmp);
3374    let mid_val = *mid_ref;
3375    if n.is_multiple_of(2) {
3376        let lower = left.iter().copied().fold(f64::NEG_INFINITY, f64::max);
3377        Scalar::Float64((lower + mid_val) / 2.0)
3378    } else {
3379        Scalar::Float64(mid_val)
3380    }
3381}
3382
3383pub fn nanvar(values: &[Scalar], ddof: usize) -> Scalar {
3384    // Per br-frankenpandas-j8ntk: Timedelta64 var preserves dtype — pandas
3385    // returns Timedelta even though variance is ns² conceptually; matching.
3386    if let Some(td) = collect_timedelta_ns_f64(values) {
3387        if td.len() <= ddof {
3388            return Scalar::Timedelta64(Timedelta::NAT);
3389        }
3390        let mean: f64 = td.iter().sum::<f64>() / td.len() as f64;
3391        let sum_sq: f64 = td.iter().map(|x| (x - mean).powi(2)).sum();
3392        return float_ns_to_timedelta(sum_sq / (td.len() - ddof) as f64);
3393    }
3394    let nums = collect_finite(values);
3395    if nums.len() <= ddof {
3396        return Scalar::Null(NullKind::NaN);
3397    }
3398    let mean: f64 = nums.iter().sum::<f64>() / nums.len() as f64;
3399    let sum_sq: f64 = nums.iter().map(|x| (x - mean).powi(2)).sum();
3400    Scalar::Float64(sum_sq / (nums.len() - ddof) as f64)
3401}
3402
3403pub fn nanstd(values: &[Scalar], ddof: usize) -> Scalar {
3404    // Per br-frankenpandas-j8ntk: Timedelta64 std preserves dtype.
3405    if let Some(td) = collect_timedelta_ns_f64(values) {
3406        if td.len() <= ddof {
3407            return Scalar::Timedelta64(Timedelta::NAT);
3408        }
3409        let mean: f64 = td.iter().sum::<f64>() / td.len() as f64;
3410        let sum_sq: f64 = td.iter().map(|x| (x - mean).powi(2)).sum();
3411        let var = sum_sq / (td.len() - ddof) as f64;
3412        return float_ns_to_timedelta(var.sqrt());
3413    }
3414    match nanvar(values, ddof) {
3415        Scalar::Float64(v) => Scalar::Float64(v.sqrt()),
3416        other => other,
3417    }
3418}
3419
3420/// Standard error of the mean over non-missing values.
3421///
3422/// Matches `pd.Series.sem(ddof=1)` / `scipy.stats.sem`. Computed as
3423/// `std(values, ddof) / sqrt(n)` where `n` is the non-missing count.
3424/// Returns `Null(NaN)` when `n <= ddof`.
3425pub fn nansem(values: &[Scalar], ddof: usize) -> Scalar {
3426    // Per br-frankenpandas-j8ntk: Timedelta64 sem preserves dtype.
3427    if let Some(td) = collect_timedelta_ns_f64(values) {
3428        if td.len() <= ddof {
3429            return Scalar::Timedelta64(Timedelta::NAT);
3430        }
3431        let mean: f64 = td.iter().sum::<f64>() / td.len() as f64;
3432        let sum_sq: f64 = td.iter().map(|x| (x - mean).powi(2)).sum();
3433        let var = sum_sq / (td.len() - ddof) as f64;
3434        let std = var.sqrt();
3435        return float_ns_to_timedelta(std / (td.len() as f64).sqrt());
3436    }
3437    let nums = collect_finite(values);
3438    if nums.len() <= ddof {
3439        return Scalar::Null(NullKind::NaN);
3440    }
3441    match nanstd(values, ddof) {
3442        Scalar::Float64(s) => Scalar::Float64(s / (nums.len() as f64).sqrt()),
3443        other => other,
3444    }
3445}
3446
3447/// Peak-to-peak range of non-missing values (max − min).
3448///
3449/// Matches `np.ptp` behavior on nan-safe inputs. Returns `Null(NaN)`
3450/// for empty or all-missing inputs.
3451pub fn nanptp(values: &[Scalar]) -> Scalar {
3452    // Per br-frankenpandas-u2g0r: Timedelta64 peak-to-peak returns
3453    // Timedelta64 (max - min in ns). collect_timedelta_ns_f64 is defined
3454    // in the cumulative-aggregations section below.
3455    if let Some(td) = collect_timedelta_ns_f64(values) {
3456        if td.is_empty() {
3457            return Scalar::Timedelta64(Timedelta::NAT);
3458        }
3459        let (mut lo, mut hi) = (f64::INFINITY, f64::NEG_INFINITY);
3460        for x in &td {
3461            if *x < lo {
3462                lo = *x;
3463            }
3464            if *x > hi {
3465                hi = *x;
3466            }
3467        }
3468        return float_ns_to_timedelta(hi - lo);
3469    }
3470    // Fused single-pass min/max (see `nansum`): track lo/hi while filtering, no
3471    // intermediate Vec<f64>. Bit-identical to the prior collect_finite two-pass:
3472    // `seen` is true exactly when collect_finite would be non-empty, and the
3473    // lo/hi comparisons fold the same finite values in the same order.
3474    let (mut lo, mut hi) = (f64::INFINITY, f64::NEG_INFINITY);
3475    let mut seen = false;
3476    for v in values {
3477        if v.is_missing() {
3478            continue;
3479        }
3480        if let Ok(x) = v.to_f64() {
3481            seen = true;
3482            if x < lo {
3483                lo = x;
3484            }
3485            if x > hi {
3486                hi = x;
3487            }
3488        }
3489    }
3490    if !seen {
3491        return Scalar::Null(NullKind::NaN);
3492    }
3493    Scalar::Float64(hi - lo)
3494}
3495
3496/// Sample skewness (bias-corrected, Fisher-Pearson) over non-missing values.
3497///
3498/// Matches `pd.Series.skew()`. Requires at least 3 non-missing values;
3499/// returns `Null(NaN)` otherwise, and when the sample standard deviation
3500/// is zero.
3501pub fn nanskew(values: &[Scalar]) -> Scalar {
3502    let nums = collect_finite(values);
3503    let n = nums.len() as f64;
3504    if n < 3.0 {
3505        return Scalar::Null(NullKind::NaN);
3506    }
3507    let mean = nums.iter().sum::<f64>() / n;
3508    let m2: f64 = nums.iter().map(|x| (x - mean).powi(2)).sum();
3509    let m3: f64 = nums.iter().map(|x| (x - mean).powi(3)).sum();
3510    let s2 = m2 / (n - 1.0);
3511    if s2 == 0.0 {
3512        return Scalar::Float64(0.0);
3513    }
3514    let s3 = s2.powf(1.5);
3515    Scalar::Float64((n / ((n - 1.0) * (n - 2.0))) * (m3 / s3))
3516}
3517
3518/// Excess sample kurtosis (Fisher's definition, bias-corrected) over
3519/// non-missing values.
3520///
3521/// Matches `pd.Series.kurt()`. Requires at least 4 non-missing values;
3522/// returns `Null(NaN)` otherwise, and when the sample standard deviation
3523/// is zero.
3524pub fn nankurt(values: &[Scalar]) -> Scalar {
3525    let nums = collect_finite(values);
3526    let n = nums.len() as f64;
3527    if n < 4.0 {
3528        return Scalar::Null(NullKind::NaN);
3529    }
3530    let mean = nums.iter().sum::<f64>() / n;
3531    let m2: f64 = nums.iter().map(|x| (x - mean).powi(2)).sum();
3532    let m4: f64 = nums.iter().map(|x| (x - mean).powi(4)).sum();
3533    let s2 = m2 / (n - 1.0);
3534    if s2 == 0.0 {
3535        return Scalar::Float64(0.0);
3536    }
3537    let adj = (n * (n + 1.0)) / ((n - 1.0) * (n - 2.0) * (n - 3.0));
3538    let sub = (3.0 * (n - 1.0).powi(2)) / ((n - 2.0) * (n - 3.0));
3539    Scalar::Float64(adj * (m4 / (s2 * s2)) - sub)
3540}
3541
3542/// Product of non-missing values. Returns 1.0 for empty input (matching pandas).
3543pub fn nanprod(values: &[Scalar]) -> Scalar {
3544    // Per br-frankenpandas-szq6a: pandas raises TypeError on
3545    // td_series.prod() because Timedelta² has no dimension. Returning the
3546    // misleading Float64(1.0) (empty-iterator default after collect_finite
3547    // drops every Timedelta64) is worse than surfacing missing. NaT
3548    // propagates the "type-incompatible" signal in lieu of a Result-level
3549    // error.
3550    if is_timedelta_input(values) {
3551        return Scalar::Null(NullKind::NaN);
3552    }
3553    // Fused single-pass fold (see `nansum`): filter missing / non-coercible and
3554    // multiply in one scan, no intermediate Vec<f64>. Bit-identical to
3555    // `collect_finite(..).iter().product()`: same finite values, same order,
3556    // same f64 `*` (Product for f64 == fold(1.0, *)); empty -> 1.0.
3557    let mut prod = 1.0_f64;
3558    for v in values {
3559        if v.is_missing() {
3560            continue;
3561        }
3562        if let Ok(x) = v.to_f64() {
3563            prod *= x;
3564        }
3565    }
3566    Scalar::Float64(prod)
3567}
3568
3569/// Cumulative sum respecting null propagation.
3570///
3571/// Per br-frankenpandas-x0x91: detect uniformly-Timedelta64 input
3572/// (allowing Null/NAT missing markers). Returns true when at least one
3573/// non-missing value is Timedelta64 and no other dtype appears.
3574fn is_timedelta_input(values: &[Scalar]) -> bool {
3575    let mut saw_td = false;
3576    for v in values {
3577        if v.is_missing() {
3578            continue;
3579        }
3580        match v {
3581            Scalar::Timedelta64(_) => saw_td = true,
3582            _ => return false,
3583        }
3584    }
3585    saw_td
3586}
3587
3588/// Per br-frankenpandas-x0x91: cumulative running aggregation over a
3589/// uniformly-Timedelta64 input. NaT/Null positions emit NaT and skip
3590/// the accumulator. Saturating i128 keeps overflow contained at i64
3591/// bounds when emitting.
3592fn timedelta_cumulative<F>(values: &[Scalar], init: i128, mut step: F) -> Vec<Scalar>
3593where
3594    F: FnMut(i128, i128) -> i128,
3595{
3596    let mut out = Vec::with_capacity(values.len());
3597    let mut running: i128 = init;
3598    for v in values {
3599        if v.is_missing() {
3600            out.push(Scalar::Null(NullKind::NaT));
3601            continue;
3602        }
3603        if let Scalar::Timedelta64(ns) = v {
3604            running = step(running, i128::from(*ns));
3605            let clamped = running.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
3606            out.push(Scalar::Timedelta64(clamped as i64));
3607        } else {
3608            out.push(Scalar::Null(NullKind::NaT));
3609        }
3610    }
3611    out
3612}
3613
3614/// Per br-frankenpandas-x0x91: running extrema (min/max) over a
3615/// uniformly-Timedelta64 input. `sentinel` is the identity element
3616/// (i64::MAX for min, i64::MIN for max) used until the first
3617/// non-missing value initializes the accumulator.
3618fn timedelta_cumulative_extrema<F>(values: &[Scalar], sentinel: i64, mut step: F) -> Vec<Scalar>
3619where
3620    F: FnMut(i64, i64) -> i64,
3621{
3622    let mut out = Vec::with_capacity(values.len());
3623    let mut running: Option<i64> = None;
3624    for v in values {
3625        if v.is_missing() {
3626            out.push(Scalar::Null(NullKind::NaT));
3627            continue;
3628        }
3629        if let Scalar::Timedelta64(ns) = v {
3630            let new_val = match running {
3631                Some(prev) => step(prev, *ns),
3632                None => *ns,
3633            };
3634            running = Some(new_val);
3635            out.push(Scalar::Timedelta64(new_val));
3636        } else {
3637            out.push(Scalar::Null(NullKind::NaT));
3638        }
3639    }
3640    let _ = sentinel; // silence unused warning if closure ignores it
3641    out
3642}
3643
3644/// Matches `np.nancumsum` / `pd.Series.cumsum()`. Missing input positions
3645/// pass through as `Null(NaN)` in the output; the running sum ignores
3646/// those positions when accumulating.
3647pub fn nancumsum(values: &[Scalar]) -> Vec<Scalar> {
3648    // Per br-frankenpandas-x0x91: when input is uniformly Timedelta64 (with
3649    // optional NaT/Null missing markers), preserve Timedelta dtype to match
3650    // pandas td_series.cumsum() returning Timedelta64.
3651    if is_timedelta_input(values) {
3652        return timedelta_cumulative(values, 0_i128, |acc, x| acc.saturating_add(x));
3653    }
3654    let mut out = Vec::with_capacity(values.len());
3655    let mut running = 0.0_f64;
3656    for v in values {
3657        if v.is_missing() {
3658            out.push(Scalar::Null(NullKind::NaN));
3659            continue;
3660        }
3661        match v.to_f64() {
3662            Ok(x) if !x.is_nan() => {
3663                running += x;
3664                out.push(Scalar::Float64(running));
3665            }
3666            _ => out.push(Scalar::Null(NullKind::NaN)),
3667        }
3668    }
3669    out
3670}
3671
3672/// Cumulative product respecting null propagation.
3673///
3674/// Matches `np.nancumprod` / `pd.Series.cumprod()`. Missing positions
3675/// pass through as `Null(NaN)` without advancing the running product.
3676pub fn nancumprod(values: &[Scalar]) -> Vec<Scalar> {
3677    let mut out = Vec::with_capacity(values.len());
3678    let mut running = 1.0_f64;
3679    for v in values {
3680        if v.is_missing() {
3681            out.push(Scalar::Null(NullKind::NaN));
3682            continue;
3683        }
3684        match v.to_f64() {
3685            Ok(x) if !x.is_nan() => {
3686                running *= x;
3687                out.push(Scalar::Float64(running));
3688            }
3689            _ => out.push(Scalar::Null(NullKind::NaN)),
3690        }
3691    }
3692    out
3693}
3694
3695/// Cumulative maximum respecting null propagation.
3696///
3697/// Matches `pd.Series.cummax()`. Missing positions pass through as
3698/// `Null(NaN)` without updating the running maximum. The first
3699/// non-missing value initializes the running maximum.
3700pub fn nancummax(values: &[Scalar]) -> Vec<Scalar> {
3701    // Per br-frankenpandas-x0x91: Timedelta64 preserves dtype.
3702    if is_timedelta_input(values) {
3703        return timedelta_cumulative_extrema(values, i64::MAX, |acc, x| acc.max(x));
3704    }
3705    let mut out = Vec::with_capacity(values.len());
3706    let mut running: Option<f64> = None;
3707    for v in values {
3708        if v.is_missing() {
3709            out.push(Scalar::Null(NullKind::NaN));
3710            continue;
3711        }
3712        match v.to_f64() {
3713            Ok(x) if !x.is_nan() => {
3714                let new_val = match running {
3715                    Some(prev) => prev.max(x),
3716                    None => x,
3717                };
3718                running = Some(new_val);
3719                out.push(Scalar::Float64(new_val));
3720            }
3721            _ => out.push(Scalar::Null(NullKind::NaN)),
3722        }
3723    }
3724    out
3725}
3726
3727/// Cumulative minimum respecting null propagation.
3728///
3729/// Matches `pd.Series.cummin()`. Symmetric to `nancummax`.
3730pub fn nancummin(values: &[Scalar]) -> Vec<Scalar> {
3731    // Per br-frankenpandas-x0x91: Timedelta64 preserves dtype.
3732    if is_timedelta_input(values) {
3733        return timedelta_cumulative_extrema(values, i64::MIN, |acc, x| acc.min(x));
3734    }
3735    let mut out = Vec::with_capacity(values.len());
3736    let mut running: Option<f64> = None;
3737    for v in values {
3738        if v.is_missing() {
3739            out.push(Scalar::Null(NullKind::NaN));
3740            continue;
3741        }
3742        match v.to_f64() {
3743            Ok(x) if !x.is_nan() => {
3744                let new_val = match running {
3745                    Some(prev) => prev.min(x),
3746                    None => x,
3747                };
3748                running = Some(new_val);
3749                out.push(Scalar::Float64(new_val));
3750            }
3751            _ => out.push(Scalar::Null(NullKind::NaN)),
3752        }
3753    }
3754    out
3755}
3756
3757/// Linear-interpolation quantile over non-missing numeric values.
3758///
3759/// Matches `np.nanquantile(values, q)` with `interpolation='linear'`.
3760/// Returns `Null(NaN)` for empty inputs or when `q` is outside
3761/// `[0.0, 1.0]`.
3762pub fn nanquantile(values: &[Scalar], q: f64) -> Scalar {
3763    if !(0.0..=1.0).contains(&q) {
3764        return Scalar::Null(NullKind::NaN);
3765    }
3766    // Per br-frankenpandas-5djk7: pandas td_series.quantile(q) returns
3767    // Timedelta64 with linear-interpolated ns. Was silently NaN before.
3768    if let Some(mut td) = collect_timedelta_ns_f64(values) {
3769        if td.is_empty() {
3770            return Scalar::Timedelta64(Timedelta::NAT);
3771        }
3772        let n = td.len();
3773        if n == 1 {
3774            return float_ns_to_timedelta(td[0]);
3775        }
3776        let pos = q * (n - 1) as f64;
3777        let lo = pos.floor() as usize;
3778        let hi = pos.ceil() as usize;
3779        // O(n) selection instead of a full sort (see the numeric arm below):
3780        // select the lo-th order statistic; the (lo+1)-th is the MIN of the
3781        // right partition. Bit-identical (finite ns, values-only order stats).
3782        let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
3783        let (_left, lo_ref, right) = td.select_nth_unstable_by(lo, cmp);
3784        let lo_val = *lo_ref;
3785        let ns = if lo == hi {
3786            lo_val
3787        } else {
3788            let hi_val = right.iter().copied().fold(f64::INFINITY, f64::min);
3789            let weight = pos - lo as f64;
3790            lo_val + (hi_val - lo_val) * weight
3791        };
3792        return float_ns_to_timedelta(ns);
3793    }
3794    let mut nums = collect_finite(values);
3795    if nums.is_empty() {
3796        return Scalar::Null(NullKind::NaN);
3797    }
3798    let n = nums.len();
3799    if n == 1 {
3800        return Scalar::Float64(nums[0]);
3801    }
3802    let pos = q * (n - 1) as f64;
3803    let lo = pos.floor() as usize;
3804    let hi = pos.ceil() as usize;
3805    // O(n) selection instead of a full sort: select the `lo`-th order statistic;
3806    // when interpolation is needed (hi == lo+1) the (lo+1)-th smallest is the
3807    // MIN of the right partition. Bit-identical to the sort path (same
3808    // nums[lo]/nums[hi] values, since order statistics depend only on values).
3809    let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
3810    let (_left, lo_ref, right) = nums.select_nth_unstable_by(lo, cmp);
3811    let lo_val = *lo_ref;
3812    if lo == hi {
3813        return Scalar::Float64(lo_val);
3814    }
3815    let hi_val = right.iter().copied().fold(f64::INFINITY, f64::min);
3816    let weight = pos - lo as f64;
3817    Scalar::Float64(lo_val + (hi_val - lo_val) * weight)
3818}
3819
3820/// Position (in the original slice) of the non-missing maximum.
3821///
3822/// Matches `np.nanargmax`. Returns `None` if every value is missing.
3823/// Ties resolve to the first position seen (matching numpy).
3824pub fn nanargmax(values: &[Scalar]) -> Option<usize> {
3825    // Per br-frankenpandas-ql1t5: Timedelta64.to_f64() errors, so the
3826    // generic path would silently skip every Timedelta64 value and
3827    // return None. Pandas td_series.argmax() returns the position of
3828    // the largest Timedelta — compare i64 ns directly.
3829    if is_timedelta_input(values) {
3830        let mut best: Option<(usize, i64)> = None;
3831        for (i, v) in values.iter().enumerate() {
3832            if v.is_missing() {
3833                continue;
3834            }
3835            if let Scalar::Timedelta64(ns) = v {
3836                match best {
3837                    None => best = Some((i, *ns)),
3838                    Some((_, cur)) if *ns > cur => best = Some((i, *ns)),
3839                    _ => {}
3840                }
3841            }
3842        }
3843        return best.map(|(i, _)| i);
3844    }
3845    let mut best: Option<(usize, f64)> = None;
3846    for (i, v) in values.iter().enumerate() {
3847        if v.is_missing() {
3848            continue;
3849        }
3850        if let Ok(x) = v.to_f64() {
3851            if x.is_nan() {
3852                continue;
3853            }
3854            match best {
3855                None => best = Some((i, x)),
3856                Some((_, cur)) if x > cur => best = Some((i, x)),
3857                _ => {}
3858            }
3859        }
3860    }
3861    best.map(|(i, _)| i)
3862}
3863
3864/// Position (in the original slice) of the non-missing minimum.
3865///
3866/// Matches `np.nanargmin`. Returns `None` if every value is missing.
3867pub fn nanargmin(values: &[Scalar]) -> Option<usize> {
3868    // Per br-frankenpandas-ql1t5: Timedelta64 argmin via i64 ns compare.
3869    if is_timedelta_input(values) {
3870        let mut best: Option<(usize, i64)> = None;
3871        for (i, v) in values.iter().enumerate() {
3872            if v.is_missing() {
3873                continue;
3874            }
3875            if let Scalar::Timedelta64(ns) = v {
3876                match best {
3877                    None => best = Some((i, *ns)),
3878                    Some((_, cur)) if *ns < cur => best = Some((i, *ns)),
3879                    _ => {}
3880                }
3881            }
3882        }
3883        return best.map(|(i, _)| i);
3884    }
3885    let mut best: Option<(usize, f64)> = None;
3886    for (i, v) in values.iter().enumerate() {
3887        if v.is_missing() {
3888            continue;
3889        }
3890        if let Ok(x) = v.to_f64() {
3891            if x.is_nan() {
3892                continue;
3893            }
3894            match best {
3895                None => best = Some((i, x)),
3896                Some((_, cur)) if x < cur => best = Some((i, x)),
3897                _ => {}
3898            }
3899        }
3900    }
3901    best.map(|(i, _)| i)
3902}
3903
3904/// Count of unique non-missing values.
3905pub fn nannunique(values: &[Scalar]) -> Scalar {
3906    use rustc_hash::FxHashSet;
3907    #[derive(Hash, PartialEq, Eq)]
3908    enum ScalarKey<'a> {
3909        Bool(bool),
3910        Int64(i64),
3911        FloatBits(u64),
3912        Utf8(&'a str),
3913        Timedelta64(i64),
3914        Datetime64(i64),
3915        Period(i64),
3916        Interval(u64, u64, IntervalClosed),
3917    }
3918
3919    let mut seen = FxHashSet::default();
3920    for val in values {
3921        if val.is_missing() {
3922            continue;
3923        }
3924        let key = match val {
3925            Scalar::Bool(v) => ScalarKey::Bool(*v),
3926            Scalar::Int64(v) => ScalarKey::Int64(*v),
3927            Scalar::Float64(v) => {
3928                let normalized = if *v == 0.0 { 0.0 } else { *v };
3929                ScalarKey::FloatBits(normalized.to_bits())
3930            }
3931            Scalar::Utf8(v) => ScalarKey::Utf8(v.as_str()),
3932            Scalar::Timedelta64(v) => ScalarKey::Timedelta64(*v),
3933            Scalar::Datetime64(v) => ScalarKey::Datetime64(*v),
3934            Scalar::Period(v) => ScalarKey::Period(*v),
3935            Scalar::Interval(v) => ScalarKey::Interval(
3936                normalized_float_bits(v.left),
3937                normalized_float_bits(v.right),
3938                v.closed,
3939            ),
3940            Scalar::Null(_) => continue,
3941        };
3942        seen.insert(key);
3943    }
3944    Scalar::Int64(seen.len() as i64)
3945}
3946
/// Bit pattern of `value` with negative zero folded into positive zero.
///
/// `-0.0 == 0.0` under IEEE-754 comparison, yet the two have different bit
/// patterns; mapping both to the bits of `0.0` lets them hash as one key.
/// Every other value, NaN included, keeps its own bits.
fn normalized_float_bits(value: f64) -> u64 {
    if value == 0.0 {
        0.0_f64.to_bits()
    } else {
        value.to_bits()
    }
}
3951
3952// ── Interval types (br-frankenpandas-j8k4 Phase 1) ──────────────────────
3953//
3954// Scaffolding for pandas `pd.Interval` / `pd.IntervalIndex` / `pd.IntervalDtype`.
3955//
3956// Phase 1 ships float-valued intervals only (matches `cut`/`qcut` output on
3957// numeric bins — the dominant pandas use case). Generic-subtype intervals
3958// over Int64 / Timestamp are deferred to Phase 2 alongside the DType::Interval
3959// enum-variant wiring. See br-j8k4 for the phased roadmap.
3960//
3961// Semantics mirror pandas: closed tells which endpoints are included.
3962//   Left    → [left, right)
3963//   Right   → (left, right]       ← pandas default
3964//   Both    → [left, right]
3965//   Neither → (left, right)
3966
/// Endpoint-inclusion policy for an `Interval`.
///
/// Matches pandas `pd.Interval.closed` / `pd.IntervalDtype.closed` string
/// values ("left" / "right" / "both" / "neither"). Serde uses the same
/// snake_case names. The `Default` is [`IntervalClosed::Right`], and the
/// derived ordering follows declaration order
/// (`Left < Right < Both < Neither`).
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize, Deserialize,
)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum IntervalClosed {
    /// `[left, right)` — left-inclusive, right-exclusive.
    Left,
    /// `(left, right]` — left-exclusive, right-inclusive. Pandas default.
    #[default]
    Right,
    /// `[left, right]` — both endpoints included.
    Both,
    /// `(left, right)` — neither endpoint included.
    Neither,
}
3987
3988impl IntervalClosed {
3989    /// Left endpoint included?
3990    #[must_use]
3991    pub fn left_closed(self) -> bool {
3992        matches!(self, Self::Left | Self::Both)
3993    }
3994
3995    /// Right endpoint included?
3996    #[must_use]
3997    pub fn right_closed(self) -> bool {
3998        matches!(self, Self::Right | Self::Both)
3999    }
4000}
4001
4002impl std::fmt::Display for IntervalClosed {
4003    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
4004        match self {
4005            Self::Left => write!(f, "left"),
4006            Self::Right => write!(f, "right"),
4007            Self::Both => write!(f, "both"),
4008            Self::Neither => write!(f, "neither"),
4009        }
4010    }
4011}
4012
/// A bounded numeric interval between two `f64` endpoints.
///
/// Matches `pd.Interval(left, right, closed)` on the numeric-subtype path.
/// Accessors match pandas: `.left`, `.right`, `.closed`, `.length`, `.mid`,
/// `.contains`, `.is_empty`, `.overlaps`.
///
/// Equality is the derived field-wise comparison, so an interval with a NaN
/// endpoint never compares equal, not even to itself.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct Interval {
    /// Lower endpoint.
    pub left: f64,
    /// Upper endpoint.
    pub right: f64,
    /// Which endpoints are included. When the field is absent during
    /// deserialization it falls back to `IntervalClosed::default()` (`Right`).
    #[serde(default)]
    pub closed: IntervalClosed,
}
4025
4026impl Interval {
    /// Construct an interval from its endpoints and inclusion policy.
    ///
    /// No validation is performed: `left <= right` is not enforced, so a
    /// reversed interval is representable (its `length` is negative), and NaN
    /// endpoints are accepted as-is.
    // NOTE(review): the earlier comment said pandas also accepts reversed
    // intervals — confirm against pandas before relying on that parity.
    #[must_use]
    pub const fn new(left: f64, right: f64, closed: IntervalClosed) -> Self {
        Self {
            left,
            right,
            closed,
        }
    }
4037
4038    /// `right - left` (pandas `.length`). Negative for reversed intervals.
4039    #[must_use]
4040    pub fn length(&self) -> f64 {
4041        self.right - self.left
4042    }
4043
4044    /// Midpoint `(left + right) / 2` (pandas `.mid`).
4045    #[must_use]
4046    pub fn mid(&self) -> f64 {
4047        (self.left + self.right) / 2.0
4048    }
4049
4050    /// Empty iff endpoints coincide AND at least one side is open.
4051    /// Pandas semantics: `pd.Interval(3, 3, 'right').is_empty → True`.
4052    #[must_use]
4053    pub fn is_empty(&self) -> bool {
4054        self.left == self.right && !matches!(self.closed, IntervalClosed::Both)
4055    }
4056
4057    /// Does `value` fall inside this interval?
4058    ///
4059    /// NaN always returns false, matching pandas `pd.Interval.__contains__`
4060    /// behavior (NaN doesn't compare equal to anything).
4061    #[must_use]
4062    pub fn contains(&self, value: f64) -> bool {
4063        if value.is_nan() {
4064            return false;
4065        }
4066        let left_ok = if self.closed.left_closed() {
4067            value >= self.left
4068        } else {
4069            value > self.left
4070        };
4071        let right_ok = if self.closed.right_closed() {
4072            value <= self.right
4073        } else {
4074            value < self.right
4075        };
4076        left_ok && right_ok
4077    }
4078
4079    /// Do `self` and `other` share any point?
4080    ///
4081    /// Matches `pd.Interval.overlaps(other)`. Two intervals overlap iff the
4082    /// max of their lefts is less than the min of their rights, with
4083    /// endpoint-inclusion determining the strictness of the comparison when
4084    /// they touch exactly.
4085    #[must_use]
4086    pub fn overlaps(&self, other: &Self) -> bool {
4087        if self.left > other.right || other.left > self.right {
4088            return false;
4089        }
4090        // Touching-at-a-point: overlap iff both sides at that touchpoint are closed.
4091        if self.right == other.left {
4092            return self.closed.right_closed() && other.closed.left_closed();
4093        }
4094        if other.right == self.left {
4095            return other.closed.right_closed() && self.closed.left_closed();
4096        }
4097        true
4098    }
4099
4100    /// Parse an interval string. Supports bracket notation:
4101    /// - '[0, 1]' -> closed on both ends
4102    /// - '(0, 1)' -> open on both ends
4103    /// - '[0, 1)' -> closed left, open right (pandas default)
4104    /// - '(0, 1]' -> open left, closed right
4105    pub fn parse(s: &str) -> Result<Self, TypeError> {
4106        let s = s.trim();
4107        if s.len() < 5 {
4108            return Err(TypeError::ValueNotParseable {
4109                value: s.to_string(),
4110                target: "Interval".to_string(),
4111            });
4112        }
4113
4114        let first_char = s.chars().next().unwrap();
4115        let last_char = s.chars().last().unwrap();
4116
4117        let left_closed = match first_char {
4118            '[' => true,
4119            '(' => false,
4120            _ => {
4121                return Err(TypeError::ValueNotParseable {
4122                    value: s.to_string(),
4123                    target: "Interval".to_string(),
4124                });
4125            }
4126        };
4127
4128        let right_closed = match last_char {
4129            ']' => true,
4130            ')' => false,
4131            _ => {
4132                return Err(TypeError::ValueNotParseable {
4133                    value: s.to_string(),
4134                    target: "Interval".to_string(),
4135                });
4136            }
4137        };
4138
4139        let closed = match (left_closed, right_closed) {
4140            (true, true) => IntervalClosed::Both,
4141            (true, false) => IntervalClosed::Left,
4142            (false, true) => IntervalClosed::Right,
4143            (false, false) => IntervalClosed::Neither,
4144        };
4145
4146        let inner = &s[1..s.len() - 1];
4147        let parts: Vec<&str> = inner.split(',').collect();
4148        if parts.len() != 2 {
4149            return Err(TypeError::ValueNotParseable {
4150                value: s.to_string(),
4151                target: "Interval".to_string(),
4152            });
4153        }
4154
4155        let left: f64 = parts[0]
4156            .trim()
4157            .parse()
4158            .map_err(|_| TypeError::ValueNotParseable {
4159                value: s.to_string(),
4160                target: "Interval".to_string(),
4161            })?;
4162
4163        let right: f64 = parts[1]
4164            .trim()
4165            .parse()
4166            .map_err(|_| TypeError::ValueNotParseable {
4167                value: s.to_string(),
4168                target: "Interval".to_string(),
4169            })?;
4170
4171        Ok(Self::new(left, right, closed))
4172    }
4173}
4174
4175impl std::fmt::Display for Interval {
4176    /// Matches `str(pd.Interval(0, 5, 'right'))` → `"(0, 5]"`.
4177    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
4178        let left_bracket = if self.closed.left_closed() { '[' } else { '(' };
4179        let right_bracket = if self.closed.right_closed() { ']' } else { ')' };
4180        write!(
4181            f,
4182            "{left_bracket}{}, {}{right_bracket}",
4183            self.left, self.right
4184        )
4185    }
4186}
4187
4188// ── interval_range builders (br-frankenpandas-xaom — Phase 2 of j8k4) ────
4189
4190/// Build `periods` equal-width intervals spanning `[start, end]`.
4191///
4192/// Matches `pd.interval_range(start, end, periods=N, closed=...)` for the
4193/// numeric-subtype case. Returns exactly `periods` intervals; when
4194/// `periods == 0` or `start >= end`, returns an empty vector (matches
4195/// pandas's empty IntervalIndex).
4196///
4197/// ```
4198/// use fp_types::{interval_range_by_periods, IntervalClosed};
4199/// let bins = interval_range_by_periods(0.0, 10.0, 5, IntervalClosed::Right);
4200/// assert_eq!(bins.len(), 5);
4201/// assert_eq!(bins[0].left, 0.0);
4202/// assert_eq!(bins[0].right, 2.0);
4203/// assert_eq!(bins[4].right, 10.0);
4204/// ```
4205#[must_use]
4206pub fn interval_range_by_periods(
4207    start: f64,
4208    end: f64,
4209    periods: usize,
4210    closed: IntervalClosed,
4211) -> Vec<Interval> {
4212    if periods == 0 || !start.is_finite() || !end.is_finite() || start >= end {
4213        return Vec::new();
4214    }
4215    let step = (end - start) / (periods as f64);
4216    let mut out = Vec::with_capacity(periods);
4217    for i in 0..periods {
4218        let left = start + step * (i as f64);
4219        // Use end exactly for the final right edge to avoid float drift.
4220        let right = if i + 1 == periods {
4221            end
4222        } else {
4223            start + step * ((i + 1) as f64)
4224        };
4225        out.push(Interval::new(left, right, closed));
4226    }
4227    out
4228}
4229
4230/// Build equal-`step`-width intervals spanning `[start, end]`.
4231///
4232/// Matches `pd.interval_range(start, end, freq=step, closed=...)` for the
4233/// numeric-subtype case. `step` must be finite and positive; `(end - start)`
4234/// must be an integer multiple of `step` (within float tolerance) — pandas
4235/// raises `ValueError` otherwise; this fn returns `Err(TypeError::IntervalStepDoesNotDivide)`.
4236///
4237/// Returns an empty vector when `start == end` (matches pandas' zero-bin
4238/// IntervalIndex); returns an empty vector when `start > end` (pandas also
4239/// returns empty rather than erroring in this case).
4240pub fn interval_range_by_step(
4241    start: f64,
4242    end: f64,
4243    step: f64,
4244    closed: IntervalClosed,
4245) -> Result<Vec<Interval>, TypeError> {
4246    if !step.is_finite() || !step.is_sign_positive() || step == 0.0 {
4247        return Err(TypeError::InvalidIntervalStep { step });
4248    }
4249    if !start.is_finite() || !end.is_finite() || start >= end {
4250        return Ok(Vec::new());
4251    }
4252    let span = end - start;
4253    let periods_f = span / step;
4254    let periods = periods_f.round() as i64;
4255    if periods <= 0 {
4256        return Ok(Vec::new());
4257    }
4258    let reconstructed = step * (periods as f64);
4259    // Relative tolerance: allow float-rounding noise proportional to span.
4260    if (span - reconstructed).abs() > span.abs() * 1e-9 + 1e-12 {
4261        return Err(TypeError::IntervalStepDoesNotDivide { step, span });
4262    }
4263    let periods = periods as usize;
4264    let mut out = Vec::with_capacity(periods);
4265    for i in 0..periods {
4266        let left = start + step * (i as f64);
4267        let right = if i + 1 == periods {
4268            end
4269        } else {
4270            start + step * ((i + 1) as f64)
4271        };
4272        out.push(Interval::new(left, right, closed));
4273    }
4274    Ok(out)
4275}
4276
4277// ── Period types (br-frankenpandas-epoj Phase 1) ────────────────────────
4278//
4279// Scaffolding for pandas `pd.Period` / `pd.PeriodIndex` / `pd.PeriodDtype`.
4280//
4281// A Period is a calendar *span* (Q1 2024, Jan 2024, 2024-03-15), distinct
4282// from a Timestamp (an instant). Phase 1 ships the PeriodFreq enum +
4283// Period struct with ordinal-based arithmetic (Period + n, Period - Period),
4284// Display in pandas notation, and parse from standard strings. Calendar-
4285// conversion (ordinal ↔ ymd) and DType::Period wiring land in Phase 2.
4286
/// Period frequency code. Matches pandas offset alias core set.
///
/// Each variant names the calendar span covered by one period. A period's
/// ordinal is only meaningful together with its frequency: the ordinal
/// axis is frequency-specific (for Monthly, pandas uses months since
/// 1970-01). In this file, [`Period::parse`] anchors annual, quarterly,
/// monthly and daily ordinals so that 1970 / 1970Q1 / 1970-01 / 1970-01-01
/// are ordinal 0; no parser for the remaining frequencies is defined
/// alongside it. What is guaranteed for every frequency is: same-freq
/// Periods compare + subtract; Period + i64 shifts by `n` periods of the
/// declared frequency.
///
/// `PartialOrd`/`Ord` are derived, so variants order by declaration
/// (`Annual` lowest, `Secondly` highest). The serde `rename_all` attribute
/// upper-cases variant names on the wire. `#[non_exhaustive]` lets new
/// frequencies be added without breaking downstream `match`es.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING-KEBAB-CASE")]
#[non_exhaustive]
pub enum PeriodFreq {
    /// `Y-DEC` / `A` / `Y` — annual periods.
    Annual,
    /// `Q-DEC` / `Q` — quarterly periods.
    Quarterly,
    /// `M` — monthly periods.
    Monthly,
    /// `W-SUN` / `W` — weekly periods.
    Weekly,
    /// `D` — daily periods.
    Daily,
    /// `B` — business-day periods.
    Business,
    /// `h` / `H` — hourly periods.
    Hourly,
    /// `min` / `T` — minutely periods.
    Minutely,
    /// `s` / `S` — secondly periods.
    Secondly,
}
4318
4319impl PeriodFreq {
4320    /// Parse a pandas-style frequency alias. Recognizes the common subset
4321    /// (Y-DEC/A/Y, Q-DEC/Q, M, W-SUN/W, D, B, h/H, min/T, s/S).
4322    /// Case-insensitive.
4323    pub fn parse(alias: &str) -> Option<Self> {
4324        match alias.to_ascii_uppercase().as_str() {
4325            "A" | "Y" | "A-DEC" | "Y-DEC" | "ANNUAL" | "YEARLY" => Some(Self::Annual),
4326            "Q" | "Q-DEC" | "QUARTERLY" => Some(Self::Quarterly),
4327            "M" | "MONTHLY" => Some(Self::Monthly),
4328            "W" | "W-SUN" | "WEEKLY" => Some(Self::Weekly),
4329            "D" | "DAILY" => Some(Self::Daily),
4330            "B" | "BUSINESS" => Some(Self::Business),
4331            "H" | "HOURLY" => Some(Self::Hourly),
4332            "T" | "MIN" | "MINUTELY" => Some(Self::Minutely),
4333            "S" | "SECONDLY" => Some(Self::Secondly),
4334            _ => None,
4335        }
4336    }
4337
4338    /// Canonical pandas alias string.
4339    #[must_use]
4340    pub const fn alias(self) -> &'static str {
4341        match self {
4342            Self::Annual => "Y-DEC",
4343            Self::Quarterly => "Q-DEC",
4344            Self::Monthly => "M",
4345            Self::Weekly => "W-SUN",
4346            Self::Daily => "D",
4347            Self::Business => "B",
4348            Self::Hourly => "h",
4349            Self::Minutely => "min",
4350            Self::Secondly => "s",
4351        }
4352    }
4353
4354    /// Per br-frankenpandas-qigpe: resolution string for PeriodIndex.resolution.
4355    #[must_use]
4356    pub const fn resolution(self) -> &'static str {
4357        match self {
4358            Self::Annual => "A-DEC",
4359            Self::Quarterly => "Q-DEC",
4360            Self::Monthly => "M",
4361            Self::Weekly => "W-SUN",
4362            Self::Daily => "D",
4363            Self::Business => "B",
4364            Self::Hourly => "H",
4365            Self::Minutely => "T",
4366            Self::Secondly => "S",
4367        }
4368    }
4369}
4370
4371impl std::fmt::Display for PeriodFreq {
4372    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
4373        f.write_str(self.alias())
4374    }
4375}
4376
/// A single pandas-style Period value.
///
/// Stored as an integer ordinal on a frequency-specific axis plus the
/// frequency code. Two Periods with different `freq` are incompatible —
/// arithmetic and comparison require same-freq operands. The same-freq
/// helpers ([`Period::cmp_same_freq`], [`Period::diff`]) return `None` on a
/// frequency mismatch.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Period {
    /// Position on the ordinal axis defined by `freq`. [`Period::parse`]
    /// stores `i64::MIN` here when the input is `"NaT"`.
    pub ordinal: i64,
    /// Frequency that gives `ordinal` its meaning.
    pub freq: PeriodFreq,
}
4387
4388impl Period {
4389    #[must_use]
4390    pub const fn new(ordinal: i64, freq: PeriodFreq) -> Self {
4391        Self { ordinal, freq }
4392    }
4393
4394    /// Integer position on this period's frequency axis, matching
4395    /// `pd.Period.ordinal`.
4396    #[must_use]
4397    pub const fn ordinal(&self) -> i64 {
4398        self.ordinal
4399    }
4400
4401    /// Frequency code for this period, matching `pd.Period.freq`.
4402    #[must_use]
4403    pub const fn freq(&self) -> PeriodFreq {
4404        self.freq
4405    }
4406
4407    /// Canonical pandas frequency alias, matching `pd.Period.freqstr`.
4408    #[must_use]
4409    pub const fn freqstr(&self) -> &'static str {
4410        self.freq.alias()
4411    }
4412
4413    /// Same-freq ordinal comparison. Returns `None` if `freq` differs —
4414    /// caller decides whether that's an error or a panic site.
4415    #[must_use]
4416    pub fn cmp_same_freq(&self, other: &Self) -> Option<std::cmp::Ordering> {
4417        if self.freq != other.freq {
4418            return None;
4419        }
4420        Some(self.ordinal.cmp(&other.ordinal))
4421    }
4422
4423    /// Shift by `n` periods of the current frequency.
4424    /// Matches `pd.Period + n` and `pd.Period - n`.
4425    #[must_use]
4426    pub fn shift(&self, n: i64) -> Self {
4427        Self {
4428            ordinal: self.ordinal.saturating_add(n),
4429            freq: self.freq,
4430        }
4431    }
4432
4433    /// Period-difference in units of the shared frequency.
4434    /// Returns `None` if `freq` differs (pandas raises IncompatibleFrequency).
4435    #[must_use]
4436    pub fn diff(&self, other: &Self) -> Option<i64> {
4437        if self.freq != other.freq {
4438            return None;
4439        }
4440        Some(self.ordinal.saturating_sub(other.ordinal))
4441    }
4442
4443    /// Parse common pandas `Period(...)` strings and infer the frequency.
4444    ///
4445    /// Supported forms mirror pandas' unambiguous scalar constructor cases:
4446    /// annual (`"2024"`), quarterly (`"2024Q1"`), monthly (`"2024-01"`),
4447    /// and daily (`"2024-01-15"`). The ordinal axes match pandas:
4448    /// 1970, 1970Q1, 1970-01, and 1970-01-01 all have ordinal 0.
4449    pub fn parse(s: &str) -> Result<Self, TypeError> {
4450        let trimmed = s.trim();
4451        if trimmed.eq_ignore_ascii_case("nat") {
4452            return Ok(Self::new(i64::MIN, PeriodFreq::Daily));
4453        }
4454
4455        if let Some((year, quarter)) = parse_quarter_period(trimmed) {
4456            let ordinal = year
4457                .checked_sub(1970)
4458                .and_then(|offset| offset.checked_mul(4))
4459                .and_then(|base| base.checked_add(i64::from(quarter) - 1))
4460                .ok_or_else(|| TypeError::ValueNotParseable {
4461                    value: s.to_owned(),
4462                    target: "Period".to_owned(),
4463                })?;
4464            return Ok(Self::new(ordinal, PeriodFreq::Quarterly));
4465        }
4466
4467        if let Some((year, month, day)) = parse_ymd_period(trimmed) {
4468            let ordinal = Timestamp::days_from_ymd(year, i64::from(month), i64::from(day));
4469            return Ok(Self::new(ordinal, PeriodFreq::Daily));
4470        }
4471
4472        if let Some((year, month)) = parse_year_month_period(trimmed) {
4473            let ordinal = year
4474                .checked_sub(1970)
4475                .and_then(|offset| offset.checked_mul(12))
4476                .and_then(|base| base.checked_add(i64::from(month) - 1))
4477                .ok_or_else(|| TypeError::ValueNotParseable {
4478                    value: s.to_owned(),
4479                    target: "Period".to_owned(),
4480                })?;
4481            return Ok(Self::new(ordinal, PeriodFreq::Monthly));
4482        }
4483
4484        if let Some(year) = parse_annual_period(trimmed) {
4485            let ordinal = year
4486                .checked_sub(1970)
4487                .ok_or_else(|| TypeError::ValueNotParseable {
4488                    value: s.to_owned(),
4489                    target: "Period".to_owned(),
4490                })?;
4491            return Ok(Self::new(ordinal, PeriodFreq::Annual));
4492        }
4493
4494        Err(TypeError::ValueNotParseable {
4495            value: s.to_owned(),
4496            target: "Period".to_owned(),
4497        })
4498    }
4499}
4500
/// Parse a bare four-digit year such as `"2024"`.
///
/// Returns `None` unless `value` is exactly four ASCII digits.
fn parse_annual_period(value: &str) -> Option<i64> {
    let four_digits = value.len() == 4 && value.bytes().all(|byte| byte.is_ascii_digit());
    if !four_digits {
        return None;
    }
    value.parse().ok()
}
4506
/// Parse a `YYYY-MM` string into `(year, month)`.
///
/// Returns `None` unless the year is exactly four ASCII digits, the month
/// is exactly two ASCII digits, and the month is in `1..=12`. The digit
/// check is explicit because integer `parse` also accepts a leading `+`,
/// which would otherwise let inputs such as `"2024-+1"` through.
fn parse_year_month_period(value: &str) -> Option<(i64, u32)> {
    let (year, month) = value.split_once('-')?;
    let all_digits = |field: &str| field.bytes().all(|byte| byte.is_ascii_digit());
    if year.len() != 4 || month.len() != 2 || !all_digits(year) || !all_digits(month) {
        return None;
    }
    let year = year.parse::<i64>().ok()?;
    let month = month.parse::<u32>().ok()?;
    (1..=12).contains(&month).then_some((year, month))
}
4516
4517fn parse_ymd_period(value: &str) -> Option<(i64, u32, u32)> {
4518    let mut parts = value.split('-');
4519    let year = parts.next()?;
4520    let month = parts.next()?;
4521    let day = parts.next()?;
4522    if parts.next().is_some() || year.len() != 4 || month.len() != 2 || day.len() != 2 {
4523        return None;
4524    }
4525    let year = year.parse::<i64>().ok()?;
4526    let month = month.parse::<u32>().ok()?;
4527    let day = day.parse::<u32>().ok()?;
4528    (1..=days_in_month(year, month)?)
4529        .contains(&day)
4530        .then_some((year, month, day))
4531}
4532
/// Parse a `YYYYQn` string (upper- or lower-case `Q`) into `(year, quarter)`.
///
/// Returns `None` unless the year is exactly four ASCII digits and the
/// quarter is a single digit in `1..=4`. The digit check is explicit
/// because integer `parse` also accepts a leading `+`, which would
/// otherwise let inputs such as `"+202Q1"` through.
fn parse_quarter_period(value: &str) -> Option<(i64, u32)> {
    let (year, quarter) = value.split_once('Q').or_else(|| value.split_once('q'))?;
    let all_digits = |field: &str| field.bytes().all(|byte| byte.is_ascii_digit());
    if year.len() != 4 || quarter.len() != 1 || !all_digits(year) || !all_digits(quarter) {
        return None;
    }
    let year = year.parse::<i64>().ok()?;
    let quarter = quarter.parse::<u32>().ok()?;
    (1..=4).contains(&quarter).then_some((year, quarter))
}
4542
4543impl std::fmt::Display for Period {
4544    /// Phase 1: ordinal+freq form, e.g. `Period[Q-DEC, 216]`. Calendar-
4545    /// formatted display (`2024Q1`, `2024-03`) lands in Phase 2 once the
4546    /// ordinal-to-ymd arithmetic is wired.
4547    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
4548        write!(f, "Period[{}, {}]", self.freq, self.ordinal)
4549    }
4550}
4551
4552/// Build `periods` consecutive Periods starting at `start`.
4553///
4554/// Matches `pd.period_range(start, periods=N, freq=start.freq)` for the
4555/// count-based form. The frequency is taken from `start` — pandas requires
4556/// `freq` to match when both are passed; mismatches are an error in
4557/// pandas, but here we sidestep ambiguity by deriving from `start.freq`.
4558///
4559/// Per br-frankenpandas-2jef (epoj Phase 2). Pure ordinal arithmetic — no
4560/// calendar conversion (Phase 3 wires chrono). `periods=0` returns empty.
4561///
4562/// ```
4563/// use fp_types::{period_range, Period, PeriodFreq};
4564/// let q1 = Period::new(216, PeriodFreq::Quarterly);
4565/// let year = period_range(q1, 4);
4566/// assert_eq!(year.len(), 4);
4567/// assert_eq!(year[0].ordinal, 216);
4568/// assert_eq!(year[3].ordinal, 219);
4569/// ```
4570#[must_use]
4571pub fn period_range(start: Period, periods: usize) -> Vec<Period> {
4572    (0..periods).map(|i| start.shift(i as i64)).collect()
4573}
4574
4575#[cfg(test)]
4576mod tests {
4577    use super::{
4578        DType, Interval, IntervalClosed, NullKind, Period, PeriodFreq, Scalar, SparseDType,
4579        cast_scalar, common_dtype, infer_dtype,
4580    };
4581
4582    /// br-frankenpandas-esjjy / fd90.182: ergonomic From impls for Scalar.
4583    #[test]
4584    fn scalar_from_primitive_types() {
4585        // Each primitive maps to its canonical Scalar variant.
4586        assert_eq!(Scalar::from(true), Scalar::Bool(true));
4587        assert_eq!(Scalar::from(42i64), Scalar::Int64(42));
4588        assert_eq!(Scalar::from(1.5f64), Scalar::Float64(1.5));
4589        assert_eq!(Scalar::from("hi"), Scalar::Utf8("hi".to_owned()));
4590        assert_eq!(
4591            Scalar::from(String::from("world")),
4592            Scalar::Utf8("world".to_owned())
4593        );
4594
4595        // .into() works in mixed-type Vec<Scalar> contexts (the README's
4596        // case_when example pattern, and what fd90.181 needed for apply_row
4597        // closures).
4598        let mixed: Vec<Scalar> = vec![1i64.into(), 2.0f64.into(), "three".into()];
4599        assert_eq!(mixed.len(), 3);
4600        assert_eq!(mixed[0], Scalar::Int64(1));
4601        assert_eq!(mixed[1], Scalar::Float64(2.0));
4602        assert_eq!(mixed[2], Scalar::Utf8("three".to_owned()));
4603    }
4604
4605    #[test]
4606    fn dtype_inference_coerces_numeric_values() {
4607        let values = vec![Scalar::Bool(true), Scalar::Int64(7), Scalar::Float64(3.5)];
4608        assert_eq!(
4609            infer_dtype(&values).expect("dtype should infer"),
4610            DType::Float64
4611        );
4612    }
4613
4614    #[test]
4615    fn interval_scalar_has_dtype_storage_and_unique_semantics_5g5uj() {
4616        let left = Scalar::Interval(Interval::new(0.0, 1.0, IntervalClosed::Right));
4617        let right = Scalar::Interval(Interval::new(1.0, 2.0, IntervalClosed::Right));
4618        assert_eq!(left.dtype(), DType::Interval);
4619        assert!(!left.is_missing());
4620        assert_eq!(
4621            infer_dtype(&[left.clone(), right.clone()]).expect("interval dtype"),
4622            DType::Interval
4623        );
4624        assert_eq!(
4625            common_dtype(DType::Interval, DType::Interval).expect("same interval dtype"),
4626            DType::Interval
4627        );
4628        assert_eq!(
4629            cast_scalar(&Scalar::Null(NullKind::Null), DType::Interval).expect("missing casts"),
4630            Scalar::Null(NullKind::Null)
4631        );
4632        assert_eq!(
4633            cast_scalar(&left, DType::Utf8).expect("interval string cast"),
4634            Scalar::Utf8("(0, 1]".to_owned())
4635        );
4636        assert_eq!(
4637            super::nannunique(&[left.clone(), right, left, Scalar::Null(NullKind::Null)]),
4638            Scalar::Int64(2)
4639        );
4640    }
4641
4642    #[test]
4643    fn cast_scalar_parses_temporal_extension_strings_avm08() {
4644        let expected_nanos = super::Timestamp::parse("2024-01-15T10:30:45")
4645            .expect("timestamp parse")
4646            .nanos;
4647        assert_eq!(
4648            cast_scalar(
4649                &Scalar::Utf8("2024-01-15T10:30:45".to_owned()),
4650                DType::Datetime64
4651            )
4652            .expect("datetime cast"),
4653            Scalar::Datetime64(expected_nanos)
4654        );
4655        assert_eq!(
4656            cast_scalar(&Scalar::Utf8("2024Q1".to_owned()), DType::Period).expect("period cast"),
4657            Scalar::Period(216)
4658        );
4659        assert_eq!(
4660            cast_scalar(&Scalar::Utf8("(0, 1]".to_owned()), DType::Interval)
4661                .expect("interval cast"),
4662            Scalar::Interval(Interval::new(0.0, 1.0, IntervalClosed::Right))
4663        );
4664    }
4665
4666    #[test]
4667    fn missing_values_get_target_missing_marker() {
4668        let missing = Scalar::Null(NullKind::Null);
4669        let cast = cast_scalar(&missing, DType::Float64).expect("missing casts");
4670        assert_eq!(cast, Scalar::Null(NullKind::NaN));
4671    }
4672
4673    #[test]
4674    fn cast_scalar_to_utf8_uses_pandas_string_spellings() {
4675        let cases = [
4676            (Scalar::Bool(true), "True"),
4677            (Scalar::Bool(false), "False"),
4678            (Scalar::Int64(-7), "-7"),
4679            (Scalar::Float64(1.0), "1.0"),
4680            (Scalar::Float64(1.5), "1.5"),
4681            (Scalar::Float64(f64::NAN), "nan"),
4682            (Scalar::Null(NullKind::Null), "None"),
4683            (Scalar::Null(NullKind::NaN), "nan"),
4684            (Scalar::Null(NullKind::NaT), "NaT"),
4685        ];
4686
4687        for (value, expected) in cases {
4688            assert_eq!(
4689                cast_scalar(&value, DType::Utf8).expect("cast"),
4690                Scalar::Utf8(expected.to_owned())
4691            );
4692        }
4693    }
4694
4695    #[test]
4696    fn semantic_eq_treats_nan_as_equal() {
4697        let left = Scalar::Float64(f64::NAN);
4698        let right = Scalar::Null(NullKind::NaN);
4699        assert!(left.semantic_eq(&right));
4700    }
4701
4702    #[test]
4703    fn semantic_eq_treats_nan_as_missing_null() {
4704        let left = Scalar::Float64(f64::NAN);
4705        let right = Scalar::Null(NullKind::Null);
4706        assert!(left.semantic_eq(&right));
4707    }
4708
4709    #[test]
4710    fn common_dtype_rejects_string_numeric_mix() {
4711        let err = common_dtype(DType::Utf8, DType::Int64).expect_err("must fail");
4712        assert_eq!(
4713            err.to_string(),
4714            "dtype coercion from Utf8 to Int64 has no compatible common type"
4715        );
4716        let err = common_dtype(DType::Float64, DType::Utf8).expect_err("must fail");
4717        assert_eq!(
4718            err.to_string(),
4719            "dtype coercion from Float64 to Utf8 has no compatible common type"
4720        );
4721    }
4722
4723    #[test]
4724    fn sparse_dtype_normalizes_fill_value_to_value_dtype() {
4725        let dtype = SparseDType::new(DType::Float64, Scalar::Int64(0)).expect("fill should cast");
4726
4727        assert_eq!(dtype.value_dtype, DType::Float64);
4728        assert_eq!(dtype.fill_value, Scalar::Float64(0.0));
4729    }
4730
4731    #[test]
4732    fn sparse_dtype_rejects_sparse_value_dtype() {
4733        let err = SparseDType::new(DType::Sparse, Scalar::Int64(0)).expect_err("must reject");
4734
4735        assert_eq!(err.to_string(), "sparse value dtype cannot be Sparse");
4736    }
4737
4738    #[test]
4739    fn common_dtype_rejects_sparse_dense_mix() {
4740        let err = common_dtype(DType::Sparse, DType::Int64).expect_err("must fail");
4741
4742        assert_eq!(
4743            err.to_string(),
4744            "dtype coercion from Sparse to Int64 has no compatible common type"
4745        );
4746    }
4747
4748    // ── Nullable Int64/Bool dtype tests (br-frankenpandas-rg8ys.6.4) ────
4749
4750    #[test]
4751    fn nullable_int64_promotion_matrix() {
4752        // Int64 + Int64Nullable -> Int64Nullable (nullable absorbs)
4753        assert_eq!(
4754            common_dtype(DType::Int64, DType::Int64Nullable).unwrap(),
4755            DType::Int64Nullable
4756        );
4757        assert_eq!(
4758            common_dtype(DType::Int64Nullable, DType::Int64).unwrap(),
4759            DType::Int64Nullable
4760        );
4761
4762        // Int64Nullable + Float64 -> Float64 (float always wins)
4763        assert_eq!(
4764            common_dtype(DType::Int64Nullable, DType::Float64).unwrap(),
4765            DType::Float64
4766        );
4767        assert_eq!(
4768            common_dtype(DType::Float64, DType::Int64Nullable).unwrap(),
4769            DType::Float64
4770        );
4771
4772        // Int64Nullable + Int64Nullable -> Int64Nullable
4773        assert_eq!(
4774            common_dtype(DType::Int64Nullable, DType::Int64Nullable).unwrap(),
4775            DType::Int64Nullable
4776        );
4777
4778        // Bool + Int64Nullable -> Int64Nullable
4779        assert_eq!(
4780            common_dtype(DType::Bool, DType::Int64Nullable).unwrap(),
4781            DType::Int64Nullable
4782        );
4783
4784        // BoolNullable + Int64 -> Int64Nullable
4785        assert_eq!(
4786            common_dtype(DType::BoolNullable, DType::Int64).unwrap(),
4787            DType::Int64Nullable
4788        );
4789    }
4790
4791    #[test]
4792    fn nullable_bool_promotion_matrix() {
4793        // Bool + BoolNullable -> BoolNullable
4794        assert_eq!(
4795            common_dtype(DType::Bool, DType::BoolNullable).unwrap(),
4796            DType::BoolNullable
4797        );
4798        assert_eq!(
4799            common_dtype(DType::BoolNullable, DType::Bool).unwrap(),
4800            DType::BoolNullable
4801        );
4802
4803        // BoolNullable + Float64 -> Float64
4804        assert_eq!(
4805            common_dtype(DType::BoolNullable, DType::Float64).unwrap(),
4806            DType::Float64
4807        );
4808    }
4809
4810    #[test]
4811    fn dtype_is_nullable_helper() {
4812        assert!(DType::Int64Nullable.is_nullable());
4813        assert!(DType::BoolNullable.is_nullable());
4814        assert!(!DType::Int64.is_nullable());
4815        assert!(!DType::Bool.is_nullable());
4816        assert!(!DType::Float64.is_nullable());
4817        assert!(!DType::Utf8.is_nullable());
4818    }
4819
4820    #[test]
4821    fn dtype_to_nullable_conversions() {
4822        assert_eq!(DType::Int64.to_nullable(), DType::Int64Nullable);
4823        assert_eq!(DType::Bool.to_nullable(), DType::BoolNullable);
4824        assert_eq!(DType::Float64.to_nullable(), DType::Float64); // unchanged
4825        assert_eq!(DType::Int64Nullable.to_nullable(), DType::Int64Nullable);
4826    }
4827
4828    #[test]
4829    fn dtype_to_non_nullable_conversions() {
4830        assert_eq!(DType::Int64Nullable.to_non_nullable(), DType::Int64);
4831        assert_eq!(DType::BoolNullable.to_non_nullable(), DType::Bool);
4832        assert_eq!(DType::Int64.to_non_nullable(), DType::Int64); // unchanged
4833        assert_eq!(DType::Float64.to_non_nullable(), DType::Float64);
4834    }
4835
4836    #[test]
4837    fn nullable_dtype_name_reports_pandas_style() {
4838        assert_eq!(DType::Int64.name(), "int64");
4839        assert_eq!(DType::Int64Nullable.name(), "Int64"); // capital I
4840        assert_eq!(DType::Bool.name(), "bool");
4841        assert_eq!(DType::BoolNullable.name(), "boolean");
4842    }
4843
4844    #[test]
4845    fn cast_scalar_int64_nullable_identity() {
4846        let val = Scalar::Int64(42);
4847        // Int64 -> Int64Nullable is identity (no actual conversion)
4848        let result = cast_scalar(&val, DType::Int64Nullable).unwrap();
4849        assert_eq!(result, Scalar::Int64(42));
4850
4851        // Int64Nullable -> Int64 is also identity
4852        let result2 = cast_scalar(&val, DType::Int64).unwrap();
4853        assert_eq!(result2, Scalar::Int64(42));
4854    }
4855
4856    #[test]
4857    fn cast_float_to_utf8_uses_pandas_str_float_with_scientific() {
4858        // pandas astype(str) of floats == Python str(float): whole -> ".0",
4859        // shortest round-trip decimals, scientific (e+NN/e-NN) for large/small,
4860        // inf -> "inf", NaN -> "nan". Verified vs live pandas 2.2.3. (Previously
4861        // large/small lost scientific notation, e.g. 1e16 -> "10000000000000000.0".)
4862        let cases: &[(f64, &str)] = &[
4863            (1.0, "1.0"),
4864            (2.5, "2.5"),
4865            (100.0, "100.0"),
4866            (0.1, "0.1"),
4867            (0.0001, "0.0001"),
4868            (1e16, "1e+16"),
4869            (1e20, "1e+20"),
4870            (1e-5, "1e-05"),
4871            (1e-7, "1e-07"),
4872            (f64::INFINITY, "inf"),
4873            (f64::NEG_INFINITY, "-inf"),
4874        ];
4875        for (v, expected) in cases {
4876            assert_eq!(
4877                cast_scalar(&Scalar::Float64(*v), DType::Utf8).unwrap(),
4878                Scalar::Utf8((*expected).to_owned()),
4879                "float {v} -> str"
4880            );
4881        }
4882    }
4883
4884    #[test]
4885    fn cast_to_bool_uses_pandas_nonzero_truthiness() {
4886        // pandas astype(bool): zero -> False, any nonzero -> True (not just 0/1),
4887        // -0.0 -> False, and NaN -> True (numpy bool(nan), br-cyi4h). Verified vs
4888        // live pandas 2.2.3.
4889        let cases_int: &[(i64, bool)] = &[(0, false), (1, true), (-3, true), (2, true)];
4890        for (v, expected) in cases_int {
4891            assert_eq!(
4892                cast_scalar(&Scalar::Int64(*v), DType::Bool).unwrap(),
4893                Scalar::Bool(*expected),
4894                "int {v} -> bool"
4895            );
4896        }
4897        let cases_float: &[(f64, bool)] = &[
4898            (0.0, false),
4899            (-0.0, false),
4900            (0.1, true),
4901            (2.5, true),
4902            (1.0, true),
4903            // pandas astype(bool): NaN is truthy -> True (numpy bool). br-cyi4h.
4904            (f64::NAN, true),
4905        ];
4906        for (v, expected) in cases_float {
4907            assert_eq!(
4908                cast_scalar(&Scalar::Float64(*v), DType::Bool).unwrap(),
4909                Scalar::Bool(*expected),
4910                "float {v} -> bool"
4911            );
4912        }
4913    }
4914
4915    #[test]
4916    fn nullable_dtype_is_extension() {
4917        assert!(DType::Int64Nullable.is_extension());
4918        assert!(DType::BoolNullable.is_extension());
4919        assert!(!DType::Int64.is_extension());
4920        assert!(!DType::Bool.is_extension());
4921    }
4922
4923    #[test]
4924    fn infer_dtype_preserves_string_numeric_mix_as_utf8_bucket() {
4925        let values = vec![Scalar::Utf8("x".into()), Scalar::Int64(7)];
4926        assert_eq!(
4927            infer_dtype(&values).expect("dtype should infer"),
4928            DType::Utf8
4929        );
4930    }
4931
4932    // ── Scalar missingness methods ─────────────────────────────────────
4933
4934    #[test]
4935    fn is_null_detects_explicit_nulls() {
4936        assert!(Scalar::Null(NullKind::Null).is_null());
4937        assert!(Scalar::Null(NullKind::NaN).is_null());
4938        assert!(!Scalar::Int64(42).is_null());
4939        assert!(!Scalar::Float64(f64::NAN).is_null());
4940    }
4941
4942    #[test]
4943    fn is_na_matches_is_missing() {
4944        let vals = vec![
4945            Scalar::Null(NullKind::Null),
4946            Scalar::Float64(f64::NAN),
4947            Scalar::Int64(0),
4948            Scalar::Bool(false),
4949        ];
4950        for v in &vals {
4951            assert_eq!(v.is_na(), v.is_missing());
4952        }
4953    }
4954
4955    #[test]
4956    fn coalesce_picks_first_non_missing() {
4957        let null = Scalar::Null(NullKind::Null);
4958        let fill = Scalar::Int64(99);
4959        assert_eq!(null.coalesce(&fill), fill);
4960        assert_eq!(fill.coalesce(&null), fill);
4961    }
4962
4963    // ── Missingness utilities ──────────────────────────────────────────
4964
4965    #[test]
4966    fn isna_notna_complement() {
4967        let vals = vec![
4968            Scalar::Int64(1),
4969            Scalar::Null(NullKind::Null),
4970            Scalar::Float64(f64::NAN),
4971            Scalar::Float64(3.0),
4972        ];
4973        let na = super::isna(&vals);
4974        let not = super::notna(&vals);
4975        assert_eq!(na, vec![false, true, true, false]);
4976        for (a, b) in na.iter().zip(not.iter()) {
4977            assert_ne!(a, b);
4978        }
4979    }
4980
4981    #[test]
4982    fn count_na_counts_missing() {
4983        let vals = vec![
4984            Scalar::Int64(1),
4985            Scalar::Null(NullKind::Null),
4986            Scalar::Float64(f64::NAN),
4987        ];
4988        assert_eq!(super::count_na(&vals), 2);
4989    }
4990
4991    #[test]
4992    fn fill_na_replaces_missing() {
4993        let vals = vec![
4994            Scalar::Int64(1),
4995            Scalar::Null(NullKind::Null),
4996            Scalar::Float64(f64::NAN),
4997            Scalar::Int64(4),
4998        ];
4999        let filled = super::fill_na(&vals, &Scalar::Int64(0));
5000        assert_eq!(filled[0], Scalar::Int64(1));
5001        assert_eq!(filled[1], Scalar::Int64(0));
5002        assert_eq!(filled[2], Scalar::Int64(0));
5003        assert_eq!(filled[3], Scalar::Int64(4));
5004    }
5005
5006    #[test]
5007    fn dropna_removes_missing() {
5008        let vals = vec![
5009            Scalar::Int64(1),
5010            Scalar::Null(NullKind::Null),
5011            Scalar::Int64(3),
5012            Scalar::Float64(f64::NAN),
5013        ];
5014        let kept = super::dropna(&vals);
5015        assert_eq!(kept.len(), 2);
5016        assert_eq!(kept[0], Scalar::Int64(1));
5017        assert_eq!(kept[1], Scalar::Int64(3));
5018    }
5019
5020    // ── Nanops ─────────────────────────────────────────────────────────
5021
5022    #[test]
5023    fn nansum_skips_nulls() {
5024        let vals = vec![
5025            Scalar::Float64(1.0),
5026            Scalar::Null(NullKind::Null),
5027            Scalar::Float64(2.0),
5028            Scalar::Float64(f64::NAN),
5029            Scalar::Int64(7),
5030        ];
5031        assert_eq!(super::nansum(&vals), Scalar::Float64(10.0));
5032    }
5033
5034    #[test]
5035    fn nansum_empty_returns_zero() {
5036        assert_eq!(super::nansum(&[]), Scalar::Float64(0.0));
5037    }
5038
5039    #[test]
5040    fn nannunique_merges_negative_zero_and_zero() {
5041        let vals = vec![
5042            Scalar::Float64(-0.0),
5043            Scalar::Float64(0.0),
5044            Scalar::Float64(1.0),
5045        ];
5046        assert_eq!(super::nannunique(&vals), Scalar::Int64(2));
5047    }
5048
5049    #[test]
5050    fn nanmean_basic() {
5051        let vals = vec![
5052            Scalar::Float64(2.0),
5053            Scalar::Null(NullKind::Null),
5054            Scalar::Float64(4.0),
5055        ];
5056        assert_eq!(super::nanmean(&vals), Scalar::Float64(3.0));
5057    }
5058
5059    #[test]
5060    fn nanmean_all_null_returns_nan() {
5061        let vals = vec![Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
5062        assert!(super::nanmean(&vals).is_missing());
5063    }
5064
5065    #[test]
5066    fn nansum_nanmean_timedelta64_preserves_dtype_620mj() {
5067        // Per br-frankenpandas-620mj: pandas td_series.sum()/mean() return
5068        // Timedelta64, not Float64(0.0). Was silently zero before because
5069        // collect_finite drops Timedelta64 (to_f64 errors).
5070        let one_hour = 3_600 * 1_000_000_000_i64;
5071        let vals = vec![
5072            Scalar::Timedelta64(one_hour),
5073            Scalar::Timedelta64(2 * one_hour),
5074            Scalar::Timedelta64(3 * one_hour),
5075        ];
5076        assert_eq!(super::nansum(&vals), Scalar::Timedelta64(6 * one_hour));
5077        assert_eq!(super::nanmean(&vals), Scalar::Timedelta64(2 * one_hour));
5078    }
5079
5080    #[test]
5081    fn nansum_nanmean_timedelta64_skips_nat_620mj() {
5082        let one_hour = 3_600 * 1_000_000_000_i64;
5083        let vals = vec![
5084            Scalar::Timedelta64(Timedelta::NAT),
5085            Scalar::Timedelta64(one_hour),
5086            Scalar::Timedelta64(3 * one_hour),
5087            Scalar::Timedelta64(Timedelta::NAT),
5088        ];
5089        // NAT is missing → skipped. Sum: 1h+3h=4h; mean: 2h.
5090        assert_eq!(super::nansum(&vals), Scalar::Timedelta64(4 * one_hour));
5091        assert_eq!(super::nanmean(&vals), Scalar::Timedelta64(2 * one_hour));
5092    }
5093
5094    #[test]
5095    fn nansum_nanmean_mixed_timedelta_other_falls_back_620mj() {
5096        // Mixed Timedelta64 + other type bails out of the Timedelta path
5097        // and uses Float64 collect_finite (which drops Timedelta).
5098        // Preserves existing cross-type behavior (effectively ignores TD).
5099        let vals = vec![Scalar::Timedelta64(3600 * 1_000_000_000), Scalar::Int64(5)];
5100        // Int64(5) makes it through to_f64 → 5.0; Timedelta is dropped.
5101        assert_eq!(super::nansum(&vals), Scalar::Float64(5.0));
5102    }
5103
5104    #[test]
5105    fn nancount_counts_non_missing() {
5106        let vals = vec![
5107            Scalar::Int64(1),
5108            Scalar::Null(NullKind::Null),
5109            Scalar::Float64(3.0),
5110        ];
5111        assert_eq!(super::nancount(&vals), Scalar::Int64(2));
5112    }
5113
5114    #[test]
5115    fn nanmin_basic() {
5116        let vals = vec![
5117            Scalar::Float64(5.0),
5118            Scalar::Null(NullKind::Null),
5119            Scalar::Float64(2.0),
5120            Scalar::Float64(8.0),
5121        ];
5122        assert_eq!(super::nanmin(&vals), Scalar::Float64(2.0));
5123    }
5124
5125    #[test]
5126    fn nanmax_basic() {
5127        let vals = vec![
5128            Scalar::Float64(5.0),
5129            Scalar::Float64(f64::NAN),
5130            Scalar::Float64(8.0),
5131        ];
5132        assert_eq!(super::nanmax(&vals), Scalar::Float64(8.0));
5133    }
5134
5135    #[test]
5136    fn nanmin_nanmax_empty_returns_nan() {
5137        assert!(super::nanmin(&[]).is_missing());
5138        assert!(super::nanmax(&[]).is_missing());
5139    }
5140
5141    #[test]
5142    fn nanmin_nanmax_mixed_incompatible_types_returns_nan() {
5143        let vals = vec![Scalar::Int64(5), Scalar::Utf8("hello".into())];
5144        assert!(super::nanmin(&vals).is_missing());
5145        assert!(super::nanmax(&vals).is_missing());
5146
5147        let vals2 = vec![Scalar::Utf8("a".into()), Scalar::Float64(3.0)];
5148        assert!(super::nanmin(&vals2).is_missing());
5149        assert!(super::nanmax(&vals2).is_missing());
5150    }
5151
5152    #[test]
5153    fn nanmin_nanmax_compatible_numeric_types_ok() {
5154        let vals = vec![Scalar::Int64(5), Scalar::Float64(3.0), Scalar::Bool(true)];
5155        assert_eq!(super::nanmin(&vals), Scalar::Bool(true));
5156        assert_eq!(super::nanmax(&vals), Scalar::Int64(5));
5157    }
5158
5159    #[test]
5160    fn nanmin_nanmax_timedelta64_returns_timedelta_yic5m() {
5161        // Per br-frankenpandas-yic5m: nanmin/nanmax on Timedelta64 returns
5162        // the smallest/largest Timedelta64 — was silently NaN before
5163        // because Timedelta64.to_f64() errors and the catch-all swallowed it.
5164        let one_hour = 3_600 * 1_000_000_000_i64;
5165        let vals = vec![
5166            Scalar::Timedelta64(3 * one_hour),
5167            Scalar::Timedelta64(one_hour),
5168            Scalar::Timedelta64(2 * one_hour),
5169        ];
5170        assert_eq!(super::nanmin(&vals), Scalar::Timedelta64(one_hour));
5171        assert_eq!(super::nanmax(&vals), Scalar::Timedelta64(3 * one_hour));
5172    }
5173
5174    #[test]
5175    fn nanmin_nanmax_timedelta64_skips_nat_yic5m() {
5176        let one_hour = 3_600 * 1_000_000_000_i64;
5177        let vals = vec![
5178            Scalar::Timedelta64(Timedelta::NAT),
5179            Scalar::Timedelta64(one_hour),
5180            Scalar::Timedelta64(2 * one_hour),
5181            Scalar::Timedelta64(Timedelta::NAT),
5182        ];
5183        assert_eq!(super::nanmin(&vals), Scalar::Timedelta64(one_hour));
5184        assert_eq!(super::nanmax(&vals), Scalar::Timedelta64(2 * one_hour));
5185    }
5186
5187    #[test]
5188    fn nanmedian_odd_count() {
5189        let vals = vec![
5190            Scalar::Float64(3.0),
5191            Scalar::Null(NullKind::Null),
5192            Scalar::Float64(1.0),
5193            Scalar::Float64(2.0),
5194        ];
5195        assert_eq!(super::nanmedian(&vals), Scalar::Float64(2.0));
5196    }
5197
5198    #[test]
5199    fn nanmedian_even_count() {
5200        let vals = vec![
5201            Scalar::Float64(1.0),
5202            Scalar::Float64(3.0),
5203            Scalar::Float64(2.0),
5204            Scalar::Float64(4.0),
5205        ];
5206        assert_eq!(super::nanmedian(&vals), Scalar::Float64(2.5));
5207    }
5208
5209    #[test]
5210    fn nanvar_population() {
5211        let vals = vec![
5212            Scalar::Float64(2.0),
5213            Scalar::Float64(4.0),
5214            Scalar::Float64(4.0),
5215            Scalar::Float64(4.0),
5216            Scalar::Float64(5.0),
5217            Scalar::Float64(5.0),
5218            Scalar::Float64(7.0),
5219            Scalar::Float64(9.0),
5220        ];
5221        let var = super::nanvar(&vals, 0);
5222        assert!(matches!(var, Scalar::Float64(_)), "expected Float64");
5223        if let Scalar::Float64(v) = var {
5224            assert!((v - 4.0).abs() < 1e-10);
5225        }
5226    }
5227
5228    #[test]
5229    fn nanvar_sample_ddof1() {
5230        let vals = vec![
5231            Scalar::Float64(2.0),
5232            Scalar::Float64(4.0),
5233            Scalar::Float64(4.0),
5234            Scalar::Float64(4.0),
5235            Scalar::Float64(5.0),
5236            Scalar::Float64(5.0),
5237            Scalar::Float64(7.0),
5238            Scalar::Float64(9.0),
5239        ];
5240        let var = super::nanvar(&vals, 1);
5241        assert!(matches!(var, Scalar::Float64(_)), "expected Float64");
5242        if let Scalar::Float64(v) = var {
5243            assert!((v - 32.0 / 7.0).abs() < 1e-10);
5244        }
5245    }
5246
5247    #[test]
5248    fn nanvar_insufficient_values_returns_nan() {
5249        let vals = vec![Scalar::Float64(5.0)];
5250        assert!(super::nanvar(&vals, 1).is_missing());
5251    }
5252
5253    #[test]
5254    fn nanstd_is_sqrt_of_var() {
5255        let vals = vec![
5256            Scalar::Float64(2.0),
5257            Scalar::Float64(4.0),
5258            Scalar::Float64(4.0),
5259            Scalar::Float64(4.0),
5260            Scalar::Float64(5.0),
5261            Scalar::Float64(5.0),
5262            Scalar::Float64(7.0),
5263            Scalar::Float64(9.0),
5264        ];
5265        let std = super::nanstd(&vals, 0);
5266        assert!(matches!(std, Scalar::Float64(_)), "expected Float64");
5267        if let Scalar::Float64(v) = std {
5268            assert!((v - 2.0).abs() < 1e-10);
5269        }
5270    }
5271
5272    #[test]
5273    fn nanmedian_timedelta64_preserves_dtype_j8ntk() {
5274        // Per br-frankenpandas-j8ntk: pandas td_series.median() returns
5275        // Timedelta64; was silently NaN before via collect_finite.
5276        let one_hour = 3_600 * 1_000_000_000_i64;
5277        let vals = vec![
5278            Scalar::Timedelta64(one_hour),
5279            Scalar::Timedelta64(2 * one_hour),
5280            Scalar::Timedelta64(3 * one_hour),
5281        ];
5282        assert_eq!(super::nanmedian(&vals), Scalar::Timedelta64(2 * one_hour));
5283    }
5284
5285    #[test]
5286    fn nanstd_timedelta64_preserves_dtype_j8ntk() {
5287        // Per br-frankenpandas-j8ntk: pandas td_series.std() returns
5288        // Timedelta64. Check Timedelta64 output and reasonable magnitude
5289        // for population std of [1h, 2h, 3h] = sqrt(2/3) * 1h.
5290        let one_hour: i64 = 3_600 * 1_000_000_000;
5291        let vals = vec![
5292            Scalar::Timedelta64(one_hour),
5293            Scalar::Timedelta64(2 * one_hour),
5294            Scalar::Timedelta64(3 * one_hour),
5295        ];
5296        let std = super::nanstd(&vals, 0);
5297        match std {
5298            Scalar::Timedelta64(ns) => {
5299                let expected = (2.0_f64 / 3.0).sqrt() * one_hour as f64;
5300                assert!(
5301                    (ns as f64 - expected).abs() < 1e6,
5302                    "expected ~{expected} ns, got {ns}"
5303                );
5304            }
5305            other => panic!("expected Timedelta64, got {other:?}"),
5306        }
5307    }
5308
5309    #[test]
5310    fn nanstd_nansem_timedelta64_insufficient_returns_nat_j8ntk() {
5311        let one_hour = 3_600 * 1_000_000_000_i64;
5312        let vals = vec![Scalar::Timedelta64(one_hour)];
5313        // ddof=1 with n=1 → underflow, returns NaT
5314        match super::nanstd(&vals, 1) {
5315            Scalar::Timedelta64(v) => assert_eq!(v, Timedelta::NAT),
5316            other => panic!("expected Timedelta64 NAT, got {other:?}"),
5317        }
5318        match super::nansem(&vals, 1) {
5319            Scalar::Timedelta64(v) => assert_eq!(v, Timedelta::NAT),
5320            other => panic!("expected Timedelta64 NAT, got {other:?}"),
5321        }
5322    }
5323
5324    #[test]
5325    fn nanops_with_mixed_types() {
5326        let vals = vec![
5327            Scalar::Bool(true),
5328            Scalar::Int64(3),
5329            Scalar::Float64(6.0),
5330            Scalar::Null(NullKind::Null),
5331        ];
5332        assert_eq!(super::nansum(&vals), Scalar::Float64(10.0));
5333        assert_eq!(super::nancount(&vals), Scalar::Int64(3));
5334    }
5335
5336    #[test]
5337    fn nanops_all_missing_returns_identity() {
5338        let vals = vec![Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
5339        assert_eq!(super::nansum(&vals), Scalar::Float64(0.0));
5340        assert!(super::nanmean(&vals).is_missing());
5341        assert!(super::nanmedian(&vals).is_missing());
5342        assert!(super::nanvar(&vals, 0).is_missing());
5343        assert!(super::nanstd(&vals, 0).is_missing());
5344    }
5345
5346    // ── Timedelta tests ────────────────────────────────────────────────
5347
5348    #[test]
5349    fn timedelta_parse_simple_units() {
5350        use super::Timedelta;
5351        assert_eq!(Timedelta::parse("1d").unwrap(), Timedelta::NANOS_PER_DAY);
5352        assert_eq!(
5353            Timedelta::parse("2h").unwrap(),
5354            2 * Timedelta::NANOS_PER_HOUR
5355        );
5356        assert_eq!(
5357            Timedelta::parse("30m").unwrap(),
5358            30 * Timedelta::NANOS_PER_MIN
5359        );
5360        assert_eq!(
5361            Timedelta::parse("45s").unwrap(),
5362            45 * Timedelta::NANOS_PER_SEC
5363        );
5364        assert_eq!(
5365            Timedelta::parse("100ms").unwrap(),
5366            100 * Timedelta::NANOS_PER_MILLI
5367        );
5368        assert_eq!(
5369            Timedelta::parse("500us").unwrap(),
5370            500 * Timedelta::NANOS_PER_MICRO
5371        );
5372        assert_eq!(Timedelta::parse("1000ns").unwrap(), 1000);
5373    }
5374
5375    #[test]
5376    fn timedelta_parse_compound() {
5377        use super::Timedelta;
5378        let expected = Timedelta::NANOS_PER_DAY
5379            + 2 * Timedelta::NANOS_PER_HOUR
5380            + 30 * Timedelta::NANOS_PER_MIN;
5381        assert_eq!(Timedelta::parse("1d 2h 30m").unwrap(), expected);
5382        assert_eq!(Timedelta::parse("1d2h30m").unwrap(), expected);
5383    }
5384
5385    #[test]
5386    fn timedelta_parse_iso8601_matches_pandas_tdiso() {
5387        use super::Timedelta;
5388        // Verified vs pandas 2.2.3 Timedelta(...).value.
5389        assert_eq!(Timedelta::parse("P1DT2H3M4S").unwrap(), 93_784_000_000_000);
5390        assert_eq!(Timedelta::parse("PT1H").unwrap(), 3_600_000_000_000);
5391        assert_eq!(Timedelta::parse("PT1H30M").unwrap(), 5_400_000_000_000);
5392        assert_eq!(Timedelta::parse("P1D").unwrap(), 86_400_000_000_000);
5393        assert_eq!(Timedelta::parse("P2W").unwrap(), 1_209_600_000_000_000);
5394        assert_eq!(Timedelta::parse("PT0.5S").unwrap(), 500_000_000);
5395        // pandas quirks: T ignored, M is minutes everywhere, units in any order.
5396        assert_eq!(Timedelta::parse("P1M").unwrap(), 60_000_000_000);
5397        assert_eq!(Timedelta::parse("P1H").unwrap(), 3_600_000_000_000);
5398        assert_eq!(Timedelta::parse("PT1D").unwrap(), 86_400_000_000_000);
5399        assert_eq!(Timedelta::parse("P1D1H").unwrap(), 90_000_000_000_000);
5400        assert_eq!(Timedelta::parse("-P1DT2H").unwrap(), -93_600_000_000_000);
5401        // Rejected like pandas: years, lowercase, bare P/PT.
5402        assert!(Timedelta::parse("P1Y").is_err());
5403        assert!(Timedelta::parse("p1d").is_err());
5404        assert!(Timedelta::parse("P").is_err());
5405        assert!(Timedelta::parse("PT").is_err());
5406    }
5407
5408    #[test]
5409    fn timedelta_parse_time_format() {
5410        use super::Timedelta;
5411        let expected = Timedelta::NANOS_PER_HOUR
5412            + 30 * Timedelta::NANOS_PER_MIN
5413            + 45 * Timedelta::NANOS_PER_SEC;
5414        assert_eq!(Timedelta::parse("01:30:45").unwrap(), expected);
5415    }
5416
5417    #[test]
5418    fn timedelta_parse_time_fraction_rejects_unicode_without_panic() {
5419        use super::{Timedelta, TimedeltaError};
5420        let err = Timedelta::parse("00:00:00.\u{00e9}\u{00e9}\u{00e9}\u{00e9}\u{00e9}")
5421            .expect_err("non-ASCII fractional seconds must reject");
5422        assert!(matches!(err, TimedeltaError::InvalidFormat(_)));
5423    }
5424
5425    #[test]
5426    fn timedelta_parse_time_format_rejects_overflow_without_panic() {
5427        use super::{Timedelta, TimedeltaError};
5428        let err = Timedelta::parse("9223372036854775807:00")
5429            .expect_err("oversized hour component must reject");
5430        assert!(matches!(err, TimedeltaError::InvalidFormat(_)));
5431    }
5432
5433    #[test]
5434    fn timedelta_parse_rejects_huge_value_overflow_zw3mg() {
5435        // Per br-frankenpandas-zw3mg: the compound parser used a raw
5436        // `as i64` cast that silently saturated to i64::MAX when the
5437        // product of (decimal-digit f64) × unit multiplier overflows.
5438        // Use a large literal (no scientific notation — the lexer only
5439        // accepts digits, '.', '-'). 1e18 days × NANOS_PER_DAY (~8.64e13)
5440        // overflows i64.
5441        use super::{Timedelta, TimedeltaError};
5442        let huge = format!("{} days", "9".repeat(18));
5443        assert!(matches!(
5444            Timedelta::parse(&huge).expect_err("9...(18 9s) days must overflow"),
5445            TimedeltaError::Overflow
5446        ));
5447    }
5448
5449    #[test]
5450    fn timedelta_parse_nat() {
5451        use super::Timedelta;
5452        assert_eq!(Timedelta::parse("NaT").unwrap(), Timedelta::NAT);
5453        assert_eq!(Timedelta::parse("nat").unwrap(), Timedelta::NAT);
5454    }
5455
5456    #[test]
5457    fn timedelta_parse_negative() {
5458        use super::Timedelta;
5459        assert_eq!(Timedelta::parse("-1d").unwrap(), -Timedelta::NANOS_PER_DAY);
5460    }
5461
5462    #[test]
5463    fn timedelta_components() {
5464        use super::Timedelta;
5465        let nanos = Timedelta::NANOS_PER_DAY
5466            + Timedelta::NANOS_PER_HOUR
5467            + Timedelta::NANOS_PER_MIN
5468            + Timedelta::NANOS_PER_SEC
5469            + Timedelta::NANOS_PER_MILLI
5470            + 2 * Timedelta::NANOS_PER_MICRO
5471            + 3;
5472        let comp = Timedelta::components(nanos);
5473        assert_eq!(comp.days, 1);
5474        assert_eq!(comp.hours, 1);
5475        assert_eq!(comp.minutes, 1);
5476        assert_eq!(comp.seconds, 1);
5477        assert_eq!(comp.milliseconds, 1);
5478        assert_eq!(comp.microseconds, 2);
5479        assert_eq!(comp.nanoseconds, 3);
5480    }
5481
5482    #[test]
5483    fn timedelta_negative_components_floor_div() {
5484        use super::Timedelta;
5485        // pandas floor-normalizes negative timedeltas: pd.Timedelta(-1,'s') has
5486        // days=-1, seconds=86399, components=(-1, 23, 59, 59, 0, 0, 0).
5487        let neg_1s = -Timedelta::NANOS_PER_SEC;
5488        assert_eq!(Timedelta::days(neg_1s), -1);
5489        assert_eq!(Timedelta::seconds(neg_1s), 86399);
5490        assert_eq!(Timedelta::microseconds(neg_1s), 0);
5491        assert_eq!(Timedelta::nanoseconds(neg_1s), 0);
5492        let comp = Timedelta::components(neg_1s);
5493        assert_eq!(
5494            (
5495                comp.days,
5496                comp.hours,
5497                comp.minutes,
5498                comp.seconds,
5499                comp.milliseconds,
5500                comp.microseconds,
5501                comp.nanoseconds
5502            ),
5503            (-1, 23, 59, 59, 0, 0, 0)
5504        );
5505
5506        // pd.Timedelta(-86401,'s'): days=-2, seconds=86399.
5507        let neg = -86_401 * Timedelta::NANOS_PER_SEC;
5508        assert_eq!(Timedelta::days(neg), -2);
5509        assert_eq!(Timedelta::seconds(neg), 86399);
5510    }
5511
5512    #[test]
5513    fn timedelta_total_seconds() {
5514        use super::Timedelta;
5515        let nanos = 90_000_000_000i64; // 90 seconds
5516        assert!((Timedelta::total_seconds(nanos) - 90.0).abs() < 1e-9);
5517        assert!(Timedelta::total_seconds(Timedelta::NAT).is_nan());
5518    }
5519
5520    #[test]
5521    fn timedelta_format_basic() {
5522        use super::Timedelta;
5523        assert_eq!(Timedelta::format(Timedelta::NAT), "NaT");
5524        assert_eq!(
5525            Timedelta::format(Timedelta::NANOS_PER_DAY),
5526            "1 days 00:00:00"
5527        );
5528        assert_eq!(
5529            Timedelta::format(Timedelta::NANOS_PER_DAY + 2 * Timedelta::NANOS_PER_HOUR),
5530            "1 days 02:00:00"
5531        );
5532    }
5533
5534    #[test]
5535    fn timedelta_format_subsecond_matches_pandas() {
5536        use super::Timedelta;
5537        // pandas str(Timedelta) uses 6 fractional digits (microseconds) unless a
5538        // sub-microsecond (nanosecond) component is present, then 9 digits.
5539        // Verified vs live pandas 2.2.3.
5540        assert_eq!(
5541            Timedelta::format(1_500_000_000), // 1.5s
5542            "0 days 00:00:01.500000"
5543        );
5544        assert_eq!(
5545            Timedelta::format(1_000_000), // 1ms
5546            "0 days 00:00:00.001000"
5547        );
5548        assert_eq!(
5549            Timedelta::format(123_456_000), // 123456us
5550            "0 days 00:00:00.123456"
5551        );
5552        // Nanosecond component -> 9 digits.
5553        assert_eq!(
5554            Timedelta::format(500), // 500ns
5555            "0 days 00:00:00.000000500"
5556        );
5557        assert_eq!(Timedelta::format(123_456_789), "0 days 00:00:00.123456789");
5558    }
5559
5560    #[test]
5561    fn timedelta_format_negative_uses_python_borrow_form() {
5562        use super::Timedelta;
5563        // pandas/Python normalize negatives via floor division: the days count
5564        // goes negative, the time remainder stays non-negative, and a '+' joins
5565        // them. Verified vs live pandas 2.2.3.
5566        assert_eq!(Timedelta::format(-1_000_000_000), "-1 days +23:59:59");
5567        assert_eq!(
5568            Timedelta::format(-Timedelta::NANOS_PER_DAY),
5569            "-1 days +00:00:00"
5570        );
5571        assert_eq!(
5572            Timedelta::format(-25 * Timedelta::NANOS_PER_HOUR),
5573            "-2 days +23:00:00"
5574        );
5575        assert_eq!(
5576            Timedelta::format(-1_500_000_000),
5577            "-1 days +23:59:58.500000"
5578        );
5579        assert_eq!(Timedelta::format(-500), "-1 days +23:59:59.999999500");
5580        assert_eq!(Timedelta::format(-1), "-1 days +23:59:59.999999999");
5581    }
5582
5583    #[test]
5584    fn timedelta_isoformat_basic() {
5585        use super::Timedelta;
5586        assert_eq!(Timedelta::isoformat(Timedelta::NAT), "NaT");
5587        assert_eq!(Timedelta::isoformat(0), "P0DT0H0M0S");
5588        assert_eq!(Timedelta::isoformat(Timedelta::NANOS_PER_DAY), "P1DT0H0M0S");
5589        assert_eq!(
5590            Timedelta::isoformat(
5591                Timedelta::NANOS_PER_DAY
5592                    + 2 * Timedelta::NANOS_PER_HOUR
5593                    + 30 * Timedelta::NANOS_PER_MIN
5594                    + 45 * Timedelta::NANOS_PER_SEC
5595            ),
5596            "P1DT2H30M45S"
5597        );
5598        assert_eq!(
5599            Timedelta::isoformat(Timedelta::NANOS_PER_SEC + 500_000_000),
5600            "P0DT0H0M1.5S"
5601        );
5602        assert_eq!(
5603            Timedelta::isoformat(-(Timedelta::NANOS_PER_DAY + Timedelta::NANOS_PER_HOUR)),
5604            "-P1DT1H0M0S"
5605        );
5606    }
5607
5608    #[test]
5609    fn timedelta_floor_ceil_round() {
5610        use super::Timedelta;
5611        let nanos = Timedelta::NANOS_PER_HOUR + 30 * Timedelta::NANOS_PER_MIN;
5612
5613        // floor: rounds down
5614        assert_eq!(Timedelta::floor(nanos, "h"), Timedelta::NANOS_PER_HOUR);
5615        assert_eq!(Timedelta::floor(nanos, "d"), 0);
5616
5617        // ceil: rounds up
5618        assert_eq!(Timedelta::ceil(nanos, "h"), 2 * Timedelta::NANOS_PER_HOUR);
5619        assert_eq!(Timedelta::ceil(nanos, "d"), Timedelta::NANOS_PER_DAY);
5620
5621        // round: rounds to nearest (banker's rounding on tie)
5622        assert_eq!(Timedelta::round(nanos, "h"), 2 * Timedelta::NANOS_PER_HOUR);
5623
5624        // NaT preserved
5625        assert_eq!(Timedelta::floor(Timedelta::NAT, "h"), Timedelta::NAT);
5626        assert_eq!(Timedelta::ceil(Timedelta::NAT, "h"), Timedelta::NAT);
5627        assert_eq!(Timedelta::round(Timedelta::NAT, "h"), Timedelta::NAT);
5628
5629        // Invalid freq returns NAT
5630        assert_eq!(Timedelta::floor(nanos, "invalid"), Timedelta::NAT);
5631    }
5632
5633    #[test]
5634    fn timedelta_scalar_dtype() {
5635        let td = Scalar::Timedelta64(86_400_000_000_000);
5636        assert_eq!(td.dtype(), DType::Timedelta64);
5637    }
5638
5639    #[test]
5640    fn timedelta_scalar_is_missing() {
5641        use super::Timedelta;
5642        let valid = Scalar::Timedelta64(1000);
5643        let nat = Scalar::Timedelta64(Timedelta::NAT);
5644        assert!(!valid.is_missing());
5645        assert!(nat.is_missing());
5646    }
5647
5648    #[test]
5649    fn dtype_utf8_deserializes_legacy_aliases() {
5650        let dtype: DType = serde_json::from_str("\"str\"").unwrap();
5651        assert_eq!(dtype, DType::Utf8);
5652
5653        let dtype: DType = serde_json::from_str("\"string\"").unwrap();
5654        assert_eq!(dtype, DType::Utf8);
5655    }
5656
5657    #[test]
5658    fn scalar_utf8_deserializes_legacy_aliases() {
5659        let scalar: Scalar = serde_json::from_str(r#"{"kind":"str","value":"x"}"#).unwrap();
5660        assert_eq!(scalar, Scalar::Utf8("x".to_owned()));
5661
5662        let scalar: Scalar = serde_json::from_str(r#"{"kind":"string","value":"y"}"#).unwrap();
5663        assert_eq!(scalar, Scalar::Utf8("y".to_owned()));
5664    }
5665
5666    #[test]
5667    fn nancumsum_skips_nulls_and_accumulates() {
5668        let values = vec![
5669            Scalar::Float64(1.0),
5670            Scalar::Null(NullKind::NaN),
5671            Scalar::Float64(2.0),
5672            Scalar::Float64(3.0),
5673        ];
5674        let out = super::nancumsum(&values);
5675        assert!(matches!(out[0], Scalar::Float64(v) if (v - 1.0).abs() < 1e-9));
5676        assert!(out[1].is_missing());
5677        assert!(matches!(out[2], Scalar::Float64(v) if (v - 3.0).abs() < 1e-9));
5678        assert!(matches!(out[3], Scalar::Float64(v) if (v - 6.0).abs() < 1e-9));
5679    }
5680
5681    #[test]
5682    fn nancumprod_skips_nulls_and_multiplies() {
5683        let values = vec![
5684            Scalar::Float64(2.0),
5685            Scalar::Null(NullKind::NaN),
5686            Scalar::Float64(3.0),
5687            Scalar::Float64(4.0),
5688        ];
5689        let out = super::nancumprod(&values);
5690        assert!(matches!(out[0], Scalar::Float64(v) if (v - 2.0).abs() < 1e-9));
5691        assert!(out[1].is_missing());
5692        assert!(matches!(out[2], Scalar::Float64(v) if (v - 6.0).abs() < 1e-9));
5693        assert!(matches!(out[3], Scalar::Float64(v) if (v - 24.0).abs() < 1e-9));
5694    }
5695
5696    #[test]
5697    fn nancummax_tracks_running_max() {
5698        let values = vec![
5699            Scalar::Float64(1.0),
5700            Scalar::Float64(3.0),
5701            Scalar::Null(NullKind::NaN),
5702            Scalar::Float64(2.0),
5703            Scalar::Float64(5.0),
5704        ];
5705        let out = super::nancummax(&values);
5706        assert_eq!(out[0], Scalar::Float64(1.0));
5707        assert_eq!(out[1], Scalar::Float64(3.0));
5708        assert!(out[2].is_missing());
5709        assert_eq!(out[3], Scalar::Float64(3.0));
5710        assert_eq!(out[4], Scalar::Float64(5.0));
5711    }
5712
5713    #[test]
5714    fn nancummin_tracks_running_min() {
5715        let values = vec![
5716            Scalar::Float64(5.0),
5717            Scalar::Float64(3.0),
5718            Scalar::Null(NullKind::NaN),
5719            Scalar::Float64(4.0),
5720            Scalar::Float64(1.0),
5721        ];
5722        let out = super::nancummin(&values);
5723        assert_eq!(out[0], Scalar::Float64(5.0));
5724        assert_eq!(out[1], Scalar::Float64(3.0));
5725        assert!(out[2].is_missing());
5726        assert_eq!(out[3], Scalar::Float64(3.0));
5727        assert_eq!(out[4], Scalar::Float64(1.0));
5728    }
5729
#[test]
fn nancumsum_timedelta64_preserves_dtype_x0x91() {
    // br-frankenpandas-x0x91: pandas `td_series.cumsum()` yields Timedelta64
    // running sums; this path used to degrade silently to NaN.
    let hour_ns = 3_600 * 1_000_000_000_i64;
    let td = |hours: i64| Scalar::Timedelta64(hours * hour_ns);

    let values = vec![td(1), td(2), td(3)];
    let sums = super::nancumsum(&values);

    // 1h, 1h+2h, 1h+2h+3h
    for (slot, hours) in (0_usize..).zip([1, 3, 6]) {
        assert_eq!(sums[slot], td(hours));
    }
}
5745
#[test]
fn nancummax_nancummin_timedelta64_preserves_dtype_x0x91() {
    // Running max / min over 2h, 5h, 1h, 3h must come back as Timedelta64.
    let hour_ns = 3_600 * 1_000_000_000_i64;
    let td = |hours: i64| Scalar::Timedelta64(hours * hour_ns);
    let values = vec![td(2), td(5), td(1), td(3)];

    let running_max = super::nancummax(&values);
    for (slot, hours) in (0_usize..).zip([2, 5, 5, 5]) {
        assert_eq!(running_max[slot], td(hours));
    }

    let running_min = super::nancummin(&values);
    for (slot, hours) in (0_usize..).zip([2, 2, 1, 1]) {
        assert_eq!(running_min[slot], td(hours));
    }
}
5767
#[test]
fn nancumulative_timedelta64_skips_nat_x0x91() {
    // A NaT in the middle stays missing in the output, and the sum that
    // follows it continues from the last valid total (1h + 2h = 3h).
    let hour_ns = 3_600 * 1_000_000_000_i64;
    let input = vec![
        Scalar::Timedelta64(hour_ns),
        Scalar::Timedelta64(Timedelta::NAT),
        Scalar::Timedelta64(2 * hour_ns),
    ];

    let sums = super::nancumsum(&input);

    let before_gap = Scalar::Timedelta64(hour_ns);
    let after_gap = Scalar::Timedelta64(3 * hour_ns);
    assert_eq!(sums[0], before_gap);
    assert!(sums[1].is_missing());
    assert_eq!(sums[2], after_gap);
}
5781
#[test]
fn nanquantile_linear_interpolation_matches_numpy() {
    let sample: Vec<Scalar> = vec![1.0, 2.0, 3.0, 4.0, 5.0]
        .into_iter()
        .map(Scalar::Float64)
        .collect();
    // True when `got` is a Float64 within 1e-9 of `want`.
    let near = |got: Scalar, want: f64| {
        matches!(got, Scalar::Float64(v) if (v - want).abs() < 1e-9)
    };

    // Median of 1..=5.
    assert!(near(super::nanquantile(&sample, 0.5), 3.0));
    // 25th percentile: position 0.25 * 4 = 1.0 lands exactly on 2.0.
    assert!(near(super::nanquantile(&sample, 0.25), 2.0));
}
5798
#[test]
fn nanquantile_ignores_nulls() {
    // With the NaN dropped, the median of {1, 3} interpolates to 2.
    let with_gap = vec![
        Scalar::Float64(1.0),
        Scalar::Null(NullKind::NaN),
        Scalar::Float64(3.0),
    ];
    let median = super::nanquantile(&with_gap, 0.5);
    let is_two = match median {
        Scalar::Float64(v) => (v - 2.0).abs() < 1e-9,
        _ => false,
    };
    assert!(is_two);
}
5809
#[test]
fn nanquantile_empty_and_out_of_range_yield_null() {
    // Empty input has no quantile.
    assert!(super::nanquantile(&[], 0.5).is_missing());

    // q above 1 and below 0 are both reported as missing.
    for q in [1.5, -0.1] {
        let lone = [Scalar::Float64(1.0)];
        assert!(super::nanquantile(&lone, q).is_missing());
    }
}
5816
#[test]
fn nanquantile_timedelta64_preserves_dtype_5djk7() {
    // br-frankenpandas-5djk7: pandas `td_series.quantile(q)` returns
    // Timedelta64; previously this silently became NaN via collect_finite.
    let hour_ns: i64 = 3_600 * 1_000_000_000;
    let td = |hours: i64| Scalar::Timedelta64(hours * hour_ns);
    let sample = vec![td(1), td(2), td(3), td(4), td(5)];

    // (q, expected hours): median, minimum, maximum.
    for (q, hours) in [(0.5, 3), (0.0, 1), (1.0, 5)] {
        assert_eq!(super::nanquantile(&sample, q), td(hours));
    }
}
5842
#[test]
fn nanquantile_timedelta64_linear_interpolation_5djk7() {
    let hour_ns: i64 = 3_600 * 1_000_000_000;
    let endpoints = vec![
        Scalar::Timedelta64(hour_ns),
        Scalar::Timedelta64(3 * hour_ns),
    ];

    // q = 0.5 sits halfway between 1h and 3h, i.e. 2h.
    let midpoint = super::nanquantile(&endpoints, 0.5);
    assert_eq!(midpoint, Scalar::Timedelta64(2 * hour_ns));
}
5856
#[test]
fn nanargmax_returns_first_position() {
    // The maximum 4.0 appears at positions 2 and 3; the earlier one wins,
    // and the NaN at position 1 still occupies a position.
    let values: Vec<Scalar> = vec![Some(1.0), None, Some(4.0), Some(4.0), Some(2.0)]
        .into_iter()
        .map(|cell| match cell {
            Some(x) => Scalar::Float64(x),
            None => Scalar::Null(NullKind::NaN),
        })
        .collect();

    let found = super::nanargmax(&values);
    assert_eq!(found, Some(2));
}
5868
#[test]
fn nanargmin_returns_first_position() {
    // The minimum 1.0 appears at positions 2 and 3; the earlier one wins.
    let values: Vec<Scalar> = vec![Some(3.0), None, Some(1.0), Some(1.0)]
        .into_iter()
        .map(|cell| match cell {
            Some(x) => Scalar::Float64(x),
            None => Scalar::Null(NullKind::NaN),
        })
        .collect();

    let found = super::nanargmin(&values);
    assert_eq!(found, Some(2));
}
5879
#[test]
fn nanargmax_all_missing_returns_none() {
    // Neither a NaN-flavoured nor a Null-flavoured missing value is a
    // candidate, so both searches come back empty.
    let only_missing = vec![Scalar::Null(NullKind::NaN), Scalar::Null(NullKind::Null)];

    let max_pos = super::nanargmax(&only_missing);
    assert_eq!(max_pos, None);

    let min_pos = super::nanargmin(&only_missing);
    assert_eq!(min_pos, None);
}
5886
#[test]
fn nansem_matches_std_over_sqrt_n() {
    let sample: Vec<Scalar> = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]
        .into_iter()
        .map(Scalar::Float64)
        .collect();

    // numpy/scipy reference: std(ddof=1) = 2.138089935299395, and
    // sem = std / sqrt(8) ≈ 0.7559.
    let sem = super::nansem(&sample, 1);
    assert!(matches!(sem, Scalar::Float64(_)));
    if let Scalar::Float64(got) = sem {
        assert!((got - 0.7559289460184544).abs() < 1e-9);
    }
}
5907
#[test]
fn nansem_empty_returns_null() {
    // No observations at all.
    let from_empty = super::nansem(&[], 1);
    assert!(from_empty.is_missing());

    // A single observation with ddof = 1 is also reported as missing.
    let from_single = super::nansem(&[Scalar::Float64(1.0)], 1);
    assert!(from_single.is_missing());
}
5913
#[test]
fn nanptp_returns_max_minus_min() {
    // Peak-to-peak of {3, 7, 1} with the NaN ignored: 7 - 1 = 6.
    let values: Vec<Scalar> = vec![Some(3.0), None, Some(7.0), Some(1.0)]
        .into_iter()
        .map(|cell| match cell {
            Some(x) => Scalar::Float64(x),
            None => Scalar::Null(NullKind::NaN),
        })
        .collect();

    let spread = super::nanptp(&values);
    assert_eq!(spread, Scalar::Float64(6.0));
}
5924
#[test]
fn nanptp_empty_returns_null() {
    // Empty input has no range.
    let from_empty = super::nanptp(&[]);
    assert!(from_empty.is_missing());

    // Input consisting solely of a NaN has no range either.
    let from_nan_only = super::nanptp(&[Scalar::Null(NullKind::NaN)]);
    assert!(from_nan_only.is_missing());
}
5930
#[test]
fn nanptp_timedelta64_preserves_dtype_u2g0r() {
    // br-frankenpandas-u2g0r: ptp over Timedelta64 input returns Timedelta64.
    let hour_ns: i64 = 3_600 * 1_000_000_000;
    let td = |hours: i64| Scalar::Timedelta64(hours * hour_ns);

    let values = vec![td(1), td(5), td(2)];
    // 5h - 1h = 4h
    assert_eq!(super::nanptp(&values), td(4));
}
5942
#[test]
fn nanargmax_nanargmin_timedelta64_compare_by_ns_ql1t5() {
    // br-frankenpandas-ql1t5: argmax/argmin over Timedelta64 compare the
    // i64 nanosecond payload directly rather than silently skipping the
    // values via to_f64.
    let hour_ns: i64 = 3_600 * 1_000_000_000;
    let td = |hours: i64| Scalar::Timedelta64(hours * hour_ns);
    let values = vec![td(2), td(5), td(1), td(3)];

    let largest_at = super::nanargmax(&values);
    let smallest_at = super::nanargmin(&values);
    assert_eq!(largest_at, Some(1));
    assert_eq!(smallest_at, Some(2));
}
5957
#[test]
fn nanprod_timedelta64_returns_null_szq6a() {
    // br-frankenpandas-szq6a: pandas raises on a Timedelta product because
    // it is dimensionally undefined. Here the result is surfaced as Null
    // rather than the misleading Float64(1.0) that the old empty-iterator
    // default produced.
    let hour_ns: i64 = 3_600 * 1_000_000_000;
    let factors = vec![
        Scalar::Timedelta64(2 * hour_ns),
        Scalar::Timedelta64(3 * hour_ns),
    ];

    let product = super::nanprod(&factors);
    assert!(product.is_missing());
}
5970
#[test]
fn nanskew_symmetric_distribution_near_zero() {
    // 1..=5 is perfectly symmetric around 3, so the skewness should be 0.
    let sample: Vec<Scalar> = vec![1.0, 2.0, 3.0, 4.0, 5.0]
        .into_iter()
        .map(Scalar::Float64)
        .collect();

    let skew = super::nanskew(&sample);
    assert!(matches!(skew, Scalar::Float64(_)));
    if let Scalar::Float64(got) = skew {
        assert!(got.abs() < 1e-9);
    }
}
5988
#[test]
fn nanskew_too_few_values_returns_null() {
    // Zero observations.
    let from_empty = super::nanskew(&[]);
    assert!(from_empty.is_missing());

    // Two observations are still reported as missing.
    let from_pair = super::nanskew(&[Scalar::Float64(1.0), Scalar::Float64(2.0)]);
    assert!(from_pair.is_missing());
}
5994
#[test]
fn nankurt_symmetric_uniform_distribution() {
    let sample: Vec<Scalar> = vec![1.0, 2.0, 3.0, 4.0, 5.0]
        .into_iter()
        .map(Scalar::Float64)
        .collect();

    // Reference value: pandas kurt([1, 2, 3, 4, 5]) = -1.2
    let kurt = super::nankurt(&sample);
    assert!(matches!(kurt, Scalar::Float64(_)));
    if let Scalar::Float64(got) = kurt {
        assert!((got + 1.2).abs() < 1e-9);
    }
}
6012
#[test]
fn nankurt_too_few_values_returns_null() {
    // Three observations (0, 1, 2) are reported as missing.
    let three = vec![
        Scalar::Float64(0.0),
        Scalar::Float64(1.0),
        Scalar::Float64(2.0),
    ];
    let kurt = super::nankurt(&three);
    assert!(kurt.is_missing());
}
6018
#[test]
fn nanskew_constant_series_returns_zero() {
    // Builds a series of `count` copies of 5.0.
    let fives = |count: usize| -> Vec<Scalar> {
        (0..count).map(|_| Scalar::Float64(5.0)).collect()
    };

    // A constant series is expected to yield exactly 0.0 for both skew
    // (three values) and kurtosis (four values).
    let skew = super::nanskew(&fives(3));
    assert_eq!(skew, Scalar::Float64(0.0));

    let kurt = super::nankurt(&fives(4));
    assert_eq!(kurt, Scalar::Float64(0.0));
}
6037
6038    // ── Interval tests (br-frankenpandas-j8k4) ──────────────────────────
6039
#[test]
fn interval_default_closed_is_right() {
    // The `Default` impl must pick the right-closed policy.
    let fallback = IntervalClosed::default();
    assert_eq!(fallback, IntervalClosed::Right);
}
6044
#[test]
fn interval_left_and_right_closed_helpers() {
    // (policy, closed on the left?, closed on the right?)
    let table = [
        (IntervalClosed::Left, true, false),
        (IntervalClosed::Right, false, true),
        (IntervalClosed::Both, true, true),
        (IntervalClosed::Neither, false, false),
    ];
    for (policy, left, right) in table {
        assert!(policy.left_closed() == left);
        assert!(policy.right_closed() == right);
    }
}
6056
#[test]
fn interval_display_matches_pandas_notation() {
    // Bracket style per closed policy for the interval from 0 to 5.
    let cases = [
        (IntervalClosed::Right, "(0, 5]"),
        (IntervalClosed::Left, "[0, 5)"),
        (IntervalClosed::Both, "[0, 5]"),
        (IntervalClosed::Neither, "(0, 5)"),
    ];
    for (policy, rendered) in cases {
        let shown = Interval::new(0.0, 5.0, policy).to_string();
        assert_eq!(shown, rendered);
    }
}
6076
#[test]
fn interval_length_and_mid() {
    // For the interval from 2 to 10: length 10 - 2 = 8, midpoint 6.
    let span = Interval::new(2.0, 10.0, IntervalClosed::Right);
    let (length, mid) = (span.length(), span.mid());
    assert_eq!(length, 8.0);
    assert_eq!(mid, 6.0);
}
6083
#[test]
fn interval_contains_matches_closed_policy() {
    // (policy, probe point, expected membership) over the interval 0..5.
    let expectations = [
        (IntervalClosed::Right, 0.0, false),
        (IntervalClosed::Right, 2.5, true),
        (IntervalClosed::Right, 5.0, true),
        (IntervalClosed::Left, 0.0, true),
        (IntervalClosed::Left, 2.5, true),
        (IntervalClosed::Left, 5.0, false),
        (IntervalClosed::Both, 0.0, true),
        (IntervalClosed::Both, 5.0, true),
        (IntervalClosed::Neither, 0.0, false),
        (IntervalClosed::Neither, 5.0, false),
        (IntervalClosed::Neither, 2.5, true),
    ];
    for (policy, point, inside) in expectations {
        let interval = Interval::new(0.0, 5.0, policy);
        assert!(interval.contains(point) == inside);
    }
}
6105
#[test]
fn interval_contains_nan_returns_false() {
    // NaN is not a member, even of a fully closed interval.
    let closed = Interval::new(0.0, 10.0, IntervalClosed::Both);
    let has_nan = closed.contains(f64::NAN);
    assert!(!has_nan);
}
6111
#[test]
fn interval_is_empty_matches_pandas() {
    // Degenerate (3, 3) intervals: empty unless both ends are closed, in
    // which case the single point 3 is contained
    // (pd.Interval(3, 3, 'right').is_empty → True,
    //  pd.Interval(3, 3, 'both').is_empty → False).
    let degenerate = [
        (IntervalClosed::Right, true),
        (IntervalClosed::Left, true),
        (IntervalClosed::Neither, true),
        (IntervalClosed::Both, false),
    ];
    for (policy, empty) in degenerate {
        assert!(Interval::new(3.0, 3.0, policy).is_empty() == empty);
    }

    // An interval with distinct endpoints is not empty.
    let regular = Interval::new(0.0, 5.0, IntervalClosed::Right);
    assert!(!regular.is_empty());
}
6123
#[test]
fn interval_overlaps_disjoint_returns_false() {
    // (0, 1] and (2, 3] share no points; checked in both directions.
    let low = Interval::new(0.0, 1.0, IntervalClosed::Right);
    let high = Interval::new(2.0, 3.0, IntervalClosed::Right);

    let forward = low.overlaps(&high);
    assert!(!forward);
    let backward = high.overlaps(&low);
    assert!(!backward);
}
6131
#[test]
fn interval_overlaps_nested_returns_true() {
    // (3, 7] lies entirely inside (0, 10]; checked in both directions.
    let enclosing = Interval::new(0.0, 10.0, IntervalClosed::Right);
    let enclosed = Interval::new(3.0, 7.0, IntervalClosed::Right);

    let forward = enclosing.overlaps(&enclosed);
    assert!(forward);
    let backward = enclosed.overlaps(&enclosing);
    assert!(backward);
}
6139
#[test]
fn interval_overlaps_touching_respects_closed_policy() {
    // (0, 1] and (1, 2] meet at 1. The first is closed there but the second
    // is open, so no point is shared.
    let lower_half_open = Interval::new(0.0, 1.0, IntervalClosed::Right);
    let upper_half_open = Interval::new(1.0, 2.0, IntervalClosed::Right);
    assert!(!lower_half_open.overlaps(&upper_half_open));

    // [0, 1] and [1, 2] are both closed at 1, so that point is shared.
    let lower_closed = Interval::new(0.0, 1.0, IntervalClosed::Both);
    let upper_closed = Interval::new(1.0, 2.0, IntervalClosed::Both);
    assert!(lower_closed.overlaps(&upper_closed));
}
6157
#[test]
fn interval_roundtrips_through_serde_json() {
    // Serialize to JSON and back; the decoded value must equal the input.
    let original = Interval::new(1.5, 3.25, IntervalClosed::Both);
    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: Interval = serde_json::from_str(&encoded).expect("deserialize");
    assert_eq!(original, decoded);
}
6165
#[test]
fn interval_serde_default_closed_is_right_when_missing() {
    // A payload without a `closed` key must fall back to the pandas
    // default, right-closed.
    let payload = r#"{"left":0.0,"right":5.0}"#;
    let decoded: Interval = serde_json::from_str(payload).expect("deserialize");
    assert_eq!(decoded.closed, IntervalClosed::Right);
}
6173
6174    // ── Period tests (br-frankenpandas-epoj) ────────────────────────────
6175
#[test]
fn period_freq_parses_canonical_aliases() {
    // (alias text, frequency it must parse to)
    let aliases = [
        ("A", PeriodFreq::Annual),
        ("Y", PeriodFreq::Annual),
        ("Q", PeriodFreq::Quarterly),
        ("M", PeriodFreq::Monthly),
        ("W", PeriodFreq::Weekly),
        ("D", PeriodFreq::Daily),
        ("B", PeriodFreq::Business),
        ("H", PeriodFreq::Hourly),
        ("T", PeriodFreq::Minutely),
        ("min", PeriodFreq::Minutely),
        ("S", PeriodFreq::Secondly),
    ];
    for (text, freq) in aliases {
        assert_eq!(PeriodFreq::parse(text), Some(freq));
    }
}
6190
#[test]
fn period_freq_parse_is_case_insensitive() {
    // Long-form names are accepted in lower and upper case.
    let lower = PeriodFreq::parse("quarterly");
    assert_eq!(lower, Some(PeriodFreq::Quarterly));

    let upper = PeriodFreq::parse("MONTHLY");
    assert_eq!(upper, Some(PeriodFreq::Monthly));
}
6196
#[test]
fn period_freq_rejects_unknown_aliases() {
    // Unrecognised text, including the empty string, parses to `None`.
    for text in ["nanosec", "", "xyz"] {
        assert_eq!(PeriodFreq::parse(text), None);
    }
}
6203
#[test]
fn period_freq_alias_roundtrip() {
    // Each frequency's own alias must parse back to that frequency.
    let every_freq = [
        PeriodFreq::Annual,
        PeriodFreq::Quarterly,
        PeriodFreq::Monthly,
        PeriodFreq::Weekly,
        PeriodFreq::Daily,
        PeriodFreq::Business,
        PeriodFreq::Hourly,
        PeriodFreq::Minutely,
        PeriodFreq::Secondly,
    ];
    for freq in every_freq {
        let reparsed = PeriodFreq::parse(freq.alias());
        assert_eq!(reparsed, Some(freq));
    }
}
6220
#[test]
fn period_freq_anchored_aliases_are_pandas_canonical_h2wiv() {
    // Emitted aliases carry the anchor suffix.
    let emitted = [
        (PeriodFreq::Annual, "Y-DEC"),
        (PeriodFreq::Quarterly, "Q-DEC"),
        (PeriodFreq::Weekly, "W-SUN"),
    ];
    for (freq, canonical) in emitted {
        assert_eq!(freq.alias(), canonical);
    }

    // Both the bare and the anchored spellings are accepted on input.
    let accepted = [
        ("A", PeriodFreq::Annual),
        ("Y", PeriodFreq::Annual),
        ("Y-DEC", PeriodFreq::Annual),
        ("Q", PeriodFreq::Quarterly),
        ("Q-DEC", PeriodFreq::Quarterly),
        ("W", PeriodFreq::Weekly),
        ("W-SUN", PeriodFreq::Weekly),
    ];
    for (text, freq) in accepted {
        assert_eq!(PeriodFreq::parse(text), Some(freq));
    }
}
6235
#[test]
fn period_freq_intraday_aliases_are_pandas_canonical_8kfdo() {
    // (frequency, alias it emits, legacy upper-case spelling still parsed)
    let intraday = [
        (PeriodFreq::Hourly, "h", "H"),
        (PeriodFreq::Minutely, "min", "T"),
        (PeriodFreq::Secondly, "s", "S"),
    ];

    for (freq, emitted, _) in intraday {
        assert_eq!(freq.alias(), emitted);
    }
    for (freq, _, legacy) in intraday {
        assert_eq!(PeriodFreq::parse(legacy), Some(freq));
    }
}
6246
#[test]
fn period_scalar_accessors_match_pandas_star8() {
    // The accessors must echo the constructor arguments; the monthly
    // frequency string is "M".
    let monthly = Period::new(600, PeriodFreq::Monthly);

    let ordinal = monthly.ordinal();
    let freq = monthly.freq();
    let freqstr = monthly.freqstr();

    assert_eq!(ordinal, 600);
    assert_eq!(freq, PeriodFreq::Monthly);
    assert_eq!(freqstr, "M");
}
6255
#[test]
fn period_parse_common_pandas_ordinals_avm08() {
    // (text, expected ordinal, expected frequency)
    let parsed_forms = [
        ("2024", 54, PeriodFreq::Annual),
        ("2024Q1", 216, PeriodFreq::Quarterly),
        ("2024-01", 648, PeriodFreq::Monthly),
        ("2024-01-15", 19_737, PeriodFreq::Daily),
    ];
    for (text, ordinal, freq) in parsed_forms {
        let parsed = Period::parse(text).unwrap();
        assert_eq!(parsed, Period::new(ordinal, freq));
    }

    // A bare number that is not a plausible year form is rejected.
    let rejected = Period::parse("216");
    assert!(rejected.is_err());
}
6276
#[test]
fn period_shift_advances_ordinal() {
    let base = Period::new(216, PeriodFreq::Quarterly);

    // One step forward: ordinal + 1, same frequency.
    let next = base.shift(1);
    assert_eq!(next.ordinal, 217);
    assert_eq!(next.freq, PeriodFreq::Quarterly);

    // One step backward: ordinal - 1.
    let previous = base.shift(-1);
    assert_eq!(previous.ordinal, 215);
}
6286
#[test]
fn period_shift_saturates_on_overflow() {
    // Shifting past either end of the i64 range clamps to that end.
    let near_top = Period::new(i64::MAX - 2, PeriodFreq::Daily);
    let clamped_high = near_top.shift(100);
    assert_eq!(clamped_high.ordinal, i64::MAX);

    let near_bottom = Period::new(i64::MIN + 2, PeriodFreq::Daily);
    let clamped_low = near_bottom.shift(-100);
    assert_eq!(clamped_low.ordinal, i64::MIN);
}
6294
#[test]
fn period_diff_returns_period_count() {
    // Same-frequency difference is the signed ordinal distance.
    let earlier = Period::new(216, PeriodFreq::Quarterly);
    let later = Period::new(220, PeriodFreq::Quarterly);

    let forward = later.diff(&earlier);
    let backward = earlier.diff(&later);
    assert_eq!(forward, Some(4));
    assert_eq!(backward, Some(-4));
}
6302
#[test]
fn period_diff_rejects_mismatched_freq() {
    // Equal ordinals but different frequencies: no difference is defined,
    // whichever side the call is made from.
    let by_month = Period::new(100, PeriodFreq::Monthly);
    let by_quarter = Period::new(100, PeriodFreq::Quarterly);

    let month_minus_quarter = by_month.diff(&by_quarter);
    assert_eq!(month_minus_quarter, None);
    let quarter_minus_month = by_quarter.diff(&by_month);
    assert_eq!(quarter_minus_month, None);
}
6310
#[test]
fn period_cmp_same_freq_respects_ordinal_order() {
    // Within one frequency, ordering follows the ordinal.
    let tenth = Period::new(10, PeriodFreq::Monthly);
    let twentieth = Period::new(20, PeriodFreq::Monthly);

    assert_eq!(
        tenth.cmp_same_freq(&twentieth),
        Some(std::cmp::Ordering::Less)
    );
    assert_eq!(
        twentieth.cmp_same_freq(&tenth),
        Some(std::cmp::Ordering::Greater)
    );
    assert_eq!(
        tenth.cmp_same_freq(&tenth),
        Some(std::cmp::Ordering::Equal)
    );
}
6320
#[test]
fn period_cmp_cross_freq_returns_none() {
    // Periods of different frequencies have no defined ordering.
    let monthly = Period::new(1, PeriodFreq::Monthly);
    let quarterly = Period::new(1, PeriodFreq::Quarterly);

    let ordering = monthly.cmp_same_freq(&quarterly);
    assert_eq!(ordering, None);
}
6327
#[test]
fn period_display_carries_freq_and_ordinal() {
    // Display output contains the frequency alias followed by the ordinal.
    let rendered = Period::new(216, PeriodFreq::Quarterly).to_string();
    assert_eq!(rendered, "Period[Q-DEC, 216]");
}
6333
#[test]
fn period_roundtrips_through_serde_json() {
    // Serialize to JSON and back; the decoded value must equal the input.
    let original = Period::new(42, PeriodFreq::Weekly);
    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: Period = serde_json::from_str(&encoded).expect("deserialize");
    assert_eq!(original, decoded);
}
6341
6342    // ── period_range tests (br-frankenpandas-2jef — epoj Phase 2) ───────
6343
6344    use super::period_range;
6345
#[test]
fn period_range_zero_periods_is_empty() {
    // Asking for zero periods yields nothing, regardless of the start.
    let range = period_range(Period::new(216, PeriodFreq::Quarterly), 0);
    assert!(range.is_empty());
}
6351
#[test]
fn period_range_single_period_returns_start_only() {
    // A one-element range consists of the start period itself.
    let first = Period::new(216, PeriodFreq::Quarterly);
    let range = period_range(first, 1);

    assert_eq!(range.len(), 1);
    assert_eq!(range[0], first);
}
6359
#[test]
fn period_range_increments_ordinal_by_one_per_step() {
    // Four quarters starting at ordinal 216 → 216, 217, 218, 219.
    let quarters = period_range(Period::new(216, PeriodFreq::Quarterly), 4);
    assert_eq!(quarters.len(), 4);

    for (slot, ordinal) in (0_usize..).zip([216, 217, 218, 219]) {
        assert_eq!(quarters[slot].ordinal, ordinal);
    }
}
6370
#[test]
fn period_range_preserves_frequency() {
    // Every generated period keeps the start period's frequency.
    let months = period_range(Period::new(0, PeriodFreq::Monthly), 12);
    let all_monthly = months.iter().all(|p| p.freq == PeriodFreq::Monthly);
    assert!(all_monthly);
}
6377
#[test]
fn period_range_negative_starting_ordinal_works() {
    // The ordinal axis is signed, so a range may start before the epoch
    // and count up through zero.
    let years = period_range(Period::new(-3, PeriodFreq::Annual), 5);
    let ordinals: Vec<_> = years.iter().map(|p| p.ordinal).collect();
    assert_eq!(ordinals, vec![-3, -2, -1, 0, 1]);
}
6388
#[test]
fn period_range_large_n_does_not_panic() {
    // 1024 monthly periods from ordinal 0: big enough to expose an
    // allocation bug; the last element must carry ordinal 1023.
    let months = period_range(Period::new(0, PeriodFreq::Monthly), 1024);
    assert_eq!(months.len(), 1024);

    let last = &months[1023];
    assert_eq!(last.ordinal, 1023);
}
6397
6398    // ── interval_range tests (br-frankenpandas-xaom) ────────────────────
6399
6400    use super::{TypeError, interval_range_by_periods, interval_range_by_step};
6401
#[test]
fn interval_range_by_periods_matches_pandas_default_case() {
    // pd.interval_range(0, 10, periods=5) → [(0,2],(2,4],(4,6],(6,8],(8,10]]
    let bins = interval_range_by_periods(0.0, 10.0, 5, IntervalClosed::Right);
    assert_eq!(bins.len(), 5);

    let edges = [(0.0, 2.0), (2.0, 4.0), (4.0, 6.0), (6.0, 8.0), (8.0, 10.0)];
    for (bin, (left, right)) in bins.iter().zip(edges) {
        assert_eq!(bin.left, left);
        assert_eq!(bin.right, right);
        assert_eq!(bin.closed, IntervalClosed::Right);
    }
}
6413
#[test]
fn interval_range_by_periods_final_edge_is_exact_end() {
    // Seven bins over 0..1 do not divide evenly in binary floating point;
    // the last right edge must still be exactly 1.0, with no accumulated drift.
    let bins = interval_range_by_periods(0.0, 1.0, 7, IntervalClosed::Right);
    let final_bin = bins.last().unwrap();
    assert_eq!(final_bin.right, 1.0);
}
6420
#[test]
fn interval_range_by_periods_zero_periods_is_empty() {
    // Zero requested periods yields no bins.
    let bins = interval_range_by_periods(0.0, 10.0, 0, IntervalClosed::Right);
    assert!(bins.is_empty());
}
6425
#[test]
fn interval_range_by_periods_reversed_range_is_empty() {
    // pandas: pd.interval_range(10, 0, periods=5) → IntervalIndex([]).
    let bins = interval_range_by_periods(10.0, 0.0, 5, IntervalClosed::Right);
    assert!(bins.is_empty());
}
6431
#[test]
fn interval_range_by_periods_preserves_closed_policy() {
    // Whatever policy is requested must be stamped on every bin.
    let policies = [
        IntervalClosed::Left,
        IntervalClosed::Right,
        IntervalClosed::Both,
        IntervalClosed::Neither,
    ];
    for policy in policies {
        let bins = interval_range_by_periods(0.0, 4.0, 2, policy);
        let uniform = bins.iter().all(|bin| bin.closed == policy);
        assert!(uniform);
    }
}
6444
#[test]
fn interval_range_by_step_matches_pandas_default_case() {
    // pd.interval_range(0, 10, freq=2) → [(0,2],(2,4],(4,6],(6,8],(8,10]]
    let outcome = interval_range_by_step(0.0, 10.0, 2.0, IntervalClosed::Right);
    let bins = outcome.expect("ok");

    assert_eq!(bins.len(), 5);
    // Only the outermost edges are pinned here.
    assert_eq!(bins[0].left, 0.0);
    assert_eq!(bins[4].right, 10.0);
}
6453
#[test]
fn interval_range_by_step_rejects_non_positive_step() {
    // Zero, negative, NaN and infinite steps must all be refused with
    // `InvalidIntervalStep`.
    for bad_step in [0.0, -2.0, f64::NAN, f64::INFINITY] {
        let outcome = interval_range_by_step(0.0, 10.0, bad_step, IntervalClosed::Right);
        assert!(matches!(
            outcome,
            Err(TypeError::InvalidIntervalStep { .. })
        ));
    }
}
6473
#[test]
fn interval_range_by_step_rejects_non_dividing_step() {
    // pandas: pd.interval_range(0, 10, freq=3) → ValueError, because the
    // span of 10 is not a multiple of the step 3. The equivalent here is
    // `IntervalStepDoesNotDivide`.
    let outcome = interval_range_by_step(0.0, 10.0, 3.0, IntervalClosed::Right);
    let refused = matches!(outcome, Err(TypeError::IntervalStepDoesNotDivide { .. }));
    assert!(refused);
}
6483
#[test]
fn interval_range_by_step_reversed_range_is_empty() {
    // start > end is not an error: the call succeeds with no bins.
    let outcome = interval_range_by_step(10.0, 0.0, 2.0, IntervalClosed::Right);
    assert!(outcome.expect("ok").is_empty());
}
6489
#[test]
fn interval_range_by_step_degenerate_zero_span_is_empty() {
    // start == end is not an error: the call succeeds with no bins.
    let outcome = interval_range_by_step(5.0, 5.0, 1.0, IntervalClosed::Right);
    assert!(outcome.expect("ok").is_empty());
}
6495
#[test]
fn interval_range_by_step_accepts_float_step_within_tolerance() {
    // Ten steps of 0.1 should cover 0..1, but summing 0.1 ten times in
    // floating point gives 0.9999...; the call must still accept the step
    // and end exactly on 1.0.
    let outcome = interval_range_by_step(0.0, 1.0, 0.1, IntervalClosed::Right);
    let bins = outcome.expect("ok");

    assert_eq!(bins.len(), 10);
    let final_bin = bins.last().unwrap();
    assert_eq!(final_bin.right, 1.0);
}
6503
6504    // ── Timedelta arithmetic tests (br-frankenpandas-4r56 Phase 1) ──────
6505
6506    use super::Timedelta;
6507
#[test]
fn timedelta_add_sums_non_nat() {
    // For two ordinary values, `add` is plain integer addition.
    let (hour, day) = (Timedelta::NANOS_PER_HOUR, Timedelta::NANOS_PER_DAY);
    let total = Timedelta::add(hour, day);
    assert_eq!(total, hour + day);
}
6514
#[test]
fn timedelta_add_propagates_nat() {
    // NaT on the left, on the right, or on both sides yields NaT.
    let operand_pairs = [
        (Timedelta::NAT, 100),
        (100, Timedelta::NAT),
        (Timedelta::NAT, Timedelta::NAT),
    ];
    for (lhs, rhs) in operand_pairs {
        assert_eq!(Timedelta::add(lhs, rhs), Timedelta::NAT);
    }
}
6524
#[test]
fn timedelta_add_saturates_on_overflow() {
    // Positive overflow clamps to i64::MAX.
    let clamped_high = Timedelta::add(i64::MAX - 10, 100);
    assert_eq!(clamped_high, i64::MAX);

    // i64::MIN itself is the NaT sentinel, so the negative case starts from
    // i64::MIN + 10 to exercise saturation rather than NaT propagation.
    // The clamped result is asserted to be i64::MIN.
    let clamped_low = Timedelta::add(i64::MIN + 10, -100);
    assert_eq!(clamped_low, i64::MIN);
}
6531
#[test]
fn timedelta_sub_subtracts_non_nat() {
    // For two ordinary values, `sub` is plain integer subtraction.
    let hour = Timedelta::NANOS_PER_HOUR;
    let minute = Timedelta::NANOS_PER_MIN;

    let difference = Timedelta::sub(hour, minute);
    assert_eq!(difference, hour - minute);
}
6540
#[test]
fn timedelta_sub_propagates_nat() {
    // NaT as either operand yields NaT.
    for (lhs, rhs) in [(Timedelta::NAT, 100), (100, Timedelta::NAT)] {
        assert_eq!(Timedelta::sub(lhs, rhs), Timedelta::NAT);
    }
}
6546
#[test]
fn timedelta_neg_flips_sign_non_nat() {
    // (input, negated): positive, negative, and zero.
    for (input, negated) in [(5, -5), (-5, 5), (0, 0)] {
        assert_eq!(Timedelta::neg(input), negated);
    }
}
6553
6554    #[test]
6555    fn timedelta_neg_preserves_nat() {
6556        assert_eq!(Timedelta::neg(Timedelta::NAT), Timedelta::NAT);
6557    }
6558
6559    #[test]
6560    fn timedelta_abs_returns_magnitude() {
6561        assert_eq!(Timedelta::abs(-5), 5);
6562        assert_eq!(Timedelta::abs(5), 5);
6563        assert_eq!(Timedelta::abs(0), 0);
6564        assert_eq!(Timedelta::abs(Timedelta::NAT), Timedelta::NAT);
6565    }
6566
6567    #[test]
6568    fn timedelta_mul_scalar_scales() {
6569        let three_hours = Timedelta::NANOS_PER_HOUR * 3;
6570        assert_eq!(
6571            Timedelta::mul_scalar(Timedelta::NANOS_PER_HOUR, 3),
6572            three_hours
6573        );
6574        assert_eq!(Timedelta::mul_scalar(100, 0), 0);
6575        assert_eq!(Timedelta::mul_scalar(100, -2), -200);
6576    }
6577
6578    #[test]
6579    fn timedelta_mul_scalar_saturates() {
6580        assert_eq!(Timedelta::mul_scalar(i64::MAX, 2), i64::MAX);
6581        // (i64::MIN + 1) * 2 saturates to i64::MIN (magnitude too large).
6582        assert_eq!(Timedelta::mul_scalar(i64::MIN + 1, 2), i64::MIN);
6583    }
6584
6585    #[test]
6586    fn timedelta_mul_scalar_propagates_nat() {
6587        assert_eq!(Timedelta::mul_scalar(Timedelta::NAT, 5), Timedelta::NAT);
6588    }
6589
6590    #[test]
6591    fn timedelta_div_scalar_floor_divides() {
6592        // Floor division (matches Python / pandas): -100 // 3 == -34, not -33.
6593        assert_eq!(Timedelta::div_scalar(100, 3), 33);
6594        assert_eq!(Timedelta::div_scalar(-100, 3), -34);
6595        assert_eq!(Timedelta::div_scalar(100, -3), -34);
6596        assert_eq!(Timedelta::div_scalar(-100, -3), 33);
6597    }
6598
6599    #[test]
6600    fn timedelta_div_scalar_zero_divisor_returns_nat() {
6601        assert_eq!(Timedelta::div_scalar(100, 0), Timedelta::NAT);
6602    }
6603
6604    #[test]
6605    fn timedelta_div_scalar_min_neg_one_propagates_nat() {
6606        // i64::MIN aliases NaT, so `div_scalar(i64::MIN, _)` propagates NaT
6607        // — the `i64::MIN / -1` arithmetic-overflow case is subsumed.
6608        assert_eq!(Timedelta::div_scalar(i64::MIN, -1), Timedelta::NAT);
6609        // (i64::MIN + 1) is a real timedelta; `/ -1` does not overflow.
6610        assert_eq!(Timedelta::div_scalar(i64::MIN + 1, -1), i64::MAX);
6611    }
6612
6613    #[test]
6614    fn timedelta_div_scalar_propagates_nat() {
6615        assert_eq!(Timedelta::div_scalar(Timedelta::NAT, 10), Timedelta::NAT);
6616    }
6617
6618    #[test]
6619    fn timedelta_div_timedelta_returns_float_ratio() {
6620        let two_hours = Timedelta::NANOS_PER_HOUR * 2;
6621        let one_hour = Timedelta::NANOS_PER_HOUR;
6622        assert!((Timedelta::div_timedelta(two_hours, one_hour) - 2.0).abs() < 1e-12);
6623        assert!((Timedelta::div_timedelta(one_hour, two_hours) - 0.5).abs() < 1e-12);
6624    }
6625
6626    #[test]
6627    fn timedelta_div_timedelta_nat_returns_nan() {
6628        assert!(Timedelta::div_timedelta(Timedelta::NAT, 100).is_nan());
6629        assert!(Timedelta::div_timedelta(100, Timedelta::NAT).is_nan());
6630    }
6631
6632    // ── Timestamp tests (br-frankenpandas-9p0u — 4r56 Phase 2) ──────────
6633
6634    use super::Timestamp;
6635
6636    #[test]
6637    fn timestamp_from_nanos_is_naive_utc() {
6638        let ts = Timestamp::from_nanos(1_700_000_000_000_000_000);
6639        assert_eq!(ts.nanos, 1_700_000_000_000_000_000);
6640        assert_eq!(ts.tz, None);
6641        assert!(!ts.is_nat());
6642    }
6643
6644    #[test]
6645    fn timestamp_from_nanos_tz_carries_tz_name() {
6646        let ts = Timestamp::from_nanos_tz(1_700_000_000_000_000_000, "US/Eastern");
6647        assert_eq!(ts.tz.as_deref(), Some("US/Eastern"));
6648    }
6649
6650    #[test]
6651    fn timestamp_now_returns_current_time() {
6652        let before = std::time::SystemTime::now()
6653            .duration_since(std::time::UNIX_EPOCH)
6654            .unwrap()
6655            .as_nanos() as i64;
6656        let ts = Timestamp::now();
6657        let after = std::time::SystemTime::now()
6658            .duration_since(std::time::UNIX_EPOCH)
6659            .unwrap()
6660            .as_nanos() as i64;
6661        assert!(ts.nanos >= before);
6662        assert!(ts.nanos <= after);
6663        assert!(!ts.is_nat());
6664    }
6665
6666    #[test]
6667    fn timestamp_today_returns_midnight() {
6668        let ts = Timestamp::today();
6669        assert!(!ts.is_nat());
6670        // Today should be normalized (midnight), so hour/min/sec should be 0
6671        assert_eq!(ts.hour(), Some(0));
6672        assert_eq!(ts.minute(), Some(0));
6673        assert_eq!(ts.second(), Some(0));
6674    }
6675
6676    #[test]
6677    fn timestamp_add_timedelta_shifts_nanos_and_preserves_tz() {
6678        let ts = Timestamp::from_nanos_tz(0, "US/Eastern");
6679        let one_day = Timedelta::NANOS_PER_DAY;
6680        let shifted = ts.add_timedelta(one_day);
6681        assert_eq!(shifted.nanos, one_day);
6682        assert_eq!(shifted.tz.as_deref(), Some("US/Eastern"));
6683    }
6684
6685    #[test]
6686    fn timestamp_add_timedelta_saturates_on_overflow() {
6687        let ts = Timestamp::from_nanos(i64::MAX - 10);
6688        let shifted = ts.add_timedelta(100);
6689        assert_eq!(shifted.nanos, i64::MAX);
6690    }
6691
6692    #[test]
6693    fn timestamp_add_timedelta_propagates_nat() {
6694        // NaT Timestamp + anything = NaT.
6695        assert!(Timestamp::nat().add_timedelta(100).is_nat());
6696        // Timestamp + NaT Timedelta = NaT.
6697        assert!(
6698            Timestamp::from_nanos(0)
6699                .add_timedelta(Timedelta::NAT)
6700                .is_nat()
6701        );
6702    }
6703
6704    #[test]
6705    fn timestamp_sub_timedelta_shifts_backward() {
6706        let ts = Timestamp::from_nanos(1_000);
6707        let shifted = ts.sub_timedelta(Timedelta::NANOS_PER_MICRO);
6708        assert_eq!(shifted.nanos, 0);
6709    }
6710
6711    #[test]
6712    fn timestamp_sub_timestamp_returns_timedelta_nanos() {
6713        let t0 = Timestamp::from_nanos(0);
6714        let t1 = Timestamp::from_nanos(Timedelta::NANOS_PER_HOUR);
6715        assert_eq!(t1.sub_timestamp(&t0), Timedelta::NANOS_PER_HOUR);
6716        assert_eq!(t0.sub_timestamp(&t1), -Timedelta::NANOS_PER_HOUR);
6717    }
6718
6719    #[test]
6720    fn timestamp_sub_timestamp_nat_propagates() {
6721        let ts = Timestamp::from_nanos(1_000);
6722        assert_eq!(Timestamp::nat().sub_timestamp(&ts), Timedelta::NAT);
6723        assert_eq!(ts.sub_timestamp(&Timestamp::nat()), Timedelta::NAT);
6724    }
6725
6726    #[test]
6727    fn timestamp_semantic_eq_treats_two_nat_as_equal() {
6728        assert!(Timestamp::nat().semantic_eq(&Timestamp::nat()));
6729        assert!(!Timestamp::nat().semantic_eq(&Timestamp::from_nanos(0)));
6730        assert!(!Timestamp::from_nanos(0).semantic_eq(&Timestamp::nat()));
6731    }
6732
6733    #[test]
6734    fn timestamp_partial_cmp_orders_by_nanos_nat_is_incomparable() {
6735        use std::cmp::Ordering;
6736        let a = Timestamp::from_nanos(0);
6737        let b = Timestamp::from_nanos(100);
6738        assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
6739        assert_eq!(b.partial_cmp(&a), Some(Ordering::Greater));
6740        assert_eq!(a.partial_cmp(&a), Some(Ordering::Equal));
6741        assert_eq!(a.partial_cmp(&Timestamp::nat()), None);
6742        assert_eq!(Timestamp::nat().partial_cmp(&Timestamp::nat()), None);
6743    }
6744
6745    #[test]
6746    fn timestamp_display_matches_phase2_debug_format() {
6747        assert_eq!(Timestamp::from_nanos(42).to_string(), "Timestamp[42, UTC]");
6748        assert_eq!(
6749            Timestamp::from_nanos_tz(42, "US/Eastern").to_string(),
6750            "Timestamp[42, US/Eastern]"
6751        );
6752        assert_eq!(Timestamp::nat().to_string(), "NaT");
6753    }
6754
6755    #[test]
6756    fn timestamp_value_and_unit_match_pandas_l0edr() {
6757        let ts = Timestamp::from_nanos(1_000_000_123);
6758        assert_eq!(ts.value(), 1_000_000_123);
6759        assert_eq!(ts.unit(), Some("ns"));
6760
6761        let nat = Timestamp::nat();
6762        assert_eq!(nat.value(), Timestamp::NAT);
6763        assert_eq!(nat.unit(), None);
6764    }
6765
6766    #[test]
6767    fn timestamp_numpy_datetime64_materializers_match_value_twksi() {
6768        let ts = Timestamp::from_nanos(1_000_000_123);
6769        assert_eq!(ts.asm8(), ts.value());
6770        assert_eq!(ts.to_datetime64(), ts.value());
6771        assert_eq!(ts.to_numpy(), ts.value());
6772
6773        let nat = Timestamp::nat();
6774        assert_eq!(nat.asm8(), Timestamp::NAT);
6775        assert_eq!(nat.to_datetime64(), Timestamp::NAT);
6776        assert_eq!(nat.to_numpy(), Timestamp::NAT);
6777    }
6778
6779    #[test]
6780    fn timestamp_timestamp_accessor_matches_pandas_microsecond_rounding_py0h3() {
6781        assert_eq!(Timestamp::from_nanos(0).timestamp(), Ok(0.0));
6782        assert_eq!(Timestamp::from_nanos(1_500_000_000).timestamp(), Ok(1.5));
6783        assert_eq!(Timestamp::from_nanos(500).timestamp(), Ok(0.0));
6784        assert_eq!(Timestamp::from_nanos(501).timestamp(), Ok(0.000001));
6785        assert_eq!(Timestamp::from_nanos(2_500).timestamp(), Ok(0.000003));
6786
6787        assert!(matches!(
6788            Timestamp::from_nanos(-500).timestamp(),
6789            Ok(value) if value == -0.0 && value.is_sign_negative()
6790        ));
6791        assert_eq!(Timestamp::from_nanos(-2_500).timestamp(), Ok(-0.000003));
6792        assert_eq!(
6793            Timestamp::nat().timestamp(),
6794            Err(TypeError::ValueIsMissing {
6795                kind: NullKind::NaT,
6796            })
6797        );
6798    }
6799
6800    #[test]
6801    fn timestamp_roundtrips_through_serde_json() {
6802        let naive = Timestamp::from_nanos(1_700_000_000_000_000_000);
6803        let json = serde_json::to_string(&naive).expect("serialize");
6804        let back: Timestamp = serde_json::from_str(&json).expect("deserialize");
6805        assert_eq!(naive, back);
6806
6807        let tz_aware = Timestamp::from_nanos_tz(1_700_000_000_000_000_000, "US/Eastern");
6808        let json = serde_json::to_string(&tz_aware).expect("serialize");
6809        let back: Timestamp = serde_json::from_str(&json).expect("deserialize");
6810        assert_eq!(tz_aware, back);
6811    }
6812
6813    #[test]
6814    fn timestamp_is_send_and_sync() {
6815        fn assert_send_sync<T: Send + Sync>() {}
6816        assert_send_sync::<Timestamp>();
6817    }
6818
6819    // ── Timestamp rounding tests (br-frankenpandas-5h6n) ────────────────
6820
6821    #[test]
6822    fn timestamp_floor_to_rounds_down() {
6823        // 12:34:56 → floor by 1H → 12:00:00
6824        let h = Timedelta::NANOS_PER_HOUR;
6825        let twelve_h = h * 12;
6826        let twelve_thirty_four =
6827            twelve_h + Timedelta::NANOS_PER_MIN * 34 + Timedelta::NANOS_PER_SEC * 56;
6828        let ts = Timestamp::from_nanos(twelve_thirty_four);
6829        let floored = ts.floor_to(h);
6830        assert_eq!(floored.nanos, twelve_h);
6831    }
6832
6833    #[test]
6834    fn timestamp_floor_to_handles_already_aligned() {
6835        // 12:00:00 floored by 1H → 12:00:00 (no change).
6836        let h = Timedelta::NANOS_PER_HOUR;
6837        let twelve_h = h * 12;
6838        let ts = Timestamp::from_nanos(twelve_h);
6839        assert_eq!(ts.floor_to(h).nanos, twelve_h);
6840    }
6841
6842    #[test]
6843    fn timestamp_floor_to_handles_negative_nanos() {
6844        // -100 ns floored by 60 ns:
6845        //   div_euclid(-100, 60) = -2 (since -2*60=-120, rem=20 ≥ 0).
6846        //   result = -2 * 60 = -120.
6847        let ts = Timestamp::from_nanos(-100);
6848        assert_eq!(ts.floor_to(60).nanos, -120);
6849    }
6850
6851    #[test]
6852    fn timestamp_ceil_to_rounds_up() {
6853        // 12:34:56 → ceil by 1H → 13:00:00.
6854        let h = Timedelta::NANOS_PER_HOUR;
6855        let twelve_h = h * 12;
6856        let thirteen_h = h * 13;
6857        let twelve_thirty_four =
6858            twelve_h + Timedelta::NANOS_PER_MIN * 34 + Timedelta::NANOS_PER_SEC * 56;
6859        let ts = Timestamp::from_nanos(twelve_thirty_four);
6860        assert_eq!(ts.ceil_to(h).nanos, thirteen_h);
6861    }
6862
6863    #[test]
6864    fn timestamp_ceil_to_no_op_on_aligned() {
6865        let h = Timedelta::NANOS_PER_HOUR;
6866        let twelve_h = h * 12;
6867        let ts = Timestamp::from_nanos(twelve_h);
6868        assert_eq!(ts.ceil_to(h).nanos, twelve_h);
6869    }
6870
6871    #[test]
6872    fn timestamp_round_to_rounds_to_nearest() {
6873        // 12:30:01 (one second past the half-hour): round to 1H → 13:00:00.
6874        let h = Timedelta::NANOS_PER_HOUR;
6875        let twelve_h = h * 12;
6876        let twelve_thirty_one_sec =
6877            twelve_h + Timedelta::NANOS_PER_MIN * 30 + Timedelta::NANOS_PER_SEC;
6878        let ts = Timestamp::from_nanos(twelve_thirty_one_sec);
6879        assert_eq!(ts.round_to(h).nanos, h * 13);
6880
6881        // 12:29:59 (one second before half): round to 1H → 12:00:00.
6882        let twelve_twenty_nine_sec =
6883            twelve_h + Timedelta::NANOS_PER_MIN * 29 + Timedelta::NANOS_PER_SEC * 59;
6884        let ts = Timestamp::from_nanos(twelve_twenty_nine_sec);
6885        assert_eq!(ts.round_to(h).nanos, twelve_h);
6886    }
6887
6888    #[test]
6889    fn timestamp_round_to_bankers_tie_to_even() {
6890        // Tie cases: rem == unit/2 exactly. Pick even-multiple floor.
6891        // unit=10, so half=5. nanos=5: floor=0 (even), so → 0.
6892        // nanos=15: floor=1 (odd), so → 20.
6893        // nanos=25: floor=2 (even), so → 20.
6894        // nanos=35: floor=3 (odd), so → 40.
6895        assert_eq!(Timestamp::from_nanos(5).round_to(10).nanos, 0);
6896        assert_eq!(Timestamp::from_nanos(15).round_to(10).nanos, 20);
6897        assert_eq!(Timestamp::from_nanos(25).round_to(10).nanos, 20);
6898        assert_eq!(Timestamp::from_nanos(35).round_to(10).nanos, 40);
6899    }
6900
6901    #[test]
6902    fn timestamp_round_to_zero_unit_returns_nat() {
6903        let ts = Timestamp::from_nanos(100);
6904        assert!(ts.round_to(0).is_nat());
6905        assert!(ts.floor_to(0).is_nat());
6906        assert!(ts.ceil_to(0).is_nat());
6907    }
6908
6909    #[test]
6910    fn timestamp_round_to_negative_unit_returns_nat() {
6911        let ts = Timestamp::from_nanos(100);
6912        assert!(ts.round_to(-10).is_nat());
6913        assert!(ts.floor_to(-10).is_nat());
6914        assert!(ts.ceil_to(-10).is_nat());
6915    }
6916
6917    #[test]
6918    fn timestamp_rounding_propagates_nat() {
6919        let nat = Timestamp::nat();
6920        assert!(nat.floor_to(60).is_nat());
6921        assert!(nat.ceil_to(60).is_nat());
6922        assert!(nat.round_to(60).is_nat());
6923    }
6924
6925    #[test]
6926    fn timestamp_rounding_preserves_tz() {
6927        let ts = Timestamp::from_nanos_tz(100, "US/Eastern");
6928        assert_eq!(ts.floor_to(60).tz.as_deref(), Some("US/Eastern"));
6929        assert_eq!(ts.ceil_to(60).tz.as_deref(), Some("US/Eastern"));
6930        assert_eq!(ts.round_to(60).tz.as_deref(), Some("US/Eastern"));
6931    }
6932
6933    // ── Timestamp string-unit rounding tests (br-frankenpandas-lbsx) ────
6934
6935    #[test]
6936    fn timestamp_floor_to_unit_h_rounds_to_hour() {
6937        let h = Timedelta::NANOS_PER_HOUR;
6938        let twelve_h = h * 12;
6939        let twelve_thirty_four =
6940            twelve_h + Timedelta::NANOS_PER_MIN * 34 + Timedelta::NANOS_PER_SEC * 56;
6941        let ts = Timestamp::from_nanos(twelve_thirty_four);
6942        assert_eq!(ts.floor_to_unit("H").nanos, twelve_h);
6943        assert_eq!(ts.floor_to_unit("h").nanos, twelve_h);
6944        assert_eq!(ts.floor_to_unit("hour").nanos, twelve_h);
6945        assert_eq!(ts.floor_to_unit("hours").nanos, twelve_h);
6946        assert_eq!(ts.floor_to_unit("hr").nanos, twelve_h);
6947    }
6948
6949    #[test]
6950    fn timestamp_ceil_to_unit_d_rounds_to_day() {
6951        // 12:34:56 → ceil to 1 day → 24:00:00 (next day).
6952        let h = Timedelta::NANOS_PER_HOUR;
6953        let d = Timedelta::NANOS_PER_DAY;
6954        let twelve_thirty_four = h * 12 + Timedelta::NANOS_PER_MIN * 34;
6955        let ts = Timestamp::from_nanos(twelve_thirty_four);
6956        assert_eq!(ts.ceil_to_unit("D").nanos, d);
6957        assert_eq!(ts.ceil_to_unit("day").nanos, d);
6958        assert_eq!(ts.ceil_to_unit("days").nanos, d);
6959    }
6960
6961    #[test]
6962    fn timestamp_round_to_unit_min_rounds_to_minute() {
6963        // 12:34:31 → round to 1 minute → 12:35:00.
6964        let m = Timedelta::NANOS_PER_MIN;
6965        let twelve_thirty_four_thirty_one =
6966            Timedelta::NANOS_PER_HOUR * 12 + m * 34 + Timedelta::NANOS_PER_SEC * 31;
6967        let ts = Timestamp::from_nanos(twelve_thirty_four_thirty_one);
6968        let expected = Timedelta::NANOS_PER_HOUR * 12 + m * 35;
6969        assert_eq!(ts.round_to_unit("min").nanos, expected);
6970        assert_eq!(ts.round_to_unit("T").nanos, expected); // pandas pre-2.2 alias
6971        assert_eq!(ts.round_to_unit("minute").nanos, expected);
6972    }
6973
6974    #[test]
6975    fn timestamp_floor_ceil_round_aliases_match_unit_methods_li897() {
6976        let ts = Timestamp::from_nanos(
6977            Timedelta::NANOS_PER_HOUR * 12
6978                + Timedelta::NANOS_PER_MIN * 34
6979                + Timedelta::NANOS_PER_SEC * 31,
6980        );
6981
6982        assert_eq!(ts.floor("H"), ts.floor_to_unit("H"));
6983        assert_eq!(ts.ceil("D"), ts.ceil_to_unit("D"));
6984        assert_eq!(ts.round("min"), ts.round_to_unit("min"));
6985    }
6986
6987    #[test]
6988    fn timestamp_normalize_floors_to_day_and_preserves_tz_455op() {
6989        let ts = Timestamp::from_nanos_tz(
6990            Timedelta::NANOS_PER_DAY * 3
6991                + Timedelta::NANOS_PER_HOUR * 12
6992                + Timedelta::NANOS_PER_MIN * 34,
6993            "US/Eastern",
6994        );
6995        let normalized = ts.normalize();
6996
6997        assert_eq!(normalized.nanos, Timedelta::NANOS_PER_DAY * 3);
6998        assert_eq!(normalized.tz.as_deref(), Some("US/Eastern"));
6999        assert!(Timestamp::nat().normalize().is_nat());
7000    }
7001
7002    #[test]
7003    fn timestamp_unit_rounding_unknown_unit_returns_nat() {
7004        let ts = Timestamp::from_nanos(100);
7005        assert!(ts.floor_to_unit("fortnight").is_nat());
7006        assert!(ts.ceil_to_unit("century").is_nat());
7007        assert!(ts.round_to_unit("xyz").is_nat());
7008    }
7009
7010    #[test]
7011    fn timestamp_unit_rounding_propagates_nat() {
7012        let nat = Timestamp::nat();
7013        assert!(nat.floor_to_unit("H").is_nat());
7014        assert!(nat.ceil_to_unit("H").is_nat());
7015        assert!(nat.round_to_unit("H").is_nat());
7016    }
7017
7018    #[test]
7019    fn timestamp_unit_rounding_preserves_tz() {
7020        let ts = Timestamp::from_nanos_tz(Timedelta::NANOS_PER_HOUR * 12 + 100, "US/Eastern");
7021        assert_eq!(ts.floor_to_unit("H").tz.as_deref(), Some("US/Eastern"));
7022        assert_eq!(ts.ceil_to_unit("H").tz.as_deref(), Some("US/Eastern"));
7023        assert_eq!(ts.round_to_unit("H").tz.as_deref(), Some("US/Eastern"));
7024    }
7025
7026    #[test]
7027    fn timedelta_unit_to_nanos_is_now_public_and_matches_pandas_aliases() {
7028        // Public surface check: pandas alias core set.
7029        assert_eq!(
7030            Timedelta::unit_to_nanos("W"),
7031            Some(Timedelta::NANOS_PER_WEEK)
7032        );
7033        assert_eq!(
7034            Timedelta::unit_to_nanos("D"),
7035            Some(Timedelta::NANOS_PER_DAY)
7036        );
7037        assert_eq!(
7038            Timedelta::unit_to_nanos("H"),
7039            Some(Timedelta::NANOS_PER_HOUR)
7040        );
7041        assert_eq!(
7042            Timedelta::unit_to_nanos("min"),
7043            Some(Timedelta::NANOS_PER_MIN)
7044        );
7045        assert_eq!(
7046            Timedelta::unit_to_nanos("s"),
7047            Some(Timedelta::NANOS_PER_SEC)
7048        );
7049        assert_eq!(
7050            Timedelta::unit_to_nanos("ms"),
7051            Some(Timedelta::NANOS_PER_MILLI)
7052        );
7053        assert_eq!(
7054            Timedelta::unit_to_nanos("us"),
7055            Some(Timedelta::NANOS_PER_MICRO)
7056        );
7057        assert_eq!(Timedelta::unit_to_nanos("ns"), Some(1));
7058        // Empty string → days (pandas default).
7059        assert_eq!(Timedelta::unit_to_nanos(""), Some(Timedelta::NANOS_PER_DAY));
7060        // Unknown alias → None.
7061        assert_eq!(Timedelta::unit_to_nanos("century"), None);
7062    }
7063
7064    #[test]
7065    fn timestamp_isoformat_basic() {
7066        let ts = Timestamp::from_nanos(0);
7067        assert_eq!(ts.isoformat(), "1970-01-01T00:00:00");
7068
7069        let ts_utc = Timestamp::from_nanos_tz(0, "UTC");
7070        assert_eq!(ts_utc.isoformat(), "1970-01-01T00:00:00+00:00");
7071
7072        let ts_tz = Timestamp::from_nanos_tz(
7073            Timedelta::NANOS_PER_DAY
7074                + Timedelta::NANOS_PER_HOUR * 14
7075                + Timedelta::NANOS_PER_MIN * 30,
7076            "America/New_York",
7077        );
7078        assert!(ts_tz.isoformat().contains("1970-01-02T14:30:00"));
7079        assert!(ts_tz.isoformat().contains("[America/New_York]"));
7080
7081        assert_eq!(Timestamp::nat().isoformat(), "NaT");
7082    }
7083
7084    #[test]
7085    fn timestamp_strftime_basic() {
7086        let ts = Timestamp::from_nanos(
7087            Timedelta::NANOS_PER_DAY * 365
7088                + Timedelta::NANOS_PER_HOUR * 9
7089                + Timedelta::NANOS_PER_MIN * 15,
7090        );
7091        assert_eq!(ts.strftime("%Y-%m-%d"), "1971-01-01");
7092        assert_eq!(ts.strftime("%H:%M:%S"), "09:15:00");
7093        assert_eq!(ts.strftime("%Y/%m/%d %H:%M"), "1971/01/01 09:15");
7094        assert_eq!(Timestamp::nat().strftime("%Y-%m-%d"), "NaT");
7095    }
7096
7097    #[test]
7098    fn timestamp_day_name_and_month_name() {
7099        let ts = Timestamp::from_nanos(0);
7100        assert_eq!(ts.day_name(), "Thursday");
7101        assert_eq!(ts.month_name(), "January");
7102
7103        let ts2 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 365);
7104        assert_eq!(ts2.day_name(), "Friday");
7105        assert_eq!(ts2.month_name(), "January");
7106
7107        assert_eq!(Timestamp::nat().day_name(), "NaT");
7108        assert_eq!(Timestamp::nat().month_name(), "NaT");
7109    }
7110
7111    #[test]
7112    fn timestamp_component_accessors() {
7113        let ts = Timestamp::from_nanos(0);
7114        assert_eq!(ts.year(), Some(1970));
7115        assert_eq!(ts.month(), Some(1));
7116        assert_eq!(ts.day(), Some(1));
7117        assert_eq!(ts.hour(), Some(0));
7118        assert_eq!(ts.minute(), Some(0));
7119        assert_eq!(ts.second(), Some(0));
7120        assert_eq!(ts.microsecond(), Some(0));
7121        assert_eq!(ts.nanosecond(), Some(0));
7122
7123        let ts2 = Timestamp::from_nanos(
7124            Timedelta::NANOS_PER_DAY * 365
7125                + Timedelta::NANOS_PER_HOUR * 14
7126                + Timedelta::NANOS_PER_MIN * 30
7127                + Timedelta::NANOS_PER_SEC * 45
7128                + 123_456_789,
7129        );
7130        assert_eq!(ts2.year(), Some(1971));
7131        assert_eq!(ts2.month(), Some(1));
7132        assert_eq!(ts2.day(), Some(1));
7133        assert_eq!(ts2.hour(), Some(14));
7134        assert_eq!(ts2.minute(), Some(30));
7135        assert_eq!(ts2.second(), Some(45));
7136        assert_eq!(ts2.microsecond(), Some(123456));
7137        assert_eq!(ts2.nanosecond(), Some(789));
7138
7139        assert_eq!(Timestamp::nat().year(), None);
7140        assert_eq!(Timestamp::nat().month(), None);
7141        assert_eq!(Timestamp::nat().day(), None);
7142    }
7143
7144    #[test]
7145    fn timestamp_dayofweek_dayofyear_quarter() {
7146        let ts = Timestamp::from_nanos(0);
7147        assert_eq!(ts.dayofweek(), Some(3));
7148        assert_eq!(ts.weekday(), Some(3));
7149        assert_eq!(ts.dayofyear(), Some(1));
7150        assert_eq!(ts.quarter(), Some(1));
7151
7152        let ts2 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 90);
7153        assert_eq!(ts2.quarter(), Some(2));
7154
7155        let ts3 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 365);
7156        assert_eq!(ts3.dayofyear(), Some(1));
7157        assert_eq!(ts3.dayofweek(), Some(4));
7158
7159        assert_eq!(Timestamp::nat().dayofweek(), None);
7160        assert_eq!(Timestamp::nat().dayofyear(), None);
7161        assert_eq!(Timestamp::nat().quarter(), None);
7162    }
7163
7164    #[test]
7165    fn timestamp_is_boundary_methods() {
7166        let jan1 = Timestamp::from_nanos(0);
7167        assert_eq!(jan1.is_leap_year(), Some(false));
7168        assert_eq!(jan1.is_month_start(), Some(true));
7169        assert_eq!(jan1.is_month_end(), Some(false));
7170        assert_eq!(jan1.is_quarter_start(), Some(true));
7171        assert_eq!(jan1.is_quarter_end(), Some(false));
7172        assert_eq!(jan1.is_year_start(), Some(true));
7173        assert_eq!(jan1.is_year_end(), Some(false));
7174
7175        let dec31 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 364);
7176        assert_eq!(dec31.is_month_start(), Some(false));
7177        assert_eq!(dec31.is_month_end(), Some(true));
7178        assert_eq!(dec31.is_quarter_end(), Some(true));
7179        assert_eq!(dec31.is_year_end(), Some(true));
7180
7181        assert_eq!(Timestamp::nat().is_leap_year(), None);
7182        assert_eq!(Timestamp::nat().is_month_start(), None);
7183    }
7184
7185    #[test]
7186    fn timestamp_days_in_month() {
7187        let jan = Timestamp::from_nanos(0);
7188        assert_eq!(jan.days_in_month(), Some(31));
7189        assert_eq!(jan.daysinmonth(), Some(31));
7190
7191        let feb_non_leap = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 31);
7192        assert_eq!(feb_non_leap.days_in_month(), Some(28));
7193
7194        assert_eq!(Timestamp::nat().days_in_month(), None);
7195    }
7196
7197    #[test]
7198    fn timestamp_weekofyear() {
7199        let jan1 = Timestamp::from_nanos(0);
7200        assert_eq!(jan1.weekofyear(), Some(1));
7201        assert_eq!(jan1.week(), Some(1));
7202
7203        let jan8 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 7);
7204        assert_eq!(jan8.weekofyear(), Some(2));
7205
7206        assert_eq!(Timestamp::nat().weekofyear(), None);
7207        assert_eq!(Timestamp::nat().week(), None);
7208    }
7209
7210    #[test]
7211    fn timestamp_weekofyear_iso_53_week_boundaries() {
7212        // ISO-8601 53-week-year boundaries vs pandas 2.2.3 isocalendar().week.
7213        // (br-frankenpandas-xmfmd) Date -> nanos via days since 1970-01-01.
7214        fn week_of(date_days: i64) -> Option<i64> {
7215            Timestamp::from_nanos(date_days * Timedelta::NANOS_PER_DAY).weekofyear()
7216        }
7217        // Days from 1970-01-01 for each date (UTC, no tz).
7218        // 2021-01-01 -> week 53 (2020 is a 53-week year); FP used to give 52.
7219        assert_eq!(week_of(18_628), Some(53)); // 2021-01-01
7220        // 2016-01-01 -> week 53 (2015 is a 53-week year).
7221        assert_eq!(week_of(16_801), Some(53)); // 2016-01-01
7222        // 2026-12-31 -> week 53; FP used to give 1.
7223        assert_eq!(week_of(20_818), Some(53)); // 2026-12-31
7224        // 2020-12-31 -> week 53.
7225        assert_eq!(week_of(18_627), Some(53)); // 2020-12-31
7226        // Cases that must stay correct (non-53 boundaries):
7227        assert_eq!(week_of(19_358), Some(52)); // 2023-01-01 -> week 52
7228        assert_eq!(week_of(20_087), Some(1)); // 2024-12-30 -> week 1
7229        assert_eq!(week_of(18_260), Some(1)); // 2019-12-30 -> week 1
7230    }
7231
7232    #[test]
7233    fn iso_weeks_in_year_53_week_years() {
7234        use super::iso_weeks_in_year;
7235        // Known 53-week years; everything else is 52.
7236        for y in [2004, 2009, 2015, 2020, 2026] {
7237            assert_eq!(iso_weeks_in_year(y), 53, "{y} should have 53 ISO weeks");
7238        }
7239        for y in [2018, 2019, 2021, 2022, 2023, 2024] {
7240            assert_eq!(iso_weeks_in_year(y), 52, "{y} should have 52 ISO weeks");
7241        }
7242    }
7243
7244    #[test]
7245    fn timestamp_to_unit() {
7246        let ts = Timestamp::from_nanos(1_000_000_000);
7247        assert_eq!(ts.to_unit("ns"), Some(1_000_000_000));
7248        assert_eq!(ts.to_unit("us"), Some(1_000_000));
7249        assert_eq!(ts.to_unit("ms"), Some(1_000));
7250        assert_eq!(ts.to_unit("s"), Some(1));
7251        assert_eq!(ts.to_unit("invalid"), None);
7252
7253        assert_eq!(Timestamp::nat().to_unit("ns"), None);
7254    }
7255
7256    #[test]
7257    fn timestamp_toordinal() {
7258        // 2026-01-01 is ordinal 738886 (days since Jan 1, year 1)
7259        // Days from Unix epoch: 738886 - 719163 = 19723
7260        let nanos_2026_01_01 = 19723_i64 * 24 * 60 * 60 * 1_000_000_000;
7261        let ts = Timestamp::from_nanos(nanos_2026_01_01);
7262        assert_eq!(ts.toordinal(), Some(738886));
7263
7264        // NaT returns None
7265        assert_eq!(Timestamp::nat().toordinal(), None);
7266    }
7267
7268    #[test]
7269    fn timestamp_fromordinal() {
7270        // Round-trip test: create a timestamp from ordinal derived from toordinal
7271        // First create a known timestamp
7272        let nanos_2026_01_01 = 19723_i64 * 24 * 60 * 60 * 1_000_000_000;
7273        let ts_orig = Timestamp::from_nanos(nanos_2026_01_01);
7274        let ordinal = ts_orig.toordinal().unwrap();
7275
7276        // Now convert back using fromordinal
7277        let ts = Timestamp::fromordinal(ordinal);
7278        assert_eq!(ts.year(), ts_orig.year());
7279        assert_eq!(ts.month(), ts_orig.month());
7280        assert_eq!(ts.day(), ts_orig.day());
7281
7282        // Invalid ordinal returns NaT
7283        let nat = Timestamp::fromordinal(0);
7284        assert!(nat.is_nat());
7285    }
7286
7287    #[test]
7288    fn timestamp_parse_iso8601_date_only() {
7289        let ts = Timestamp::parse("2024-01-15").unwrap();
7290        assert_eq!(ts.year(), Some(2024));
7291        assert_eq!(ts.month(), Some(1));
7292        assert_eq!(ts.day(), Some(15));
7293        assert_eq!(ts.hour(), Some(0));
7294        assert_eq!(ts.minute(), Some(0));
7295        assert_eq!(ts.second(), Some(0));
7296    }
7297
7298    #[test]
7299    fn timestamp_parse_iso8601_datetime() {
7300        let ts = Timestamp::parse("2024-01-15T10:30:45").unwrap();
7301        assert_eq!(ts.year(), Some(2024));
7302        assert_eq!(ts.month(), Some(1));
7303        assert_eq!(ts.day(), Some(15));
7304        assert_eq!(ts.hour(), Some(10));
7305        assert_eq!(ts.minute(), Some(30));
7306        assert_eq!(ts.second(), Some(45));
7307    }
7308
7309    #[test]
7310    fn timestamp_parse_space_separator() {
7311        let ts = Timestamp::parse("2024-01-15 10:30:45").unwrap();
7312        assert_eq!(ts.year(), Some(2024));
7313        assert_eq!(ts.hour(), Some(10));
7314    }
7315
7316    #[test]
7317    fn timestamp_parse_with_fractional_seconds() {
7318        let ts = Timestamp::parse("2024-01-15T10:30:45.123456789").unwrap();
7319        assert_eq!(ts.second(), Some(45));
7320        assert_eq!(ts.microsecond(), Some(123456));
7321        assert_eq!(ts.nanosecond(), Some(789));
7322    }
7323
7324    #[test]
7325    fn timestamp_parse_utc_timezone() {
7326        let ts = Timestamp::parse("2024-01-15T10:30:45Z").unwrap();
7327        assert_eq!(ts.tz, Some("UTC".to_string()));
7328    }
7329
7330    #[test]
7331    fn timestamp_parse_offset_timezone() {
7332        let ts = Timestamp::parse("2024-01-15T10:30:45+05:30").unwrap();
7333        assert_eq!(ts.tz, Some("+05:30".to_string()));
7334    }
7335
7336    #[test]
7337    fn timestamp_parse_nat() {
7338        let ts = Timestamp::parse("NaT").unwrap();
7339        assert!(ts.is_nat());
7340        let ts2 = Timestamp::parse("nat").unwrap();
7341        assert!(ts2.is_nat());
7342    }
7343
7344    #[test]
7345    fn timestamp_parse_invalid() {
7346        assert!(Timestamp::parse("not a date").is_err());
7347        assert!(Timestamp::parse("2024-13-01").is_err()); // invalid month
7348        assert!(Timestamp::parse("2024-01-32").is_err()); // invalid day
7349    }
7350
7351    #[test]
7352    fn period_parse_annual() {
7353        let p = Period::parse("2024").unwrap();
7354        assert_eq!(p.freq(), PeriodFreq::Annual);
7355        assert_eq!(p.ordinal(), 2024 - 1970);
7356    }
7357
7358    #[test]
7359    fn period_parse_quarterly() {
7360        let p = Period::parse("2024Q1").unwrap();
7361        assert_eq!(p.freq(), PeriodFreq::Quarterly);
7362        assert_eq!(p.ordinal(), (2024 - 1970) * 4);
7363
7364        let p2 = Period::parse("2024q3").unwrap();
7365        assert_eq!(p2.freq(), PeriodFreq::Quarterly);
7366        assert_eq!(p2.ordinal(), (2024 - 1970) * 4 + 2);
7367    }
7368
7369    #[test]
7370    fn period_parse_monthly() {
7371        let p = Period::parse("2024-01").unwrap();
7372        assert_eq!(p.freq(), PeriodFreq::Monthly);
7373        assert_eq!(p.ordinal(), (2024 - 1970) * 12);
7374
7375        let p2 = Period::parse("2024-12").unwrap();
7376        assert_eq!(p2.freq(), PeriodFreq::Monthly);
7377        assert_eq!(p2.ordinal(), (2024 - 1970) * 12 + 11);
7378    }
7379
7380    #[test]
7381    fn period_parse_nat() {
7382        let p = Period::parse("NaT").unwrap();
7383        assert_eq!(p.ordinal(), i64::MIN);
7384    }
7385
7386    #[test]
7387    fn period_parse_invalid() {
7388        assert!(Period::parse("not a period").is_err());
7389        assert!(Period::parse("2024Q5").is_err()); // invalid quarter
7390        assert!(Period::parse("2024-13").is_err()); // invalid month
7391    }
7392
7393    #[test]
7394    fn interval_parse_basic() {
7395        let i = Interval::parse("[0, 1]").unwrap();
7396        assert_eq!(i.left, 0.0);
7397        assert_eq!(i.right, 1.0);
7398        assert_eq!(i.closed, IntervalClosed::Both);
7399
7400        let i2 = Interval::parse("(0, 1)").unwrap();
7401        assert_eq!(i2.left, 0.0);
7402        assert_eq!(i2.right, 1.0);
7403        assert_eq!(i2.closed, IntervalClosed::Neither);
7404
7405        let i3 = Interval::parse("[0, 1)").unwrap();
7406        assert_eq!(i3.closed, IntervalClosed::Left);
7407
7408        let i4 = Interval::parse("(0, 1]").unwrap();
7409        assert_eq!(i4.closed, IntervalClosed::Right);
7410    }
7411
7412    #[test]
7413    fn interval_parse_floats() {
7414        let i = Interval::parse("[-1.5, 2.5)").unwrap();
7415        assert_eq!(i.left, -1.5);
7416        assert_eq!(i.right, 2.5);
7417        assert_eq!(i.closed, IntervalClosed::Left);
7418    }
7419
7420    #[test]
7421    fn interval_parse_invalid() {
7422        assert!(Interval::parse("invalid").is_err());
7423        assert!(Interval::parse("[0]").is_err());
7424        assert!(Interval::parse("0, 1").is_err()); // missing brackets
7425    }
7426}
fp_types/lib.rs

fp_types/
lib.rs