Skip to main content

fp_index/
lib.rs

1#![forbid(unsafe_code)]
2#![warn(rustdoc::broken_intra_doc_links)]
3
4//! Row-label / index machinery for **frankenpandas** — every
5//! `DataFrame` and `Series` in fp-frame carries an [`Index`] that
6//! pairs labels with positional row indices, plus the alignment
7//! algebra that pandas users expect from `Series + Series` and
8//! `DataFrame.align()`.
9//!
10//! ## Core types
11//!
12//! - [`Index`]: the canonical row-label container. Internally a
13//!   typed `Vec<IndexLabel>` plus an O(1) label-lookup hashmap
14//!   built lazily on first use. Pandas `Index` shape: monotonic
15//!   probes, duplicate handling, range-style construction
16//!   (`Index::from_range(0..N)`), name metadata.
//! - [`IndexLabel`]: typed label enum — `Int64(i64)`, `Utf8(String)`,
//!   `Timedelta64(i64)`, `Datetime64(i64)`, plus `Null(NullKind)` for
//!   typed missing labels. Lets one `Index` carry mixed-type labels
//!   without erasing to strings.
21//! - [`MultiIndex`]: hierarchical multi-level index for
22//!   pandas-style row MultiIndex DataFrames. Each level is itself
23//!   a `Vec<IndexLabel>` plus an integer codes array.
24//! - [`MultiIndexOrIndex`]: sum-type for code paths that accept
25//!   either flat `Index` or `MultiIndex`.
26//! - [`DuplicateKeep`]: enum controlling `keep='first' | 'last'
27//!   | False` semantics in `Index.duplicated` /
28//!   `Index.drop_duplicates` etc.
29//!
30//! ## Alignment algebra
31//!
32//! Binary ops between two pandas-shaped frames need to align rows
33//! by label. The aligner builds an [`AlignmentPlan`] (or
34//! [`MultiAlignmentPlan`] for N-way joins) that the caller then
35//! applies to each side's value buffers:
36//!
37//! - [`align`] dispatches on [`AlignMode`] (`Left`, `Right`,
38//!   `Inner`, `Outer`).
39//! - [`align_inner`], [`align_left`], [`align_union`]: direct
40//!   single-mode entry points.
41//! - [`leapfrog_union`] / [`leapfrog_intersection`]: N-way row
42//!   alignment via a leapfrog merge over already-sorted indexes
43//!   (used by [`multi_way_align`]).
44//! - [`validate_alignment_plan`]: sanity check (lengths match,
45//!   indices in bounds).
46//!
47//! ## Date / time helpers
48//!
49//! Pandas `pd.date_range` / `pd.timedelta_range` analogs:
50//!
51//! - [`timedelta_range`]: pandas-style timedelta range builder.
52//! - [`apply_date_offset`] / [`apply_date_offset_to_nanos`]:
53//!   evaluate a [`DateOffset`] against an anchor timestamp.
54//! - [`infer_freq`] / [`infer_freq_from_timestamps`] /
55//!   [`infer_freq_from_nanos`]: pandas-style frequency inference
56//!   from a sample of timestamps.
//! - [`format_datetime_ns`]: render a nanosecond-since-epoch i64
//!   as the canonical `YYYY-MM-DD HH:MM:SS` string (whole seconds;
//!   the `i64::MIN` sentinel renders as `NaT`) used in IndexLabel
//!   display and IO formatters.
60//!
61//! ## Error reporting
62//!
63//! - [`IndexError`]: structural / lookup failures (not-monotonic,
64//!   not-unique, missing-label, validation-mismatch).
65//! - [`TimedeltaRangeError`] / [`DateRangeError`]: range builder
66//!   parse / step / overflow errors.
67//!
68//! ## Relationship to other crates
69//!
70//! - **fp-types** supplies [`Scalar`] / [`Timedelta`] /
71//!   `format_datetime_ns` primitives.
72//! - **fp-frame** stores an `Index` per DataFrame / Series and uses
73//!   the alignment algebra here for binary ops.
74//! - **fp-join** consumes alignment plans for merge-style joins.
75
76use std::{
77    borrow::Cow,
78    collections::HashMap,
79    fmt,
80    sync::{
81        Arc, Mutex, OnceLock,
82        atomic::{AtomicU64, Ordering as AtomicOrdering},
83    },
84};
85
86use chrono::Datelike;
87use fp_types::{Period, PeriodFreq, Scalar, Timedelta, TimedeltaComponents};
88// Dedup / set-op seen-sets key on &IndexLabel and read output order from the
89// INPUT scan (first-seen filter / positional bool), never from map iteration —
90// so the hasher is observationally invisible. FxHash (rustc-hash, pure safe
91// Rust) replaces the std SipHasher on these hot membership maps; public-return
92// maps (position_map_first, groupby) keep std HashMap to avoid an API change.
93use rustc_hash::{FxHashMap, FxHashSet};
94use serde::{Deserialize, Deserializer, Serialize, Serializer};
95use thiserror::Error;
96
/// A single typed row label.
///
/// Serialized by serde as an adjacently tagged value: the variant name goes
/// under `"kind"` in snake_case and the payload under `"value"`.
///
/// The derived `Ord` compares variants in declaration order first, so the
/// order of the variants below is part of the type's behavior.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(tag = "kind", content = "value", rename_all = "snake_case")]
pub enum IndexLabel {
    /// Signed 64-bit integer label.
    Int64(i64),
    /// Owned string label.
    Utf8(String),
    /// Timedelta label stored as a raw `i64`; `Timedelta::NAT` marks a
    /// missing value (see `is_missing`).
    Timedelta64(i64),
    /// Datetime label stored as a raw `i64` that `format_datetime_ns`
    /// interprets as nanoseconds since the Unix epoch; `i64::MIN` marks a
    /// missing value (NaT).
    Datetime64(i64),
    /// Typed missing label (br-frankenpandas-joeff): lets value_counts
    /// (dropna=False) and friends keep pandas' distinct None / nan / NaT
    /// buckets instead of collapsing them or colliding with genuine
    /// "None"/"nan" strings. Appended LAST so the derived `Ord` sorts null
    /// labels after every concrete label (pandas NaN-last sort order) without
    /// disturbing the existing cross-variant order. `Eq`/`Hash` are
    /// kind-SENSITIVE (None != nan != NaT), matching `ScalarKey::Null`
    /// bucket identity.
    Null(fp_types::NullKind),
}
114
115impl From<i64> for IndexLabel {
116    fn from(value: i64) -> Self {
117        Self::Int64(value)
118    }
119}
120
121impl From<&str> for IndexLabel {
122    fn from(value: &str) -> Self {
123        Self::Utf8(value.to_owned())
124    }
125}
126
127impl From<String> for IndexLabel {
128    fn from(value: String) -> Self {
129        Self::Utf8(value)
130    }
131}
132
133impl IndexLabel {
134    #[must_use]
135    fn is_missing(&self) -> bool {
136        match self {
137            Self::Timedelta64(value) => *value == Timedelta::NAT,
138            Self::Datetime64(value) => *value == i64::MIN,
139            Self::Int64(_) | Self::Utf8(_) => false,
140            Self::Null(_) => true,
141        }
142    }
143}
144
145fn index_label_is_truthy(label: &IndexLabel) -> bool {
146    if label.is_missing() {
147        return false;
148    }
149    match label {
150        IndexLabel::Int64(v) => *v != 0,
151        IndexLabel::Utf8(s) => !s.is_empty(),
152        IndexLabel::Timedelta64(v) => *v != 0,
153        IndexLabel::Datetime64(v) => *v != 0,
154        // Unreachable: is_missing() returned true above for every Null.
155        IndexLabel::Null(_) => false,
156    }
157}
158
159impl fmt::Display for IndexLabel {
160    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
161        match self {
162            Self::Int64(v) => write!(f, "{v}"),
163            Self::Utf8(v) => write!(f, "{v}"),
164            Self::Timedelta64(v) => write!(f, "{}", Timedelta::format(*v)),
165            Self::Datetime64(v) => write!(f, "{}", format_datetime_ns(*v)),
166            // Matches pandas' REPR of missing labels in an index (None / NaN /
167            // NaT — note uppercase NaN: the formatter surface, unlike
168            // str(nan)=='nan' which astype(str) uses). Verified pandas 2.2.3.
169            Self::Null(fp_types::NullKind::Null) => write!(f, "None"),
170            Self::Null(fp_types::NullKind::NaN) => write!(f, "NaN"),
171            Self::Null(fp_types::NullKind::NaT) => write!(f, "NaT"),
172        }
173    }
174}
175
176pub fn format_datetime_ns(nanos: i64) -> String {
177    if nanos == i64::MIN {
178        return "NaT".to_owned();
179    }
180    let secs = nanos / 1_000_000_000;
181    let subsec_nanos = (nanos % 1_000_000_000).unsigned_abs() as u32;
182    let dt = chrono::DateTime::from_timestamp(secs, subsec_nanos)
183        .unwrap_or(chrono::DateTime::UNIX_EPOCH);
184    dt.format("%Y-%m-%d %H:%M:%S").to_string()
185}
186
/// AG-13: Detected sort order of an index's labels.
///
/// Enables adaptive backend selection: binary search for sorted indexes,
/// HashMap fallback for unsorted. Computed lazily via `OnceLock`.
///
/// Every `Ascending*` variant means the labels are homogeneous in type and
/// strictly increasing, so it also implies the labels are unique.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SortOrder {
    /// Labels are not in any recognized sorted order.
    Unsorted,
    /// All labels are `Int64` and strictly ascending (no duplicates).
    /// `detect_sort_order` also reports this for an empty label slice.
    AscendingInt64,
    /// All labels are `Utf8` and strictly ascending (no duplicates).
    AscendingUtf8,
    /// All labels are `Timedelta64` and strictly ascending (no duplicates).
    AscendingTimedelta64,
    /// All labels are `Datetime64` and strictly ascending (no duplicates).
    AscendingDatetime64,
}
204
/// Which set operation a two-pointer sorted merge should emit
/// (br-frankenpandas-idxdup). Both inputs are strictly ascending and unique.
///
/// Both variants describe which labels of `self` (the left operand) survive.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SetMergeKind {
    /// Keep `self` labels that also appear in `other`.
    Intersection,
    /// Keep `self` labels that do NOT appear in `other`.
    Difference,
}
214
215/// Detect the sort order of the label slice.
216fn detect_sort_order(labels: &[IndexLabel]) -> SortOrder {
217    if labels.len() <= 1 {
218        return match labels.first() {
219            Some(IndexLabel::Int64(_)) | None => SortOrder::AscendingInt64,
220            Some(IndexLabel::Utf8(_)) => SortOrder::AscendingUtf8,
221            Some(IndexLabel::Timedelta64(_)) => SortOrder::AscendingTimedelta64,
222            Some(IndexLabel::Datetime64(_)) => SortOrder::AscendingDatetime64,
223            // Null labels never enable a typed binary-search backend.
224            Some(IndexLabel::Null(_)) => SortOrder::Unsorted,
225        };
226    }
227
228    // Check if all Int64 and strictly ascending.
229    let all_int = labels.iter().all(|l| matches!(l, IndexLabel::Int64(_)));
230    if all_int {
231        let is_sorted = labels.windows(2).all(|w| {
232            if let (IndexLabel::Int64(a), IndexLabel::Int64(b)) = (&w[0], &w[1]) {
233                a < b
234            } else {
235                false
236            }
237        });
238        if is_sorted {
239            return SortOrder::AscendingInt64;
240        }
241    }
242
243    // Check if all Utf8 and strictly ascending.
244    let all_utf8 = labels.iter().all(|l| matches!(l, IndexLabel::Utf8(_)));
245    if all_utf8 {
246        let is_sorted = labels.windows(2).all(|w| {
247            if let (IndexLabel::Utf8(a), IndexLabel::Utf8(b)) = (&w[0], &w[1]) {
248                a < b
249            } else {
250                false
251            }
252        });
253        if is_sorted {
254            return SortOrder::AscendingUtf8;
255        }
256    }
257
258    // Check if all Timedelta64 and strictly ascending.
259    let all_td = labels
260        .iter()
261        .all(|l| matches!(l, IndexLabel::Timedelta64(_)));
262    if all_td {
263        let is_sorted = labels.windows(2).all(|w| {
264            if let (IndexLabel::Timedelta64(a), IndexLabel::Timedelta64(b)) = (&w[0], &w[1]) {
265                a < b
266            } else {
267                false
268            }
269        });
270        if is_sorted {
271            return SortOrder::AscendingTimedelta64;
272        }
273    }
274
275    // Check if all Datetime64 and strictly ascending.
276    let all_dt = labels
277        .iter()
278        .all(|l| matches!(l, IndexLabel::Datetime64(_)));
279    if all_dt {
280        let is_sorted = labels.windows(2).all(|w| {
281            if let (IndexLabel::Datetime64(a), IndexLabel::Datetime64(b)) = (&w[0], &w[1]) {
282                a < b
283            } else {
284                false
285            }
286        });
287        if is_sorted {
288            return SortOrder::AscendingDatetime64;
289        }
290    }
291
292    SortOrder::Unsorted
293}
294
/// Which occurrence of a duplicated label is treated as the one to keep.
///
/// NOTE(review): the consumers are outside this view. Per the crate-level
/// docs this mirrors pandas' `keep='first' | 'last' | False`; confirm the
/// exact semantics against `duplicated` / `drop_duplicates`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DuplicateKeep {
    /// pandas `keep='first'`.
    First,
    /// pandas `keep='last'`.
    Last,
    /// pandas `keep=False`.
    None,
}
301
// Source of process-wide label identities; handed out by
// `next_index_label_identity`, starting at 1.
static INDEX_LABEL_ID_COUNTER: AtomicU64 = AtomicU64::new(1);
// Memo of label-vector comparisons, keyed by the ordered pair of label
// identities. Created lazily on first use by `Index::labels_equal`.
static INDEX_LABEL_EQUALITY_CACHE: OnceLock<Mutex<FxHashMap<(u64, u64), bool>>> = OnceLock::new();

// Once the equality cache holds this many entries, `Index::labels_equal`
// clears it entirely before inserting the next result.
const INDEX_LABEL_EQUALITY_CACHE_MAX: usize = 4096;
306
307fn next_index_label_identity() -> u64 {
308    INDEX_LABEL_ID_COUNTER.fetch_add(1, AtomicOrdering::Relaxed)
309}
310
311#[derive(Debug, Clone, Copy, PartialEq, Eq)]
312struct Int64UnitRangeLabels {
313    start: i64,
314    len: usize,
315}
316
317impl Int64UnitRangeLabels {
318    fn new(start: i64, len: usize) -> Option<Self> {
319        if len > 0 {
320            let last_offset = i64::try_from(len.checked_sub(1)?).ok()?;
321            start.checked_add(last_offset)?;
322        }
323        Some(Self { start, len })
324    }
325
326    fn materialize(self) -> Vec<IndexLabel> {
327        let mut labels = Vec::with_capacity(self.len);
328        for offset in 0..self.len {
329            let offset = i64::try_from(offset).expect("validated Int64 unit range length");
330            labels.push(IndexLabel::Int64(
331                self.start
332                    .checked_add(offset)
333                    .expect("validated Int64 unit range end"),
334            ));
335        }
336        labels
337    }
338
339    fn position(self, target: i64) -> Option<usize> {
340        let offset = target.checked_sub(self.start)?;
341        let offset = usize::try_from(offset).ok()?;
342        (offset < self.len).then_some(offset)
343    }
344
345    fn equals_slice(self, labels: &[IndexLabel]) -> bool {
346        labels.len() == self.len
347            && labels.iter().enumerate().all(|(offset, label)| {
348                let Ok(offset) = i64::try_from(offset) else {
349                    return false;
350                };
351                matches!(
352                    label,
353                    IndexLabel::Int64(value)
354                        if self.start.checked_add(offset).is_some_and(|expected| *value == expected)
355                )
356            })
357    }
358}
359
/// Label storage for an `Index`, with up to three interchangeable backings:
/// an eagerly materialized label vector, a dense Int64 unit range, and a
/// typed `i64` vector. Backings that are absent are derived lazily from one
/// that is present.
struct IndexLabels {
    /// Shared immutable label vector (br-frankenpandas-idxclone). Behind `Arc`
    /// so cloning an `Index` is an O(1) refcount bump instead of an O(n)
    /// `Vec<IndexLabel>` deep copy — the dominant cost of same-index binary ops
    /// (`a + b` re-uses the operand index). Set once, never mutated, so sharing
    /// is observationally identical to a private copy.
    materialized: OnceLock<Arc<Vec<IndexLabel>>>,
    /// `Some` when the labels are the dense Int64 range `start..start + len`;
    /// the label vector is then generated on demand.
    int64_unit_range: Option<Int64UnitRangeLabels>,
    /// Lazy typed Int64 backing (br-frankenpandas-dxqpm). `Some(values)` once
    /// computed means every label is `IndexLabel::Int64` and `values` is the
    /// raw `i64` view; `None` once computed means the labels are not all
    /// Int64. Pre-seeded by typed constructors so gathers/clones/drops of
    /// Int64-labelled indexes stay on contiguous `i64` storage instead of the
    /// 32 B enum representation.
    int64_typed: OnceLock<Option<Arc<Vec<i64>>>>,
}
376
377impl IndexLabels {
378    fn new(labels: Vec<IndexLabel>) -> Self {
379        let materialized = OnceLock::new();
380        let _ = materialized.set(Arc::new(labels));
381        Self {
382            materialized,
383            int64_unit_range: None,
384            int64_typed: OnceLock::new(),
385        }
386    }
387
388    fn new_int64_unit_range(start: i64, len: usize) -> Option<Self> {
389        Some(Self {
390            materialized: OnceLock::new(),
391            int64_unit_range: Some(Int64UnitRangeLabels::new(start, len)?),
392            int64_typed: OnceLock::new(),
393        })
394    }
395
396    fn new_int64_values(values: Arc<Vec<i64>>) -> Self {
397        let int64_typed = OnceLock::new();
398        let _ = int64_typed.set(Some(values));
399        Self {
400            materialized: OnceLock::new(),
401            int64_unit_range: None,
402            int64_typed,
403        }
404    }
405
406    fn as_slice(&self) -> &[IndexLabel] {
407        self.materialized
408            .get_or_init(|| {
409                if let Some(range) = self.int64_unit_range {
410                    return Arc::new(range.materialize());
411                }
412                let values = self
413                    .int64_typed
414                    .get()
415                    .and_then(Option::as_ref)
416                    .expect("lazy index labels require a typed or range backing");
417                Arc::new(values.iter().copied().map(IndexLabel::Int64).collect())
418            })
419            .as_slice()
420    }
421
422    fn len(&self) -> usize {
423        if let Some(range) = self.int64_unit_range {
424            return range.len;
425        }
426        if let Some(labels) = self.materialized.get() {
427            return labels.len();
428        }
429        if let Some(Some(values)) = self.int64_typed.get() {
430            return values.len();
431        }
432        self.as_slice().len()
433    }
434
435    fn is_empty(&self) -> bool {
436        self.len() == 0
437    }
438
439    fn int64_unit_range(&self) -> Option<Int64UnitRangeLabels> {
440        self.int64_unit_range
441    }
442
443    /// The raw `i64` view of an all-Int64 label vector, computing and caching
444    /// it on first request. `None` means at least one label is not Int64.
445    fn int64_view(&self) -> Option<Arc<Vec<i64>>> {
446        self.int64_typed
447            .get_or_init(|| {
448                if let Some(range) = self.int64_unit_range {
449                    let mut values = Vec::with_capacity(range.len);
450                    for offset in 0..range.len {
451                        let offset =
452                            i64::try_from(offset).expect("validated Int64 unit range length");
453                        values.push(
454                            range
455                                .start
456                                .checked_add(offset)
457                                .expect("validated Int64 unit range end"),
458                        );
459                    }
460                    return Some(Arc::new(values));
461                }
462                let labels = self.materialized.get()?;
463                let mut values = Vec::with_capacity(labels.len());
464                for label in labels.iter() {
465                    match label {
466                        IndexLabel::Int64(value) => values.push(*value),
467                        _ => return None,
468                    }
469                }
470                Some(Arc::new(values))
471            })
472            .clone()
473    }
474
475    /// The cached `i64` view if it has already been computed (never computes).
476    /// Outer `None` = not yet computed; `Some(None)` = known non-Int64.
477    fn cached_int64_view(&self) -> Option<Option<Arc<Vec<i64>>>> {
478        self.int64_typed.get().cloned()
479    }
480}
481
482impl Clone for IndexLabels {
483    fn clone(&self) -> Self {
484        let int64_typed = OnceLock::new();
485        if let Some(view) = self.int64_typed.get() {
486            let _ = int64_typed.set(view.clone());
487        }
488        let materialized = OnceLock::new();
489        // A unit-range or typed Int64 backing can regenerate the label vector
490        // on demand, so skip the O(n) Vec<IndexLabel> deep clone in that case.
491        let has_lazy_backing =
492            self.int64_unit_range.is_some() || matches!(int64_typed.get(), Some(Some(_)));
493        if !has_lazy_backing && let Some(labels) = self.materialized.get() {
494            let _ = materialized.set(labels.clone());
495        }
496        Self {
497            materialized,
498            int64_unit_range: self.int64_unit_range,
499            int64_typed,
500        }
501    }
502}
503
504impl Default for IndexLabels {
505    fn default() -> Self {
506        Self::new(Vec::new())
507    }
508}
509
510impl fmt::Debug for IndexLabels {
511    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
512        self.as_slice().fmt(f)
513    }
514}
515
516impl PartialEq for IndexLabels {
517    fn eq(&self, other: &Self) -> bool {
518        match (self.int64_unit_range, other.int64_unit_range) {
519            (Some(left), Some(right)) => left == right,
520            (Some(range), None) => range.equals_slice(other.as_slice()),
521            (None, Some(range)) => range.equals_slice(self.as_slice()),
522            (None, None) => self.as_slice() == other.as_slice(),
523        }
524    }
525}
526
527impl Eq for IndexLabels {}
528
529impl std::ops::Deref for IndexLabels {
530    type Target = [IndexLabel];
531
532    fn deref(&self) -> &Self::Target {
533        self.as_slice()
534    }
535}
536
537impl<'a> IntoIterator for &'a IndexLabels {
538    type Item = &'a IndexLabel;
539    type IntoIter = std::slice::Iter<'a, IndexLabel>;
540
541    fn into_iter(self) -> Self::IntoIter {
542        self.as_slice().iter()
543    }
544}
545
546impl Serialize for IndexLabels {
547    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
548    where
549        S: Serializer,
550    {
551        self.as_slice().serialize(serializer)
552    }
553}
554
555impl<'de> Deserialize<'de> for IndexLabels {
556    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
557    where
558        D: Deserializer<'de>,
559    {
560        Vec::<IndexLabel>::deserialize(deserializer).map(Self::new)
561    }
562}
563
/// Row-label container: the labels, an optional name, and runtime-only
/// caches derived from the labels.
///
/// Only `labels` and `name` are serialized. The identity and the caches are
/// skipped; on deserialization the identity is freshly issued and the caches
/// start empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Index {
    /// The labels themselves; defaults to empty when absent from the input.
    #[serde(default)]
    labels: IndexLabels,
    /// Optional name for the index (matches pandas `Index.name`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    name: Option<String>,
    /// Runtime-only immutable identity for this label vector lineage.
    #[serde(skip, default = "next_index_label_identity")]
    label_identity: u64,
    /// Cached answer to "do the labels contain duplicates?".
    #[serde(skip)]
    duplicate_cache: OnceLock<bool>,
    /// AG-13: Cached sort order for adaptive backend selection.
    #[serde(skip)]
    sort_order_cache: OnceLock<SortOrder>,
    /// Runtime-only cache for labels-derived AACE semantic fingerprints.
    #[serde(skip)]
    semantic_fingerprint_cache: OnceLock<String>,
}
583
584impl PartialEq for Index {
585    fn eq(&self, other: &Self) -> bool {
586        self.labels_equal(other)
587    }
588}
589
590impl Eq for Index {}
591
592fn detect_duplicates(labels: &[IndexLabel]) -> bool {
593    let mut seen = FxHashMap::<&IndexLabel, ()>::default();
594    for label in labels {
595        if seen.insert(label, ()).is_some() {
596            return true;
597        }
598    }
599    false
600}
601
/// Normalize two identities into `(smaller, larger)` so that a pair maps to
/// the same key whichever way round it is given.
fn ordered_label_identity_pair(left: u64, right: u64) -> (u64, u64) {
    (left.min(right), left.max(right))
}
609
610impl Index {
611    #[must_use]
612    pub fn new(labels: Vec<IndexLabel>) -> Self {
613        Self {
614            labels: IndexLabels::new(labels),
615            name: None,
616            label_identity: next_index_label_identity(),
617            duplicate_cache: OnceLock::new(),
618            sort_order_cache: OnceLock::new(),
619            semantic_fingerprint_cache: OnceLock::new(),
620        }
621    }
622
623    fn labels_equal(&self, other: &Self) -> bool {
624        if self.label_identity == other.label_identity {
625            return true;
626        }
627
628        let key = ordered_label_identity_pair(self.label_identity, other.label_identity);
629        let cache = INDEX_LABEL_EQUALITY_CACHE.get_or_init(|| Mutex::new(FxHashMap::default()));
630        if let Some(equal) = cache
631            .lock()
632            .expect("index label equality cache poisoned")
633            .get(&key)
634            .copied()
635        {
636            return equal;
637        }
638
639        let equal = self.labels == other.labels;
640        let mut guard = cache.lock().expect("index label equality cache poisoned");
641        if guard.len() >= INDEX_LABEL_EQUALITY_CACHE_MAX {
642            guard.clear();
643        }
644        guard.insert(key, equal);
645        equal
646    }
647
648    /// Construct an index whose caller has already proven all labels unique.
649    ///
650    /// This preserves the public `Index::new` surface while letting alignment
651    /// builders carry their uniqueness proof into the runtime duplicate cache.
652    #[must_use]
653    #[doc(hidden)]
654    pub fn new_known_unique(labels: Vec<IndexLabel>) -> Self {
655        debug_assert!(!detect_duplicates(&labels));
656        let index = Self::new(labels);
657        let _ = index.duplicate_cache.set(false);
658        index
659    }
660
661    /// Construct an index whose labels are the dense unit range
662    /// `start..start+len`, without allocating the label vector until a caller
663    /// asks for label materialization.
664    #[must_use]
665    #[doc(hidden)]
666    pub fn new_known_unique_int64_unit_range(start: i64, len: usize) -> Self {
667        let labels = IndexLabels::new_int64_unit_range(start, len)
668            .expect("validated Int64 unit range bounds");
669        let index = Self {
670            labels,
671            name: None,
672            label_identity: next_index_label_identity(),
673            duplicate_cache: OnceLock::new(),
674            sort_order_cache: OnceLock::new(),
675            semantic_fingerprint_cache: OnceLock::new(),
676        };
677        let _ = index.duplicate_cache.set(false);
678        let _ = index.sort_order_cache.set(SortOrder::AscendingInt64);
679        index
680    }
681
682    #[must_use]
683    pub fn from_i64(values: Vec<i64>) -> Self {
684        Self::from_i64_values(values)
685    }
686
687    /// Construct an index over Int64 labels backed by a contiguous `Vec<i64>`
688    /// (br-frankenpandas-dxqpm). Label materialization into `IndexLabel`s is
689    /// deferred until a caller asks for `labels()`; clones of the index share
690    /// the typed backing instead of deep-copying the enum vector.
691    #[must_use]
692    #[doc(hidden)]
693    pub fn from_i64_values(values: Vec<i64>) -> Self {
694        Self {
695            labels: IndexLabels::new_int64_values(Arc::new(values)),
696            name: None,
697            label_identity: next_index_label_identity(),
698            duplicate_cache: OnceLock::new(),
699            sort_order_cache: OnceLock::new(),
700            semantic_fingerprint_cache: OnceLock::new(),
701        }
702    }
703
704    /// Raw `i64` view of an all-Int64 label vector, computing and caching it
705    /// on first request. `None` means at least one label is not Int64.
706    #[must_use]
707    #[doc(hidden)]
708    pub fn int64_label_values(&self) -> Option<Arc<Vec<i64>>> {
709        self.labels.int64_view()
710    }
711
712    /// The cached `i64` label view if already computed (never computes).
713    /// Outer `None` = not yet computed; `Some(None)` = known non-Int64.
714    #[must_use]
715    #[doc(hidden)]
716    pub fn cached_int64_label_values(&self) -> Option<Option<Arc<Vec<i64>>>> {
717        self.labels.cached_int64_view()
718    }
719
720    #[must_use]
721    pub fn from_utf8(values: Vec<String>) -> Self {
722        Self::new(values.into_iter().map(IndexLabel::from).collect())
723    }
724
725    #[must_use]
726    pub fn from_timedelta64(nanos: Vec<i64>) -> Self {
727        Self::new(nanos.into_iter().map(IndexLabel::Timedelta64).collect())
728    }
729
730    #[must_use]
731    pub fn from_datetime64(nanos: Vec<i64>) -> Self {
732        Self::new(nanos.into_iter().map(IndexLabel::Datetime64).collect())
733    }
734
735    #[must_use]
736    pub fn len(&self) -> usize {
737        self.labels.len()
738    }
739
740    #[must_use]
741    pub fn is_empty(&self) -> bool {
742        self.labels.is_empty()
743    }
744
745    #[must_use]
746    pub fn labels(&self) -> &[IndexLabel] {
747        self.labels.as_slice()
748    }
749
750    #[must_use]
751    #[doc(hidden)]
752    pub fn int64_unit_range_labels(&self) -> Option<(i64, usize)> {
753        self.labels
754            .int64_unit_range()
755            .map(|range| (range.start, range.len))
756    }
757
758    #[must_use]
759    pub fn semantic_labels_fingerprint_with<F>(&self, compute: F) -> String
760    where
761        F: FnOnce(&[IndexLabel]) -> String,
762    {
763        self.semantic_fingerprint_cache
764            .get_or_init(|| compute(self.labels()))
765            .clone()
766    }
767
768    /// Return the index name (matches `pd.Index.name`).
769    #[must_use]
770    pub fn name(&self) -> Option<&str> {
771        self.name.as_deref()
772    }
773
774    /// Return a new index with the given name (matches `pd.Index.set_names`).
775    #[must_use]
776    pub fn set_names(&self, name: Option<&str>) -> Self {
777        let mut idx = self.clone();
778        idx.name = name.map(String::from);
779        idx
780    }
781
782    /// Alias for `set_names` — set the index name, returning a new `Index`.
783    #[must_use]
784    pub fn set_name(&self, name: &str) -> Self {
785        self.set_names(Some(name))
786    }
787
788    /// Return a list of index names.
789    ///
790    /// Matches `pd.Index.names`. For a flat (non-MultiIndex) index this returns
791    /// a single-element list with the current name (or `None`).
792    #[must_use]
793    pub fn names(&self) -> Vec<Option<String>> {
794        vec![self.name.clone()]
795    }
796
797    /// Set names from a list.
798    ///
799    /// Matches `pd.Index.set_names([name])`. For flat index only the first
800    /// element is used. Panics if the list is empty.
801    #[must_use]
802    pub fn set_names_list(&self, names: &[Option<&str>]) -> Self {
803        assert!(
804            !names.is_empty(),
805            "set_names_list requires at least one name"
806        );
807        self.set_names(names[0])
808    }
809
810    /// Return the index as-is (flat index identity).
811    ///
812    /// Matches `pd.Index.to_flat_index()`. For a non-MultiIndex this is a
813    /// no-op that returns a clone. For a MultiIndex it would convert tuples
814    /// to flat labels.
815    #[must_use]
816    pub fn to_flat_index(&self) -> Self {
817        self.clone()
818    }
819
    /// Return a new index whose name is replaced by `name`.
    ///
    /// Passing `None` clears the name. This is a thin alias that forwards to
    /// `set_names`.
    #[must_use]
    pub fn rename_index(&self, name: Option<&str>) -> Self {
        self.set_names(name)
    }
825
826    /// Internal: propagate this index's name onto a newly created index.
827    fn propagate_name(&self, mut other: Self) -> Self {
828        other.name.clone_from(&self.name);
829        other
830    }
831
832    /// Internal: if both indexes share the same name, return it; otherwise None.
833    /// Matches pandas behavior for binary set operations.
834    fn shared_name(&self, other: &Self) -> Option<String> {
835        if self.name == other.name {
836            self.name.clone()
837        } else {
838            None
839        }
840    }
841
842    #[must_use]
843    pub fn has_duplicates(&self) -> bool {
844        if self.labels.int64_unit_range().is_some() {
845            return false;
846        }
847        *self.duplicate_cache.get_or_init(|| {
848            // Every `SortOrder::Ascending*` variant is STRICTLY ascending
849            // (`detect_sort_order` rejects equal neighbours with `a < b`), so a
850            // recognized sort order proves uniqueness with zero hashing
851            // (br-frankenpandas-idxdup). `sort_order()` is a single linear pass
852            // (itself cached and reused by the binary-search backends), far
853            // cheaper than the FxHashMap insert-per-label below; only genuinely
854            // unsorted indexes fall through to it.
855            if !matches!(self.sort_order(), SortOrder::Unsorted) {
856                return false;
857            }
858            detect_duplicates(self.labels())
859        })
860    }
861
862    /// Whether all index labels are unique.
863    ///
864    /// Matches `pd.Index.is_unique`.
865    #[must_use]
866    pub fn is_unique(&self) -> bool {
867        !self.has_duplicates()
868    }
869
870    /// Get the position (integer location) of a label.
871    ///
872    /// Matches `pd.Index.get_loc(label)`.
873    #[must_use]
874    pub fn get_loc(&self, label: &IndexLabel) -> Option<usize> {
875        self.position(label)
876    }
877
878    /// AG-13: Lazily detect and cache the sort order of this index.
879    #[must_use]
880    fn sort_order(&self) -> SortOrder {
881        if self.labels.int64_unit_range().is_some() {
882            return SortOrder::AscendingInt64;
883        }
884        *self
885            .sort_order_cache
886            .get_or_init(|| detect_sort_order(self.labels()))
887    }
888
889    /// Returns `true` if this index is sorted (strictly ascending, no duplicates).
890    #[must_use]
891    pub fn is_sorted(&self) -> bool {
892        !matches!(self.sort_order(), SortOrder::Unsorted)
893    }
894
895    /// AG-13: Adaptive position lookup.
896    ///
897    /// For sorted `Int64` or `Utf8` indexes, uses binary search (O(log n)).
898    /// For unsorted indexes, falls back to linear scan (O(n)).
899    #[must_use]
900    pub fn position(&self, needle: &IndexLabel) -> Option<usize> {
901        if let (Some(range), IndexLabel::Int64(target)) = (self.labels.int64_unit_range(), needle) {
902            return range.position(*target);
903        }
904        match self.sort_order() {
905            SortOrder::AscendingInt64 => {
906                if let IndexLabel::Int64(target) = needle {
907                    self.labels
908                        .binary_search_by(|label| {
909                            if let IndexLabel::Int64(v) = label {
910                                v.cmp(target)
911                            } else {
912                                std::cmp::Ordering::Less
913                            }
914                        })
915                        .ok()
916                } else {
917                    None // Type mismatch: no Int64 label can match a Utf8 needle
918                }
919            }
920            SortOrder::AscendingUtf8 => {
921                if let IndexLabel::Utf8(target) = needle {
922                    self.labels
923                        .binary_search_by(|label| {
924                            if let IndexLabel::Utf8(v) = label {
925                                v.as_str().cmp(target.as_str())
926                            } else {
927                                std::cmp::Ordering::Less
928                            }
929                        })
930                        .ok()
931                } else {
932                    None
933                }
934            }
935            SortOrder::AscendingTimedelta64 => {
936                if let IndexLabel::Timedelta64(target) = needle {
937                    self.labels
938                        .binary_search_by(|label| {
939                            if let IndexLabel::Timedelta64(v) = label {
940                                v.cmp(target)
941                            } else {
942                                std::cmp::Ordering::Less
943                            }
944                        })
945                        .ok()
946                } else {
947                    None
948                }
949            }
950            SortOrder::AscendingDatetime64 => {
951                if let IndexLabel::Datetime64(target) = needle {
952                    self.labels
953                        .binary_search_by(|label| {
954                            if let IndexLabel::Datetime64(v) = label {
955                                v.cmp(target)
956                            } else {
957                                std::cmp::Ordering::Less
958                            }
959                        })
960                        .ok()
961                } else {
962                    None
963                }
964            }
965            SortOrder::Unsorted => self.labels.iter().position(|label| label == needle),
966        }
967    }
968
969    #[must_use]
970    pub fn position_map_first(&self) -> HashMap<IndexLabel, usize> {
971        let mut positions = HashMap::with_capacity(self.labels.len());
972        for (idx, label) in self.labels.iter().enumerate() {
973            positions.entry(label.clone()).or_insert(idx);
974        }
975        positions
976    }
977
978    fn position_map_first_ref(&self) -> FxHashMap<&IndexLabel, usize> {
979        let mut positions =
980            FxHashMap::with_capacity_and_hasher(self.labels.len(), Default::default());
981        for (idx, label) in self.labels.iter().enumerate() {
982            positions.entry(label).or_insert(idx);
983        }
984        positions
985    }
986
987    // ── Pandas Index Model: lookup and membership ──────────────────────
988
989    #[must_use]
990    pub fn contains(&self, label: &IndexLabel) -> bool {
991        self.position(label).is_some()
992    }
993
994    #[must_use]
995    pub fn get_indexer(&self, target: &Index) -> Vec<Option<usize>> {
996        // When `self` is strictly ascending (any SortOrder::Ascending* ⟹
997        // globally IndexLabel::Ord-sorted and unique) we can resolve target
998        // positions without building the O(n) FxHashMap of `self`
999        // (br-frankenpandas-idxdup):
1000        //   * target also sorted  ⇒ one two-pointer merge, O(n+m), no hashing;
1001        //   * target unsorted     ⇒ binary-search each label, O(m log n).
1002        // Both yield the same first-occurrence position the hash path returns
1003        // (uniqueness makes "first" the only one); unsorted `self` keeps the
1004        // hash path so a per-label scan never degrades to O(n·m).
1005        if !matches!(self.sort_order(), SortOrder::Unsorted) {
1006            let labels = self.labels();
1007            let targets = target.labels();
1008            if !matches!(target.sort_order(), SortOrder::Unsorted) {
1009                let mut out = Vec::with_capacity(targets.len());
1010                let mut i = 0usize;
1011                for label in targets {
1012                    while i < labels.len() && labels[i] < *label {
1013                        i += 1;
1014                    }
1015                    if i < labels.len() && labels[i] == *label {
1016                        out.push(Some(i));
1017                    } else {
1018                        out.push(None);
1019                    }
1020                }
1021                return out;
1022            }
1023            return targets.iter().map(|label| self.position(label)).collect();
1024        }
1025        let map = self.position_map_first_ref();
1026        target
1027            .labels
1028            .iter()
1029            .map(|label| map.get(label).copied())
1030            .collect()
1031    }
1032
1033    #[must_use]
1034    pub fn isin(&self, values: &[IndexLabel]) -> Vec<bool> {
1035        let set: FxHashMap<&IndexLabel, ()> = values.iter().map(|v| (v, ())).collect();
1036        self.labels.iter().map(|l| set.contains_key(l)).collect()
1037    }
1038
1039    // ── Pandas Index Model: deduplication ──────────────────────────────
1040
1041    #[must_use]
1042    pub fn is_monotonic_increasing(&self) -> bool {
1043        if self.labels.len() <= 1 {
1044            return true;
1045        }
1046        for pair in self.labels.windows(2) {
1047            if pair[0] > pair[1] {
1048                return false;
1049            }
1050        }
1051        true
1052    }
1053
1054    /// Alias for is_monotonic_increasing.
1055    #[must_use]
1056    pub fn is_monotonic(&self) -> bool {
1057        self.is_monotonic_increasing()
1058    }
1059
1060    #[must_use]
1061    pub fn is_monotonic_decreasing(&self) -> bool {
1062        if self.labels.len() <= 1 {
1063            return true;
1064        }
1065        for pair in self.labels.windows(2) {
1066            if pair[0] < pair[1] {
1067                return false;
1068            }
1069        }
1070        true
1071    }
1072
1073    #[must_use]
1074    pub fn unique(&self) -> Self {
1075        // A strictly-ascending index (every recognized SortOrder) is already
1076        // all-unique in first-seen order, so unique() is an identity — return an
1077        // O(1) Arc-sharing clone instead of hashing every label and rebuilding
1078        // the vector (br-frankenpandas-idxdup dedup family).
1079        if !matches!(self.sort_order(), SortOrder::Unsorted) {
1080            return self.clone();
1081        }
1082        let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1083        let labels: Vec<IndexLabel> = self
1084            .labels
1085            .iter()
1086            .filter(|l| seen.insert(l, ()).is_none())
1087            .cloned()
1088            .collect();
1089        self.propagate_name(Self::new(labels))
1090    }
1091
1092    #[must_use]
1093    pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
1094        let mut result = vec![false; self.labels.len()];
1095        // Strictly-ascending => no duplicates under any keep mode; skip hashing.
1096        if !matches!(self.sort_order(), SortOrder::Unsorted) {
1097            return result;
1098        }
1099        match keep {
1100            DuplicateKeep::First => {
1101                let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1102                for (i, label) in self.labels.iter().enumerate() {
1103                    if seen.insert(label, ()).is_some() {
1104                        result[i] = true;
1105                    }
1106                }
1107            }
1108            DuplicateKeep::Last => {
1109                let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1110                for (i, label) in self.labels.iter().enumerate().rev() {
1111                    if seen.insert(label, ()).is_some() {
1112                        result[i] = true;
1113                    }
1114                }
1115            }
1116            DuplicateKeep::None => {
1117                let mut counts = FxHashMap::<&IndexLabel, usize>::default();
1118                for label in &self.labels {
1119                    *counts.entry(label).or_insert(0) += 1;
1120                }
1121                for (i, label) in self.labels.iter().enumerate() {
1122                    if counts[label] > 1 {
1123                        result[i] = true;
1124                    }
1125                }
1126            }
1127        }
1128        result
1129    }
1130
1131    #[must_use]
1132    pub fn drop_duplicates(&self) -> Self {
1133        self.drop_duplicates_keep(DuplicateKeep::First)
1134    }
1135
1136    /// Drop duplicated labels with explicit keep behavior.
1137    ///
1138    /// Matches `pd.Index.drop_duplicates(keep=...)`.
1139    #[must_use]
1140    pub fn drop_duplicates_keep(&self, keep: DuplicateKeep) -> Self {
1141        // Strictly-ascending => nothing is dropped; O(1) Arc-sharing clone.
1142        if !matches!(self.sort_order(), SortOrder::Unsorted) {
1143            return self.clone();
1144        }
1145        let duplicated = self.duplicated(keep);
1146        let labels = self
1147            .labels
1148            .iter()
1149            .zip(duplicated)
1150            .filter(|(_, is_duplicated)| !is_duplicated)
1151            .map(|(label, _)| label.clone())
1152            .collect();
1153        self.propagate_name(Self::new(labels))
1154    }
1155
1156    // ── Pandas Index Model: set operations ─────────────────────────────
1157
1158    #[must_use]
1159    pub fn intersection(&self, other: &Self) -> Self {
1160        // Both strictly ascending (every SortOrder::Ascending* is globally
1161        // IndexLabel::Ord-sorted and unique) => a two-pointer merge yields the
1162        // same self-ordered, deduplicated intersection without building either
1163        // FxHashMap (br-frankenpandas-idxdup set ops).
1164        if let Some(labels) = self.sorted_merge_set_op(other, SetMergeKind::Intersection) {
1165            let mut result = Self::new(labels);
1166            result.name = self.shared_name(other);
1167            return result;
1168        }
1169        let other_set = other.position_map_first_ref();
1170        let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1171        let labels: Vec<IndexLabel> = self
1172            .labels
1173            .iter()
1174            .filter(|l| other_set.contains_key(l) && seen.insert(l, ()).is_none())
1175            .cloned()
1176            .collect();
1177        let mut result = Self::new(labels);
1178        result.name = self.shared_name(other);
1179        result
1180    }
1181
1182    /// Hash-free two-pointer set merge for two strictly-ascending (hence
1183    /// `IndexLabel::Ord`-sorted and unique) indexes; returns `None` when either
1184    /// side is unsorted so the caller keeps its FxHashMap path. Emits labels in
1185    /// `self`'s order, which equals the sorted order on the fast path — exactly
1186    /// what the hash path's `self`-iteration-order filter produces.
1187    fn sorted_merge_set_op(&self, other: &Self, kind: SetMergeKind) -> Option<Vec<IndexLabel>> {
1188        if matches!(self.sort_order(), SortOrder::Unsorted)
1189            || matches!(other.sort_order(), SortOrder::Unsorted)
1190        {
1191            return None;
1192        }
1193        let a = self.labels();
1194        let b = other.labels();
1195        let mut labels = Vec::with_capacity(a.len().min(b.len()));
1196        let (mut i, mut j) = (0usize, 0usize);
1197        while i < a.len() {
1198            if j >= b.len() {
1199                if kind == SetMergeKind::Difference {
1200                    labels.extend_from_slice(&a[i..]);
1201                }
1202                break;
1203            }
1204            match a[i].cmp(&b[j]) {
1205                std::cmp::Ordering::Less => {
1206                    if kind == SetMergeKind::Difference {
1207                        labels.push(a[i].clone());
1208                    }
1209                    i += 1;
1210                }
1211                std::cmp::Ordering::Greater => j += 1,
1212                std::cmp::Ordering::Equal => {
1213                    if kind == SetMergeKind::Intersection {
1214                        labels.push(a[i].clone());
1215                    }
1216                    i += 1;
1217                    j += 1;
1218                }
1219            }
1220        }
1221        Some(labels)
1222    }
1223
1224    #[must_use]
1225    pub fn union_with(&self, other: &Self) -> Self {
1226        let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1227        let mut labels = Vec::with_capacity(self.labels.len() + other.labels.len());
1228        for label in self.labels.iter().chain(other.labels.iter()) {
1229            if seen.insert(label, ()).is_none() {
1230                labels.push(label.clone());
1231            }
1232        }
1233        let mut result = Self::new(labels);
1234        result.name = self.shared_name(other);
1235        result
1236    }
1237
1238    #[must_use]
1239    pub fn difference(&self, other: &Self) -> Self {
1240        // Two-pointer merge when both sides are strictly ascending (see
1241        // intersection / sorted_merge_set_op).
1242        if let Some(labels) = self.sorted_merge_set_op(other, SetMergeKind::Difference) {
1243            return self.propagate_name(Self::new(labels));
1244        }
1245        let other_set = other.position_map_first_ref();
1246        let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1247        let labels: Vec<IndexLabel> = self
1248            .labels
1249            .iter()
1250            .filter(|l| !other_set.contains_key(l) && seen.insert(l, ()).is_none())
1251            .cloned()
1252            .collect();
1253        self.propagate_name(Self::new(labels))
1254    }
1255
1256    #[must_use]
1257    pub fn symmetric_difference(&self, other: &Self) -> Self {
1258        let self_set = self.position_map_first_ref();
1259        let other_set = other.position_map_first_ref();
1260        let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1261        let mut labels = Vec::new();
1262        for label in &self.labels {
1263            if !other_set.contains_key(label) && seen.insert(label, ()).is_none() {
1264                labels.push(label.clone());
1265            }
1266        }
1267        for label in &other.labels {
1268            if !self_set.contains_key(label) && seen.insert(label, ()).is_none() {
1269                labels.push(label.clone());
1270            }
1271        }
1272        let mut result = Self::new(labels);
1273        result.name = self.shared_name(other);
1274        result
1275    }
1276
1277    // ── Pandas Index Model: ordering and slicing ───────────────────────
1278
1279    #[must_use]
1280    pub fn argsort(&self) -> Vec<usize> {
1281        let mut indices: Vec<usize> = (0..self.labels.len()).collect();
1282        indices.sort_by(|&a, &b| self.labels[a].cmp(&self.labels[b]));
1283        indices
1284    }
1285
1286    #[must_use]
1287    pub fn sort_values(&self) -> Self {
1288        let order = self.argsort();
1289        self.propagate_name(Self::new(
1290            order.iter().map(|&i| self.labels[i].clone()).collect(),
1291        ))
1292    }
1293
1294    #[must_use]
1295    pub fn take(&self, indices: &[usize]) -> Self {
1296        self.propagate_name(Self::new(
1297            indices.iter().map(|&i| self.labels[i].clone()).collect(),
1298        ))
1299    }
1300
1301    #[must_use]
1302    pub fn slice(&self, start: usize, len: usize) -> Self {
1303        let start = start.min(self.labels.len());
1304        let end = start.saturating_add(len).min(self.labels.len());
1305        self.propagate_name(Self::new(self.labels[start..end].to_vec()))
1306    }
1307
1308    #[must_use]
1309    pub fn from_range(start: i64, stop: i64, step: i64) -> Self {
1310        let mut labels = Vec::new();
1311        let mut val = start;
1312        if step > 0 {
1313            while val < stop {
1314                labels.push(IndexLabel::Int64(val));
1315                val += step;
1316            }
1317        } else if step < 0 {
1318            while val > stop {
1319                labels.push(IndexLabel::Int64(val));
1320                val += step;
1321            }
1322        }
1323        Self::new(labels)
1324    }
1325
1326    // ── Pandas Index Model: aggregation ──────────────────────────────
1327
1328    /// Minimum label.
1329    ///
1330    /// Matches `pd.Index.min()`.
1331    #[must_use]
1332    pub fn min(&self) -> Option<&IndexLabel> {
1333        self.labels.iter().min()
1334    }
1335
1336    /// Maximum label.
1337    ///
1338    /// Matches `pd.Index.max()`.
1339    #[must_use]
1340    pub fn max(&self) -> Option<&IndexLabel> {
1341        self.labels.iter().max()
1342    }
1343
1344    /// Position of the minimum label.
1345    ///
1346    /// Matches `pd.Index.argmin()`.
1347    #[must_use]
1348    pub fn argmin(&self) -> Option<usize> {
1349        self.labels
1350            .iter()
1351            .enumerate()
1352            .min_by(|(_, a), (_, b)| a.cmp(b))
1353            .map(|(i, _)| i)
1354    }
1355
1356    /// Position of the maximum label.
1357    ///
1358    /// Matches `pd.Index.argmax()`.
1359    #[must_use]
1360    pub fn argmax(&self) -> Option<usize> {
1361        self.labels
1362            .iter()
1363            .enumerate()
1364            .max_by(|(_, a), (_, b)| a.cmp(b))
1365            .map(|(i, _)| i)
1366    }
1367
1368    /// Number of unique labels.
1369    ///
1370    /// Matches `pd.Index.nunique()`.
1371    #[must_use]
1372    pub fn nunique(&self) -> usize {
1373        self.nunique_with_dropna(true)
1374    }
1375
1376    /// Number of unique labels with explicit missing-value control.
1377    ///
1378    /// Matches `pd.Index.nunique(dropna=...)`.
1379    #[must_use]
1380    pub fn nunique_with_dropna(&self, dropna: bool) -> usize {
1381        self.unique()
1382            .labels
1383            .iter()
1384            .filter(|label| !dropna || !label.is_missing())
1385            .count()
1386    }
1387
1388    // ── Pandas Index Model: transformation ───────────────────────────
1389
1390    /// Apply a function to each label, producing a new Index.
1391    ///
1392    /// Matches `pd.Index.map(func)`.
1393    #[must_use]
1394    pub fn map<F>(&self, func: F) -> Self
1395    where
1396        F: Fn(&IndexLabel) -> IndexLabel,
1397    {
1398        self.propagate_name(Self::new(self.labels.iter().map(&func).collect()))
1399    }
1400
1401    /// Rename the index (create a copy with transformed labels).
1402    ///
1403    /// Matches `pd.Index.rename(name)` / `pd.Index.set_names(name)`.
1404    /// Applies a function to each label.
1405    #[must_use]
1406    pub fn rename<F>(&self, func: F) -> Self
1407    where
1408        F: Fn(&IndexLabel) -> IndexLabel,
1409    {
1410        self.map(func)
1411    }
1412
1413    /// Drop specific labels from the index.
1414    ///
1415    /// Matches `pd.Index.drop(labels)`.
1416    #[must_use]
1417    pub fn drop_labels(&self, labels_to_drop: &[IndexLabel]) -> Self {
1418        self.propagate_name(Self::new(
1419            self.labels
1420                .iter()
1421                .filter(|l| !labels_to_drop.contains(l))
1422                .cloned()
1423                .collect(),
1424        ))
1425    }
1426
1427    /// Convert all labels to Int64 (if possible) or Utf8.
1428    ///
1429    /// Matches `pd.Index.astype(dtype)`. Returns a new Index with labels
1430    /// converted to the target type representation.
1431    #[must_use]
1432    pub fn astype_int(&self) -> Self {
1433        self.propagate_name(Self::new(
1434            self.labels
1435                .iter()
1436                .map(|l| match l {
1437                    IndexLabel::Int64(_) => l.clone(),
1438                    IndexLabel::Utf8(s) => s
1439                        .parse::<i64>()
1440                        .map_or_else(|_| l.clone(), IndexLabel::Int64),
1441                    IndexLabel::Timedelta64(ns) => IndexLabel::Int64(*ns),
1442                    IndexLabel::Datetime64(ns) => IndexLabel::Int64(*ns),
1443                    // Missing labels have no integer form; preserved like
1444                    // unparseable strings (pandas astype(int) raises on NaN —
1445                    // callers reject before reaching here).
1446                    IndexLabel::Null(_) => l.clone(),
1447                })
1448                .collect(),
1449        ))
1450    }
1451
1452    /// Convert all labels to Utf8 strings.
1453    ///
1454    /// Matches `pd.Index.astype(str)`.
1455    #[must_use]
1456    pub fn astype_str(&self) -> Self {
1457        self.propagate_name(Self::new(
1458            self.labels
1459                .iter()
1460                .map(|l| match l {
1461                    IndexLabel::Int64(v) => IndexLabel::Utf8(v.to_string()),
1462                    IndexLabel::Utf8(_) => l.clone(),
1463                    IndexLabel::Timedelta64(ns) => IndexLabel::Utf8(Timedelta::format(*ns)),
1464                    IndexLabel::Datetime64(ns) => IndexLabel::Utf8(format_datetime_ns(*ns)),
1465                    // astype(str) uses Python str() forms: str(None)=='None',
1466                    // str(nan)=='nan' (LOWERCASE, unlike the repr surface),
1467                    // str(NaT)=='NaT'. Verified pandas 2.2.3.
1468                    IndexLabel::Null(kind) => IndexLabel::Utf8(
1469                        match kind {
1470                            fp_types::NullKind::Null => "None",
1471                            fp_types::NullKind::NaN => "nan",
1472                            fp_types::NullKind::NaT => "NaT",
1473                        }
1474                        .to_owned(),
1475                    ),
1476                })
1477                .collect(),
1478        ))
1479    }
1480
1481    /// Convert labels to a pandas dtype string.
1482    ///
1483    /// Matches `pd.Index.astype(dtype)` for the generic dtype names this crate
1484    /// can represent directly.
1485    pub fn astype(&self, dtype: &str) -> Result<Self, IndexError> {
1486        match dtype {
1487            "int" | "int64" => Ok(self.astype_int()),
1488            "str" | "string" | "object" => Ok(self.astype_str()),
1489            "datetime64[ns]" => {
1490                ensure_index_kind(
1491                    self,
1492                    |label| matches!(label, IndexLabel::Datetime64(_)),
1493                    "DatetimeIndex",
1494                )?;
1495                Ok(self.clone())
1496            }
1497            "timedelta64[ns]" => {
1498                ensure_index_kind(
1499                    self,
1500                    |label| matches!(label, IndexLabel::Timedelta64(_)),
1501                    "TimedeltaIndex",
1502                )?;
1503                Ok(self.clone())
1504            }
1505            other => Err(IndexError::InvalidArgument(format!(
1506                "unsupported Index.astype dtype {other:?}"
1507            ))),
1508        }
1509    }
1510
1511    /// Equality check against another Index.
1512    ///
1513    /// Matches `pd.Index.equals(other)`. Returns true iff `other` has
1514    /// the same labels in the same order. Names are ignored (use
1515    /// `identical` for a name-sensitive check).
1516    #[must_use]
1517    pub fn equals(&self, other: &Self) -> bool {
1518        self.labels_equal(other)
1519    }
1520
1521    /// Strict equality including name.
1522    ///
1523    /// Matches `pd.Index.identical(other)`. Requires the same labels in
1524    /// the same order AND the same name.
1525    #[must_use]
1526    pub fn identical(&self, other: &Self) -> bool {
1527        self.labels_equal(other) && self.name == other.name
1528    }
1529
1530    fn value_counts_raw(
1531        &self,
1532        sort: bool,
1533        ascending: bool,
1534        dropna: bool,
1535    ) -> (Vec<(IndexLabel, usize)>, usize) {
1536        let mut seen_order: Vec<IndexLabel> = Vec::new();
1537        let mut counts: FxHashMap<IndexLabel, usize> = FxHashMap::default();
1538        let mut total = 0usize;
1539        for label in &self.labels {
1540            if dropna && label.is_missing() {
1541                continue;
1542            }
1543            total += 1;
1544            if !counts.contains_key(label) {
1545                seen_order.push(label.clone());
1546            }
1547            *counts.entry(label.clone()).or_insert(0) += 1;
1548        }
1549        let mut pairs: Vec<(IndexLabel, usize)> = seen_order
1550            .into_iter()
1551            .map(|label| {
1552                let count = counts[&label];
1553                (label, count)
1554            })
1555            .collect();
1556        if sort {
1557            if ascending {
1558                pairs.sort_by_key(|entry| entry.1);
1559            } else {
1560                pairs.sort_by_key(|entry| std::cmp::Reverse(entry.1));
1561            }
1562        }
1563        (pairs, total)
1564    }
1565
1566    /// Count occurrences of each distinct label.
1567    ///
1568    /// Matches `pd.Index.value_counts()` default behavior. Missing labels are
1569    /// dropped, counts are sorted descending, and first-seen order breaks ties.
1570    #[must_use]
1571    pub fn value_counts(&self) -> Vec<(IndexLabel, usize)> {
1572        self.value_counts_raw(true, false, true).0
1573    }
1574
1575    /// Count occurrences of each distinct label with pandas-style options.
1576    ///
1577    /// Matches `pd.Index.value_counts(normalize, sort, ascending, dropna)`.
1578    /// Returns `Scalar::Int64` counts unless `normalize=true`, in which case
1579    /// the values are `Scalar::Float64` fractions.
1580    #[must_use]
1581    pub fn value_counts_with_options(
1582        &self,
1583        normalize: bool,
1584        sort: bool,
1585        ascending: bool,
1586        dropna: bool,
1587    ) -> Vec<(IndexLabel, Scalar)> {
1588        let (pairs, total) = self.value_counts_raw(sort, ascending, dropna);
1589        if normalize {
1590            let denom = total as f64;
1591            return pairs
1592                .into_iter()
1593                .map(|(label, count)| (label, Scalar::Float64(count as f64 / denom)))
1594                .collect();
1595        }
1596
1597        pairs
1598            .into_iter()
1599            .map(|(label, count)| (label, Scalar::Int64(count as i64)))
1600            .collect()
1601    }
1602
1603    /// Shift the labels by `periods` positions, filling vacated slots
1604    /// with `fill`.
1605    ///
1606    /// Matches `pd.Index.shift(periods, fill_value=...)` for the
1607    /// positional form (pandas also supports a `freq`-aware shift for
1608    /// datetime indexes; that path is out of scope here). Positive
1609    /// periods shift right; negative shift left.
1610    #[must_use]
1611    pub fn shift(&self, periods: i64, fill: IndexLabel) -> Self {
1612        let len = self.labels.len();
1613        if len == 0 || periods == 0 {
1614            return self.clone();
1615        }
1616        let mut out: Vec<IndexLabel> = Vec::with_capacity(len);
1617        let abs = periods.unsigned_abs() as usize;
1618        if abs >= len {
1619            for _ in 0..len {
1620                out.push(fill.clone());
1621            }
1622        } else if periods > 0 {
1623            for _ in 0..abs {
1624                out.push(fill.clone());
1625            }
1626            out.extend_from_slice(&self.labels[..len - abs]);
1627        } else {
1628            out.extend_from_slice(&self.labels[abs..]);
1629            for _ in 0..abs {
1630                out.push(fill.clone());
1631            }
1632        }
1633        self.propagate_name(Self::new(out))
1634    }
1635
1636    /// Nearest-preceding-or-equal label lookup.
1637    ///
1638    /// Matches `pd.Index.asof(label)` for monotonic-increasing
1639    /// indexes: returns the largest label `<= key`. Returns `None`
1640    /// when no such label exists (key precedes every entry). The
1641    /// index is assumed sorted; callers should `sort_values()` first
1642    /// if needed (pandas emits a warning in the non-monotonic case
1643    /// but still does a linear scan — we match that behavior).
1644    #[must_use]
1645    pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
1646        let mut best: Option<&IndexLabel> = None;
1647        for label in &self.labels {
1648            if label.is_missing() {
1649                continue;
1650            }
1651            if label.cmp(key).is_le() {
1652                best = Some(label);
1653            } else {
1654                break;
1655            }
1656        }
1657        best.cloned()
1658    }
1659
1660    /// Position where `value` would be inserted to keep the index
1661    /// sorted ascending.
1662    ///
1663    /// Matches `pd.Index.searchsorted(value, side)`. `side` is
1664    /// `"left"` (first valid insertion) or `"right"` (last). Returns
1665    /// an error for unknown sides or missing needles.
1666    pub fn searchsorted(&self, value: &IndexLabel, side: &str) -> Result<usize, IndexError> {
1667        if side != "left" && side != "right" {
1668            return Err(IndexError::InvalidArgument(format!(
1669                "searchsorted: side must be 'left' or 'right', got {side:?}"
1670            )));
1671        }
1672        if value.is_missing() {
1673            return Err(IndexError::InvalidArgument(
1674                "searchsorted: needle cannot be missing".to_owned(),
1675            ));
1676        }
1677        let mut lo = 0usize;
1678        let mut hi = self.labels.len();
1679        while lo < hi {
1680            let mid = lo + (hi - lo) / 2;
1681            let cmp = if self.labels[mid].is_missing() {
1682                std::cmp::Ordering::Greater
1683            } else {
1684                self.labels[mid].cmp(value)
1685            };
1686            use std::cmp::Ordering;
1687            let go_right = matches!(
1688                (cmp, side),
1689                (Ordering::Less, _) | (Ordering::Equal, "right")
1690            );
1691            if go_right {
1692                lo = mid + 1;
1693            } else {
1694                hi = mid;
1695            }
1696        }
1697        Ok(lo)
1698    }
1699
1700    /// Approximate memory footprint (bytes) occupied by the labels.
1701    ///
1702    /// Matches `pd.Index.memory_usage(deep=...)`. `deep=false` uses
1703    /// a fixed per-label width (8 bytes for Int64/Timedelta64/
1704    /// Datetime64, pointer-size for Utf8); `deep=true` additionally
1705    /// accounts for each Utf8 string's byte length.
1706    #[must_use]
1707    pub fn memory_usage(&self, deep: bool) -> usize {
1708        self.labels
1709            .iter()
1710            .map(|label| match label {
1711                IndexLabel::Int64(_)
1712                | IndexLabel::Timedelta64(_)
1713                | IndexLabel::Datetime64(_)
1714                | IndexLabel::Null(_) => 8,
1715                IndexLabel::Utf8(s) => {
1716                    if deep {
1717                        std::mem::size_of::<String>() + s.len()
1718                    } else {
1719                        std::mem::size_of::<String>()
1720                    }
1721                }
1722            })
1723            .sum()
1724    }
1725
1726    /// Number of levels in this index.
1727    ///
1728    /// Matches `pd.Index.nlevels`. Always 1 for the flat Index type;
1729    /// MultiIndex already overrides this. Provided so callers can
1730    /// write level-agnostic code that works on either kind.
1731    #[must_use]
1732    pub fn nlevels(&self) -> usize {
1733        1
1734    }
1735
1736    /// Materialize labels into an owned `Vec<IndexLabel>`.
1737    ///
1738    /// Matches `pd.Index.to_list()`. Convenience helper for callers
1739    /// that need ownership without manually cloning via `labels()`.
1740    #[must_use]
1741    pub fn to_list(&self) -> Vec<IndexLabel> {
1742        self.labels().to_vec()
1743    }
1744
1745    /// Stringify each label using its `Display` impl.
1746    ///
1747    /// Matches `pd.Index.format()` / `pd.Index.astype(str).tolist()`.
1748    /// Result is a `Vec<String>` in index order.
1749    #[must_use]
1750    pub fn format(&self) -> Vec<String> {
1751        self.labels.iter().map(IndexLabel::to_string).collect()
1752    }
1753
1754    /// Replace labels at positions where `cond` is true with `value`.
1755    ///
1756    /// Matches `pd.Index.putmask(cond, value)`. A shorter `cond`
1757    /// leaves trailing labels unchanged (pandas-style lenient
1758    /// alignment); a longer `cond` is silently truncated. The name
1759    /// is preserved.
1760    #[must_use]
1761    pub fn putmask(&self, cond: &[bool], value: &IndexLabel) -> Self {
1762        let new_labels: Vec<IndexLabel> = self
1763            .labels
1764            .iter()
1765            .enumerate()
1766            .map(|(i, label)| {
1767                if cond.get(i).copied().unwrap_or(false) {
1768                    value.clone()
1769                } else {
1770                    label.clone()
1771                }
1772            })
1773            .collect();
1774        self.propagate_name(Self::new(new_labels))
1775    }
1776
1777    /// Whether any label coerces to true.
1778    ///
1779    /// Matches `pd.Index.any()`. Non-zero integers, non-empty strings,
1780    /// and non-NaT timedeltas count as truthy. Missing labels are
1781    /// treated as falsy. Empty index returns false.
1782    #[must_use]
1783    pub fn any(&self) -> bool {
1784        self.labels.iter().any(index_label_is_truthy)
1785    }
1786
1787    /// Whether all labels coerce to true.
1788    ///
1789    /// Matches `pd.Index.all()`. Empty index returns true (pandas
1790    /// convention: vacuously true). Missing labels count as falsy.
1791    #[must_use]
1792    pub fn all(&self) -> bool {
1793        self.labels.iter().all(index_label_is_truthy)
1794    }
1795
1796    /// Drop missing labels, preserving order.
1797    ///
1798    /// Matches `pd.Index.dropna()`. Labels whose `is_missing()` returns
1799    /// true are removed. The name (if any) is preserved.
1800    #[must_use]
1801    pub fn dropna(&self) -> Self {
1802        self.propagate_name(Self::new(
1803            self.labels
1804                .iter()
1805                .filter(|label| !label.is_missing())
1806                .cloned()
1807                .collect(),
1808        ))
1809    }
1810
1811    /// Insert a new label at the given position.
1812    ///
1813    /// Matches `pd.Index.insert(loc, item)`. `loc` is an ordinal position
1814    /// where the new label is inserted; positions equal to `len()` append
1815    /// to the end. Out-of-bounds positions return an `OutOfBounds` error.
1816    pub fn insert(&self, loc: usize, item: IndexLabel) -> Result<Self, IndexError> {
1817        if loc > self.labels.len() {
1818            return Err(IndexError::OutOfBounds {
1819                position: loc,
1820                length: self.labels.len(),
1821            });
1822        }
1823        let mut labels = self.labels().to_vec();
1824        labels.insert(loc, item);
1825        Ok(self.propagate_name(Self::new(labels)))
1826    }
1827
1828    /// Delete the label at the given position.
1829    ///
1830    /// Matches `pd.Index.delete(loc)`. Returns an `OutOfBounds` error
1831    /// for positions outside `0..len()`.
1832    pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
1833        if loc >= self.labels.len() {
1834            return Err(IndexError::OutOfBounds {
1835                position: loc,
1836                length: self.labels.len(),
1837            });
1838        }
1839        let mut labels = self.labels().to_vec();
1840        labels.remove(loc);
1841        Ok(self.propagate_name(Self::new(labels)))
1842    }
1843
1844    /// Append another index to the end of this one.
1845    ///
1846    /// Matches `pd.Index.append(other)`. The returned index contains
1847    /// `self.labels` followed by `other.labels`. Name is preserved from
1848    /// `self`.
1849    #[must_use]
1850    pub fn append(&self, other: &Self) -> Self {
1851        let mut labels = self.labels().to_vec();
1852        labels.extend(other.labels.iter().cloned());
1853        self.propagate_name(Self::new(labels))
1854    }
1855
1856    /// Repeat each label `repeats` times.
1857    ///
1858    /// Matches `pd.Index.repeat(repeats)`. `repeats=0` yields an empty
1859    /// index; `repeats=1` is a no-op clone. Name is preserved.
1860    #[must_use]
1861    pub fn repeat(&self, repeats: usize) -> Self {
1862        if repeats == 0 {
1863            return self.propagate_name(Self::new(Vec::new()));
1864        }
1865        if repeats == 1 {
1866            return self.clone();
1867        }
1868        let mut out = Vec::with_capacity(self.labels.len() * repeats);
1869        for label in &self.labels {
1870            for _ in 0..repeats {
1871                out.push(label.clone());
1872            }
1873        }
1874        self.propagate_name(Self::new(out))
1875    }
1876
1877    /// Fill missing labels with the provided scalar.
1878    ///
1879    /// Matches `pd.Index.fillna(value)`.
1880    #[must_use]
1881    pub fn fillna(&self, value: &IndexLabel) -> Self {
1882        self.propagate_name(Self::new(
1883            self.labels
1884                .iter()
1885                .map(|label| {
1886                    if label.is_missing() {
1887                        value.clone()
1888                    } else {
1889                        label.clone()
1890                    }
1891                })
1892                .collect(),
1893        ))
1894    }
1895
1896    /// Matches `pd.Index.isna()`.
1897    #[must_use]
1898    pub fn isna(&self) -> Vec<bool> {
1899        self.labels.iter().map(IndexLabel::is_missing).collect()
1900    }
1901
1902    /// Matches `pd.Index.notna()`.
1903    #[must_use]
1904    pub fn notna(&self) -> Vec<bool> {
1905        self.labels
1906            .iter()
1907            .map(|label| !label.is_missing())
1908            .collect()
1909    }
1910
1911    /// Where: replace labels at false positions with a fill value.
1912    ///
1913    /// Matches `pd.Index.where(cond, other)`.
1914    #[must_use]
1915    pub fn where_cond(&self, cond: &[bool], other: &IndexLabel) -> Self {
1916        self.propagate_name(Self::new(
1917            self.labels
1918                .iter()
1919                .enumerate()
1920                .map(|(i, l)| {
1921                    if cond.get(i).copied().unwrap_or(false) {
1922                        l.clone()
1923                    } else {
1924                        other.clone()
1925                    }
1926                })
1927                .collect(),
1928        ))
1929    }
1930
1931    /// Alias for `union_with`, matching `pd.Index.union`.
1932    #[must_use]
1933    pub fn union(&self, other: &Self) -> Self {
1934        self.union_with(other)
1935    }
1936
1937    /// Alias for `sort_values`, matching `pd.Index.sort`.
1938    #[must_use]
1939    pub fn sort(&self) -> Self {
1940        self.sort_values()
1941    }
1942
1943    /// Sort labels and return the positional indexer used for the sort.
1944    ///
1945    /// Matches the flat-index shape of `pd.Index.sortlevel()`.
1946    #[must_use]
1947    pub fn sortlevel(&self) -> (Self, Vec<usize>) {
1948        let order = self.argsort();
1949        (self.take(&order), order)
1950    }
1951
1952    /// Alias for `drop_labels`, matching `pd.Index.drop`.
1953    #[must_use]
1954    pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Self {
1955        self.drop_labels(labels_to_drop)
1956    }
1957
1958    /// Clone this index, matching `pd.Index.copy`.
1959    #[must_use]
1960    pub fn copy(&self) -> Self {
1961        self.clone()
1962    }
1963
1964    /// Alias for `where_cond`, matching `pd.Index.where`.
1965    #[must_use]
1966    pub fn where_(&self, cond: &[bool], other: &IndexLabel) -> Self {
1967        self.where_cond(cond, other)
1968    }
1969
1970    /// Alias for `to_list`, matching `pd.Index.tolist`.
1971    #[must_use]
1972    pub fn tolist(&self) -> Vec<IndexLabel> {
1973        self.to_list()
1974    }
1975
1976    /// Object-array-shaped materialization, matching `pd.Index.to_numpy`.
1977    #[must_use]
1978    pub fn to_numpy(&self) -> Vec<IndexLabel> {
1979        self.to_list()
1980    }
1981
1982    /// Alias for `to_numpy`, matching `pd.Index.array`.
1983    #[must_use]
1984    pub fn array(&self) -> Vec<IndexLabel> {
1985        self.to_numpy()
1986    }
1987
1988    /// Alias for `to_numpy`, matching `pd.Index.values`.
1989    #[must_use]
1990    pub fn values(&self) -> Vec<IndexLabel> {
1991        self.to_numpy()
1992    }
1993
1994    /// Alias for `to_numpy`, matching `pd.Index.ravel`.
1995    #[must_use]
1996    pub fn ravel(&self) -> Vec<IndexLabel> {
1997        self.to_numpy()
1998    }
1999
2000    /// Return a shallow clone view, matching `pd.Index.view` for this
2001    /// immutable Rust representation.
2002    #[must_use]
2003    pub fn view(&self) -> Self {
2004        self.clone()
2005    }
2006
2007    /// Flat-index transpose is identity, matching `pd.Index.transpose`.
2008    #[must_use]
2009    pub fn transpose(&self) -> Self {
2010        self.clone()
2011    }
2012
2013    /// Alias for `transpose`, matching `pd.Index.T`.
2014    #[allow(non_snake_case)]
2015    #[must_use]
2016    pub fn T(&self) -> Self {
2017        self.transpose()
2018    }
2019
2020    /// One-column row materialization, matching the flat-index shape of
2021    /// `pd.Index.to_frame(index=False)`.
2022    #[must_use]
2023    pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
2024        self.labels
2025            .iter()
2026            .map(|label| vec![label.clone()])
2027            .collect()
2028    }
2029
2030    /// Series-shaped materialization using the index labels as both index and
2031    /// values until `fp-frame` owns the richer return type.
2032    #[must_use]
2033    pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
2034        self.labels
2035            .iter()
2036            .map(|label| (label.clone(), label.clone()))
2037            .collect()
2038    }
2039
2040    /// Pandas dtype string for this flat index.
2041    #[must_use]
2042    pub fn dtype(&self) -> &'static str {
2043        match self.inferred_type() {
2044            "integer" => "int64",
2045            "string" => "object",
2046            "timedelta64" => "timedelta64[ns]",
2047            "datetime64" => "datetime64[ns]",
2048            "empty" | "mixed" => "object",
2049            _ => "object",
2050        }
2051    }
2052
2053    /// One-element dtype list, matching the `.dtypes` accessor shape used by
2054    /// pandas containers.
2055    #[must_use]
2056    pub fn dtypes(&self) -> Vec<&'static str> {
2057        vec![self.dtype()]
2058    }
2059
2060    /// Infer object labels without changing the current typed representation.
2061    #[must_use]
2062    pub fn infer_objects(&self) -> Self {
2063        self.clone()
2064    }
2065
2066    /// Whether this index's dtype can hold integer labels.
2067    #[must_use]
2068    pub fn holds_integer(&self) -> bool {
2069        self.is_integer()
2070    }
2071
2072    /// Pandas-style inferred-type string for the label values.
2073    #[must_use]
2074    pub fn inferred_type(&self) -> &'static str {
2075        if self.labels.is_empty() {
2076            return "empty";
2077        }
2078        let mut non_missing = self.labels.iter().filter(|label| !label.is_missing());
2079        let Some(first) = non_missing.next() else {
2080            return "empty";
2081        };
2082        let same_kind = |label: &IndexLabel| {
2083            matches!(
2084                (first, label),
2085                (IndexLabel::Int64(_), IndexLabel::Int64(_))
2086                    | (IndexLabel::Utf8(_), IndexLabel::Utf8(_))
2087                    | (IndexLabel::Timedelta64(_), IndexLabel::Timedelta64(_))
2088                    | (IndexLabel::Datetime64(_), IndexLabel::Datetime64(_))
2089            )
2090        };
2091        if !non_missing.all(same_kind) {
2092            return "mixed";
2093        }
2094        match first {
2095            IndexLabel::Int64(_) => "integer",
2096            IndexLabel::Utf8(_) => "string",
2097            IndexLabel::Timedelta64(_) => "timedelta64",
2098            IndexLabel::Datetime64(_) => "datetime64",
2099            // Unreachable: `first` comes from the non-missing iterator and
2100            // every Null label is_missing.
2101            IndexLabel::Null(_) => "mixed",
2102        }
2103    }
2104
2105    /// Whether this index contains missing labels, matching `pd.Index.hasnans`.
2106    #[must_use]
2107    pub fn hasnans(&self) -> bool {
2108        self.labels.iter().any(IndexLabel::is_missing)
2109    }
2110
2111    /// Number of dimensions, matching `pd.Index.ndim`.
2112    #[must_use]
2113    pub fn ndim(&self) -> usize {
2114        1
2115    }
2116
2117    /// One-dimensional shape, matching `pd.Index.shape`.
2118    #[must_use]
2119    pub fn shape(&self) -> (usize,) {
2120        (self.len(),)
2121    }
2122
2123    /// Number of entries, matching `pd.Index.size`.
2124    #[must_use]
2125    pub fn size(&self) -> usize {
2126        self.len()
2127    }
2128
2129    /// Shallow byte footprint, matching `pd.Index.nbytes`.
2130    #[must_use]
2131    pub fn nbytes(&self) -> usize {
2132        self.memory_usage(false)
2133    }
2134
2135    /// Alias for `is_empty`, matching the pandas `.empty` property.
2136    #[must_use]
2137    pub fn empty(&self) -> bool {
2138        self.is_empty()
2139    }
2140
2141    /// Return the single contained label.
2142    ///
2143    /// Matches `pd.Index.item()`, which rejects indexes with length other than
2144    /// one.
2145    pub fn item(&self) -> Result<IndexLabel, IndexError> {
2146        if self.len() == 1 {
2147            Ok(self.labels[0].clone())
2148        } else {
2149            Err(IndexError::InvalidArgument(format!(
2150                "item requires exactly one label, got {}",
2151                self.len()
2152            )))
2153        }
2154    }
2155
2156    /// Identity check, matching `pd.Index.is_`.
2157    #[must_use]
2158    pub fn is_(&self, other: &Self) -> bool {
2159        std::ptr::eq(self, other)
2160    }
2161
2162    /// Whether all non-missing labels are booleans.
2163    #[must_use]
2164    pub fn is_boolean(&self) -> bool {
2165        false
2166    }
2167
2168    /// Whether this generic index is categorical.
2169    #[must_use]
2170    pub fn is_categorical(&self) -> bool {
2171        false
2172    }
2173
2174    /// Whether all non-missing labels are floating-point.
2175    #[must_use]
2176    pub fn is_floating(&self) -> bool {
2177        false
2178    }
2179
2180    /// Whether all non-missing labels are Int64 labels.
2181    #[must_use]
2182    pub fn is_integer(&self) -> bool {
2183        !self.labels.is_empty()
2184            && self
2185                .labels
2186                .iter()
2187                .filter(|label| !label.is_missing())
2188                .all(|label| matches!(label, IndexLabel::Int64(_)))
2189    }
2190
2191    /// Whether this generic index is interval-typed.
2192    #[must_use]
2193    pub fn is_interval(&self) -> bool {
2194        false
2195    }
2196
2197    /// Whether all non-missing labels are numeric.
2198    #[must_use]
2199    pub fn is_numeric(&self) -> bool {
2200        self.is_integer()
2201    }
2202
2203    /// Whether this index is object-backed.
2204    #[must_use]
2205    pub fn is_object(&self) -> bool {
2206        matches!(self.dtype(), "object")
2207    }
2208
2209    /// Alias for `isna`, matching `pd.Index.isnull`.
2210    #[must_use]
2211    pub fn isnull(&self) -> Vec<bool> {
2212        self.isna()
2213    }
2214
2215    /// Alias for `notna`, matching `pd.Index.notnull`.
2216    #[must_use]
2217    pub fn notnull(&self) -> Vec<bool> {
2218        self.notna()
2219    }
2220
2221    /// Factorize labels into integer codes and unique labels.
2222    ///
2223    /// Missing labels receive code `-1`; non-missing labels preserve first-seen
2224    /// order in the returned uniques index.
2225    #[must_use]
2226    pub fn factorize(&self) -> (Vec<isize>, Self) {
2227        let mut positions = FxHashMap::<IndexLabel, isize>::default();
2228        let mut uniques = Vec::<IndexLabel>::new();
2229        let mut codes = Vec::with_capacity(self.labels.len());
2230        for label in &self.labels {
2231            if label.is_missing() {
2232                codes.push(-1);
2233            } else if let Some(code) = positions.get(label) {
2234                codes.push(*code);
2235            } else {
2236                let code = isize::try_from(uniques.len()).unwrap_or(isize::MAX);
2237                positions.insert(label.clone(), code);
2238                uniques.push(label.clone());
2239                codes.push(code);
2240            }
2241        }
2242        (codes, self.propagate_name(Self::new(uniques)))
2243    }
2244
2245    /// Alias for `get_indexer`, matching `pd.Index.get_indexer_for`.
2246    #[must_use]
2247    pub fn get_indexer_for(&self, target: &Self) -> Vec<Option<usize>> {
2248        self.get_indexer(target)
2249    }
2250
2251    /// Expand duplicate matches while indexing a target index.
2252    ///
2253    /// Matches `pd.Index.get_indexer_non_unique(target)` shape: every matching
2254    /// source position is emitted for each target label, and missing target
2255    /// ordinal positions are returned separately.
2256    #[must_use]
2257    pub fn get_indexer_non_unique(&self, target: &Self) -> (Vec<isize>, Vec<usize>) {
2258        let mut positions = FxHashMap::<IndexLabel, Vec<usize>>::default();
2259        for (position, label) in self.labels.iter().enumerate() {
2260            positions.entry(label.clone()).or_default().push(position);
2261        }
2262
2263        let mut indexer = Vec::new();
2264        let mut missing = Vec::new();
2265        for (target_position, label) in target.labels.iter().enumerate() {
2266            if let Some(source_positions) = positions.get(label) {
2267                indexer.extend(
2268                    source_positions
2269                        .iter()
2270                        .map(|position| isize::try_from(*position).unwrap_or(isize::MAX)),
2271                );
2272            } else {
2273                indexer.push(-1);
2274                missing.push(target_position);
2275            }
2276        }
2277        (indexer, missing)
2278    }
2279
2280    /// Get labels for a level. Flat indexes only accept level 0.
2281    pub fn get_level_values(&self, level: usize) -> Result<Self, IndexError> {
2282        if level == 0 {
2283            Ok(self.clone())
2284        } else {
2285            Err(IndexError::OutOfBounds {
2286                position: level,
2287                length: 1,
2288            })
2289        }
2290    }
2291
2292    /// Bound for a label slice, matching `pd.Index.get_slice_bound`.
2293    pub fn get_slice_bound(&self, label: &IndexLabel, side: &str) -> Result<usize, IndexError> {
2294        self.searchsorted(label, side)
2295    }
2296
2297    /// Return `(start, stop)` bounds for a label slice. Stop is exclusive.
2298    pub fn slice_locs(
2299        &self,
2300        start: Option<&IndexLabel>,
2301        end: Option<&IndexLabel>,
2302    ) -> Result<(usize, usize), IndexError> {
2303        let start = match start {
2304            Some(label) => self.get_slice_bound(label, "left")?,
2305            None => 0,
2306        };
2307        let end = match end {
2308            Some(label) => self.get_slice_bound(label, "right")?,
2309            None => self.len(),
2310        };
2311        Ok(if end < start {
2312            (start, start)
2313        } else {
2314            (start, end)
2315        })
2316    }
2317
2318    /// Alias for `slice_locs`, matching `pd.Index.slice_indexer`.
2319    pub fn slice_indexer(
2320        &self,
2321        start: Option<&IndexLabel>,
2322        end: Option<&IndexLabel>,
2323    ) -> Result<(usize, usize), IndexError> {
2324        self.slice_locs(start, end)
2325    }
2326
2327    /// Reindex to a target index, returning the target and source positions.
2328    #[must_use]
2329    pub fn reindex(&self, target: &Self) -> (Self, Vec<Option<usize>>) {
2330        (target.clone(), self.get_indexer(target))
2331    }
2332
2333    /// Flat-index `droplevel` is invalid because it would remove the only
2334    /// level.
2335    pub fn droplevel(&self, level: usize) -> Result<Self, IndexError> {
2336        if level == 0 {
2337            Err(IndexError::InvalidArgument(
2338                "cannot remove the only level from a flat Index".to_owned(),
2339            ))
2340        } else {
2341            Err(IndexError::OutOfBounds {
2342                position: level,
2343                length: 1,
2344            })
2345        }
2346    }
2347
2348    /// Rounding is a no-op for current discrete flat index labels.
2349    #[must_use]
2350    pub fn round(&self) -> Self {
2351        self.clone()
2352    }
2353
2354    /// String accessor for Utf8 labels, matching `pd.Index.str`.
2355    #[must_use]
2356    pub fn r#str(&self) -> IndexStringAccessor<'_> {
2357        IndexStringAccessor::borrowed(self)
2358    }
2359
2360    /// Group label positions by label value, matching `pd.Index.groupby`.
2361    #[must_use]
2362    pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
2363        let mut groups = HashMap::<IndexLabel, Vec<usize>>::new();
2364        for (position, label) in self.labels.iter().enumerate() {
2365            groups.entry(label.clone()).or_default().push(position);
2366        }
2367        groups
2368    }
2369
2370    /// Join two flat indexes using pandas-style join modes.
2371    pub fn join(&self, other: &Self, how: &str) -> Result<Self, IndexError> {
2372        match how {
2373            "left" => Ok(self.clone()),
2374            "right" => Ok(other.clone()),
2375            "inner" => Ok(self.intersection(other)),
2376            "outer" => Ok(self.union_with(other)),
2377            other => Err(IndexError::InvalidArgument(format!(
2378                "join: how must be 'left', 'right', 'inner', or 'outer', got {other:?}"
2379            ))),
2380        }
2381    }
2382
2383    /// Locate nearest preceding-or-equal positions for each target label.
2384    ///
2385    /// Matches `pd.Index.asof_locs(where, mask)` for monotonic flat indexes.
2386    #[must_use]
2387    pub fn asof_locs(&self, where_index: &Self, mask: Option<&[bool]>) -> Vec<Option<usize>> {
2388        where_index
2389            .labels
2390            .iter()
2391            .map(|key| {
2392                let mut best = None;
2393                for (position, label) in self.labels.iter().enumerate() {
2394                    if mask
2395                        .and_then(|values| values.get(position))
2396                        .is_some_and(|include| !include)
2397                    {
2398                        continue;
2399                    }
2400                    if label.is_missing() {
2401                        continue;
2402                    }
2403                    if label.cmp(key).is_le() {
2404                        best = Some(position);
2405                    } else {
2406                        break;
2407                    }
2408                }
2409                best
2410            })
2411            .collect()
2412    }
2413
2414    /// Positional first differences for comparable scalar index labels.
2415    ///
2416    /// Int64 and Timedelta64 labels produce same-kind differences. Datetime64
2417    /// labels produce Timedelta64 deltas. Unsupported label combinations and
2418    /// overflow return `None` for that position.
2419    #[must_use]
2420    pub fn diff(&self, periods: usize) -> Vec<Option<IndexLabel>> {
2421        let mut out = vec![None; self.len()];
2422        if periods == 0 {
2423            return out;
2424        }
2425        for (position, slot) in out.iter_mut().enumerate().skip(periods) {
2426            *slot = match (&self.labels[position], &self.labels[position - periods]) {
2427                (IndexLabel::Int64(current), IndexLabel::Int64(previous)) => {
2428                    current.checked_sub(*previous).map(IndexLabel::Int64)
2429                }
2430                (IndexLabel::Timedelta64(current), IndexLabel::Timedelta64(previous))
2431                    if *current != Timedelta::NAT && *previous != Timedelta::NAT =>
2432                {
2433                    current.checked_sub(*previous).map(IndexLabel::Timedelta64)
2434                }
2435                (IndexLabel::Datetime64(current), IndexLabel::Datetime64(previous))
2436                    if *current != i64::MIN && *previous != i64::MIN =>
2437                {
2438                    current.checked_sub(*previous).map(IndexLabel::Timedelta64)
2439                }
2440                _ => None,
2441            };
2442        }
2443        out
2444    }
2445}
2446
2447#[derive(Debug, Clone)]
2448pub struct IndexStringAccessor<'a> {
2449    index: Cow<'a, Index>,
2450}
2451
2452impl<'a> IndexStringAccessor<'a> {
2453    fn borrowed(index: &'a Index) -> Self {
2454        Self {
2455            index: Cow::Borrowed(index),
2456        }
2457    }
2458
2459    fn owned(index: Index) -> Self {
2460        Self {
2461            index: Cow::Owned(index),
2462        }
2463    }
2464
2465    fn map_utf8<T>(&self, func: impl Fn(&str) -> T) -> Vec<Option<T>> {
2466        self.index
2467            .labels()
2468            .iter()
2469            .map(|label| match label {
2470                IndexLabel::Utf8(value) => Some(func(value)),
2471                IndexLabel::Int64(_)
2472                | IndexLabel::Timedelta64(_)
2473                | IndexLabel::Datetime64(_)
2474                | IndexLabel::Null(_) => None,
2475            })
2476            .collect()
2477    }
2478
2479    /// Lowercase string labels.
2480    #[must_use]
2481    pub fn lower(&self) -> Vec<Option<String>> {
2482        self.map_utf8(str::to_lowercase)
2483    }
2484
2485    /// Uppercase string labels.
2486    #[must_use]
2487    pub fn upper(&self) -> Vec<Option<String>> {
2488        self.map_utf8(str::to_uppercase)
2489    }
2490
2491    /// Substring membership for string labels.
2492    #[must_use]
2493    pub fn contains(&self, needle: &str) -> Vec<Option<bool>> {
2494        self.map_utf8(|value| value.contains(needle))
2495    }
2496
2497    /// String length for string labels.
2498    #[must_use]
2499    pub fn len(&self) -> Vec<Option<usize>> {
2500        self.map_utf8(str::len)
2501    }
2502
2503    /// String emptiness for string labels.
2504    #[must_use]
2505    pub fn is_empty(&self) -> Vec<Option<bool>> {
2506        self.map_utf8(str::is_empty)
2507    }
2508}
2509
2510fn datetime_from_nanos(nanos: i64) -> Option<chrono::DateTime<chrono::Utc>> {
2511    if nanos == i64::MIN {
2512        return None;
2513    }
2514    let secs = nanos.div_euclid(1_000_000_000);
2515    let subsec_nanos = nanos.rem_euclid(1_000_000_000) as u32;
2516    chrono::DateTime::from_timestamp(secs, subsec_nanos)
2517}
2518
2519fn datetime_to_period_error(message: impl Into<String>) -> IndexError {
2520    IndexError::InvalidArgument(format!(
2521        "DatetimeIndex to_period failed: {}",
2522        message.into()
2523    ))
2524}
2525
2526fn date_to_weekly_period_ordinal(date: chrono::NaiveDate) -> Result<i64, IndexError> {
2527    let base = period_epoch_date(1969, 12, 22)?;
2528    Ok(date.signed_duration_since(base).num_days().div_euclid(7))
2529}
2530
2531fn business_period_anchor_date(date: chrono::NaiveDate) -> Result<chrono::NaiveDate, IndexError> {
2532    match date.weekday().num_days_from_monday() {
2533        5 => period_add_days(date, 2),
2534        6 => period_add_days(date, 1),
2535        _ => Ok(date),
2536    }
2537}
2538
2539fn date_to_business_period_ordinal(date: chrono::NaiveDate) -> Result<i64, IndexError> {
2540    let adjusted = business_period_anchor_date(date)?;
2541    let days = adjusted
2542        .signed_duration_since(period_epoch_date(1970, 1, 1)?)
2543        .num_days();
2544    let rem_ordinal = match days.rem_euclid(7) {
2545        0 => 0,
2546        1 => 1,
2547        4 => 2,
2548        5 => 3,
2549        6 => 4,
2550        _ => {
2551            return Err(datetime_to_period_error(
2552                "business period anchor did not land on a business day",
2553            ));
2554        }
2555    };
2556    days.div_euclid(7)
2557        .checked_mul(5)
2558        .and_then(|base| base.checked_add(rem_ordinal))
2559        .ok_or_else(|| datetime_to_period_error("business ordinal overflow"))
2560}
2561
2562fn business_period_end_anchor_date(
2563    date: chrono::NaiveDate,
2564) -> Result<chrono::NaiveDate, IndexError> {
2565    match date.weekday().num_days_from_monday() {
2566        5 => period_add_days(date, -1),
2567        6 => period_add_days(date, -2),
2568        _ => Ok(date),
2569    }
2570}
2571
2572fn datetime_period_ordinal(nanos: i64, freq: PeriodFreq) -> Result<i64, IndexError> {
2573    let dt = datetime_from_nanos(nanos).ok_or_else(|| {
2574        datetime_to_period_error(format!("invalid or NaT datetime nanos {nanos}"))
2575    })?;
2576    let date = dt.date_naive();
2577    let year_offset = i64::from(date.year()) - 1970;
2578    match freq {
2579        PeriodFreq::Annual => Ok(year_offset),
2580        PeriodFreq::Quarterly => year_offset
2581            .checked_mul(4)
2582            .and_then(|base| base.checked_add(i64::from((date.month() - 1) / 3)))
2583            .ok_or_else(|| datetime_to_period_error("quarterly ordinal overflow")),
2584        PeriodFreq::Monthly => year_offset
2585            .checked_mul(12)
2586            .and_then(|base| base.checked_add(i64::from(date.month() - 1)))
2587            .ok_or_else(|| datetime_to_period_error("monthly ordinal overflow")),
2588        PeriodFreq::Daily => {
2589            let epoch = chrono::NaiveDate::from_ymd_opt(1970, 1, 1)
2590                .ok_or_else(|| datetime_to_period_error("invalid epoch boundary"))?;
2591            Ok(date.signed_duration_since(epoch).num_days())
2592        }
2593        PeriodFreq::Hourly => Ok(nanos.div_euclid(Timedelta::NANOS_PER_HOUR)),
2594        PeriodFreq::Minutely => Ok(nanos.div_euclid(Timedelta::NANOS_PER_MIN)),
2595        PeriodFreq::Secondly => Ok(nanos.div_euclid(Timedelta::NANOS_PER_SEC)),
2596        PeriodFreq::Weekly => date_to_weekly_period_ordinal(date),
2597        PeriodFreq::Business => date_to_business_period_ordinal(date),
2598        _ => Err(datetime_to_period_error("unsupported period frequency")),
2599    }
2600}
2601
2602fn datetime_period_ordinal_at_boundary(
2603    nanos: i64,
2604    freq: PeriodFreq,
2605    boundary: PeriodBoundary,
2606) -> Result<i64, IndexError> {
2607    if freq == PeriodFreq::Business && matches!(boundary, PeriodBoundary::End) {
2608        let dt = datetime_from_nanos(nanos).ok_or_else(|| {
2609            datetime_to_period_error(format!("invalid or NaT datetime nanos {nanos}"))
2610        })?;
2611        return date_to_business_period_ordinal(business_period_end_anchor_date(dt.date_naive())?);
2612    }
2613    datetime_period_ordinal(nanos, freq)
2614}
2615
2616fn datetime_nanos_to_period(nanos: i64, freq: PeriodFreq) -> Result<Period, IndexError> {
2617    datetime_period_ordinal(nanos, freq).map(|ordinal| Period::new(ordinal, freq))
2618}
2619
2620fn map_datetime_labels<T, F>(labels: &[IndexLabel], func: F) -> Vec<Option<T>>
2621where
2622    F: Fn(chrono::DateTime<chrono::Utc>) -> T,
2623{
2624    labels
2625        .iter()
2626        .map(|label| match label {
2627            IndexLabel::Datetime64(nanos) => datetime_from_nanos(*nanos).map(&func),
2628            IndexLabel::Int64(_)
2629            | IndexLabel::Utf8(_)
2630            | IndexLabel::Timedelta64(_)
2631            | IndexLabel::Null(_) => None,
2632        })
2633        .collect()
2634}
2635
2636fn time_to_nanos(time: chrono::NaiveTime) -> i64 {
2637    use chrono::Timelike;
2638    i64::from(time.num_seconds_from_midnight()) * 1_000_000_000 + i64::from(time.nanosecond())
2639}
2640
2641fn parse_time_of_day_nanos(time: &str, context: &str) -> Result<i64, IndexError> {
2642    let trimmed = time.trim();
2643    for format in ["%H:%M:%S%.f", "%H:%M:%S", "%H:%M"] {
2644        if let Ok(parsed) = chrono::NaiveTime::parse_from_str(trimmed, format) {
2645            return Ok(time_to_nanos(parsed));
2646        }
2647    }
2648    Err(IndexError::InvalidArgument(format!(
2649        "{context}: invalid time {time:?}; expected HH:MM, HH:MM:SS, or fractional seconds"
2650    )))
2651}
2652
2653fn datetime_label_time_nanos(label: &IndexLabel) -> Option<i64> {
2654    match label {
2655        IndexLabel::Datetime64(nanos) => {
2656            datetime_from_nanos(*nanos).map(|dt| time_to_nanos(dt.time()))
2657        }
2658        IndexLabel::Int64(_)
2659        | IndexLabel::Utf8(_)
2660        | IndexLabel::Timedelta64(_)
2661        | IndexLabel::Null(_) => None,
2662    }
2663}
2664
/// Tests whether `time` lies in the window from `start` to `end`.
///
/// `include_start` / `include_end` decide whether the endpoints themselves
/// count. When `start > end` the window is treated as wrapping around, so a
/// value qualifies if it is past the start *or* before the end.
fn time_nanos_in_between(
    time: i64,
    start: i64,
    end: i64,
    include_start: bool,
    include_end: bool,
) -> bool {
    let past_start = time > start || (include_start && time == start);
    let ahead_of_end = time < end || (include_end && time == end);
    let wraps_around = start > end;
    if wraps_around {
        past_start || ahead_of_end
    } else {
        past_start && ahead_of_end
    }
}
2684
2685fn map_timedelta_labels<T, F>(labels: &[IndexLabel], func: F) -> Vec<Option<T>>
2686where
2687    F: Fn(i64) -> T,
2688{
2689    labels
2690        .iter()
2691        .map(|label| match label {
2692            IndexLabel::Timedelta64(nanos) if *nanos != Timedelta::NAT => Some(func(*nanos)),
2693            IndexLabel::Int64(_)
2694            | IndexLabel::Utf8(_)
2695            | IndexLabel::Timedelta64(_)
2696            | IndexLabel::Datetime64(_)
2697            | IndexLabel::Null(_) => None,
2698        })
2699        .collect()
2700}
2701
2702fn timedelta_components_for_index(nanos: i64) -> TimedeltaComponents {
2703    let days = nanos.div_euclid(Timedelta::NANOS_PER_DAY);
2704    let rem = nanos.rem_euclid(Timedelta::NANOS_PER_DAY);
2705
2706    let hours = rem / Timedelta::NANOS_PER_HOUR;
2707    let rem = rem % Timedelta::NANOS_PER_HOUR;
2708    let minutes = rem / Timedelta::NANOS_PER_MIN;
2709    let rem = rem % Timedelta::NANOS_PER_MIN;
2710    let seconds = rem / Timedelta::NANOS_PER_SEC;
2711    let rem = rem % Timedelta::NANOS_PER_SEC;
2712    let milliseconds = rem / Timedelta::NANOS_PER_MILLI;
2713    let rem = rem % Timedelta::NANOS_PER_MILLI;
2714    let microseconds = rem / Timedelta::NANOS_PER_MICRO;
2715    let nanoseconds = rem % Timedelta::NANOS_PER_MICRO;
2716
2717    TimedeltaComponents {
2718        days,
2719        hours,
2720        minutes,
2721        seconds,
2722        milliseconds,
2723        microseconds,
2724        nanoseconds,
2725    }
2726}
2727
/// Direction used when snapping a nanosecond value to a multiple of a unit.
#[derive(Clone, Copy)]
enum TemporalRoundMode {
    /// Snap down to the nearest multiple at or below the value.
    Floor,
    /// Snap up to the nearest multiple at or above the value.
    Ceil,
    /// Snap to the nearest multiple; exact halves go to the even multiple.
    Round,
}
2734
/// Which edge of a period a conversion refers to.
#[derive(Clone, Copy)]
enum PeriodBoundary {
    /// The first instant of the period.
    Start,
    /// The last instant of the period.
    End,
}
2740
2741fn parse_fixed_temporal_freq(freq: &str, context: &str) -> Result<i64, IndexError> {
2742    let trimmed = freq.trim();
2743    let unit_nanos = Timedelta::unit_to_nanos(trimmed)
2744        .or_else(|| Timedelta::parse(trimmed).ok())
2745        .ok_or_else(|| {
2746            IndexError::InvalidArgument(format!("{context}: invalid frequency {freq:?}"))
2747        })?;
2748    if unit_nanos <= 0 {
2749        return Err(IndexError::InvalidArgument(format!(
2750            "{context}: frequency must be positive, got {freq:?}"
2751        )));
2752    }
2753    Ok(unit_nanos)
2754}
2755
2756fn round_nanos_to_unit(nanos: i64, unit_nanos: i64, mode: TemporalRoundMode) -> i64 {
2757    match mode {
2758        TemporalRoundMode::Floor => nanos.div_euclid(unit_nanos).saturating_mul(unit_nanos),
2759        TemporalRoundMode::Ceil => {
2760            let rem = nanos.rem_euclid(unit_nanos);
2761            if rem == 0 {
2762                nanos
2763            } else {
2764                nanos.saturating_add(unit_nanos - rem)
2765            }
2766        }
2767        TemporalRoundMode::Round => {
2768            let floor = nanos.div_euclid(unit_nanos);
2769            let rem = nanos.rem_euclid(unit_nanos);
2770            if rem == 0 {
2771                return nanos;
2772            }
2773            let twice_rem = i128::from(rem) * 2;
2774            let unit = i128::from(unit_nanos);
2775            let chosen = if twice_rem < unit {
2776                floor
2777            } else if twice_rem > unit {
2778                floor.saturating_add(1)
2779            } else if floor % 2 == 0 {
2780                floor
2781            } else {
2782                floor.saturating_add(1)
2783            };
2784            chosen.saturating_mul(unit_nanos)
2785        }
2786    }
2787}
2788
/// Builds a positional difference vector of length `len`.
///
/// For each position that has a partner `periods` steps away, the slot is
/// filled with `diff_at(position, partner)`; positions without a partner stay
/// `None`. A positive `periods` pairs each position with an earlier one, a
/// negative value with a later one, and zero pairs every position with
/// itself. `diff_at` is invoked in ascending position order.
fn positional_diff<T>(
    len: usize,
    periods: i64,
    mut diff_at: impl FnMut(usize, usize) -> Option<T>,
) -> Vec<Option<T>> {
    let mut result: Vec<Option<T>> = std::iter::repeat_with(|| None).take(len).collect();
    // A shift that does not fit in `usize` can never have a partner.
    let Ok(shift) = usize::try_from(periods.unsigned_abs()) else {
        return result;
    };
    for (position, slot) in result.iter_mut().enumerate() {
        let partner = if periods < 0 {
            position.checked_add(shift).filter(|later| *later < len)
        } else {
            position.checked_sub(shift)
        };
        if let Some(partner) = partner {
            *slot = diff_at(position, partner);
        }
    }
    result
}
2818
2819fn optional_diffs_to_timedelta_index(
2820    values: Vec<Option<i64>>,
2821    name: Option<&str>,
2822) -> TimedeltaIndex {
2823    let mut out = TimedeltaIndex::new(
2824        values
2825            .into_iter()
2826            .map(|value| value.unwrap_or(Timedelta::NAT))
2827            .collect(),
2828    );
2829    if let Some(name) = name {
2830        out = out.set_name(name);
2831    }
2832    out
2833}
2834
2835fn period_timestamp_error(message: impl Into<String>) -> IndexError {
2836    IndexError::InvalidArgument(format!(
2837        "PeriodIndex timestamp conversion failed: {}",
2838        message.into()
2839    ))
2840}
2841
2842fn period_date_error(err: DateRangeError) -> IndexError {
2843    period_timestamp_error(err.to_string())
2844}
2845
2846fn period_date_to_nanos(date: chrono::NaiveDate) -> Result<i64, IndexError> {
2847    date_to_midnight_nanos(date).map_err(period_date_error)
2848}
2849
2850fn period_checked_add_nanos(nanos: i64, delta: i64) -> Result<i64, IndexError> {
2851    nanos
2852        .checked_add(delta)
2853        .ok_or_else(|| period_timestamp_error("nanosecond timestamp overflow"))
2854}
2855
2856fn period_month_start(month_ordinal: i64) -> Result<chrono::NaiveDate, IndexError> {
2857    let year = 1970_i64
2858        .checked_add(month_ordinal.div_euclid(12))
2859        .ok_or_else(|| period_timestamp_error("year overflow"))?;
2860    let year = i32::try_from(year).map_err(|_| period_timestamp_error("year out of range"))?;
2861    let month = u32::try_from(month_ordinal.rem_euclid(12) + 1)
2862        .map_err(|_| period_timestamp_error("month out of range"))?;
2863    chrono::NaiveDate::from_ymd_opt(year, month, 1)
2864        .ok_or_else(|| period_timestamp_error("invalid month boundary"))
2865}
2866
2867fn period_epoch_date(year: i32, month: u32, day: u32) -> Result<chrono::NaiveDate, IndexError> {
2868    chrono::NaiveDate::from_ymd_opt(year, month, day)
2869        .ok_or_else(|| period_timestamp_error("invalid epoch boundary"))
2870}
2871
2872fn period_add_days(date: chrono::NaiveDate, days: i64) -> Result<chrono::NaiveDate, IndexError> {
2873    let delta = chrono::Duration::try_days(days)
2874        .ok_or_else(|| period_timestamp_error("day offset overflow"))?;
2875    date.checked_add_signed(delta)
2876        .ok_or_else(|| period_timestamp_error("date overflow"))
2877}
2878
2879fn period_business_date(ordinal: i64) -> Result<chrono::NaiveDate, IndexError> {
2880    let week = ordinal.div_euclid(5);
2881    let day_offset = match ordinal.rem_euclid(5) {
2882        0 => 0,
2883        1 => 1,
2884        2 => 4,
2885        3 => 5,
2886        4 => 6,
2887        _ => {
2888            return Err(period_timestamp_error(
2889                "business-day remainder out of range",
2890            ));
2891        }
2892    };
2893    let calendar_days = week
2894        .checked_mul(7)
2895        .and_then(|days| days.checked_add(day_offset))
2896        .ok_or_else(|| period_timestamp_error("business-day ordinal overflow"))?;
2897    period_add_days(period_epoch_date(1970, 1, 1)?, calendar_days)
2898}
2899
2900fn period_start_nanos(period: Period) -> Result<i64, IndexError> {
2901    match period.freq {
2902        PeriodFreq::Annual => {
2903            let month_ordinal = period
2904                .ordinal
2905                .checked_mul(12)
2906                .ok_or_else(|| period_timestamp_error("annual ordinal overflow"))?;
2907            period_date_to_nanos(period_month_start(month_ordinal)?)
2908        }
2909        PeriodFreq::Quarterly => {
2910            let month_ordinal = period
2911                .ordinal
2912                .checked_mul(3)
2913                .ok_or_else(|| period_timestamp_error("quarterly ordinal overflow"))?;
2914            period_date_to_nanos(period_month_start(month_ordinal)?)
2915        }
2916        PeriodFreq::Monthly => period_date_to_nanos(period_month_start(period.ordinal)?),
2917        PeriodFreq::Weekly => {
2918            let base = period_epoch_date(1969, 12, 22)?;
2919            let days = period
2920                .ordinal
2921                .checked_mul(7)
2922                .ok_or_else(|| period_timestamp_error("weekly ordinal overflow"))?;
2923            period_date_to_nanos(period_add_days(base, days)?)
2924        }
2925        PeriodFreq::Daily => {
2926            let base = period_epoch_date(1970, 1, 1)?;
2927            period_date_to_nanos(period_add_days(base, period.ordinal)?)
2928        }
2929        PeriodFreq::Business => period_date_to_nanos(period_business_date(period.ordinal)?),
2930        PeriodFreq::Hourly => period
2931            .ordinal
2932            .checked_mul(Timedelta::NANOS_PER_HOUR)
2933            .ok_or_else(|| period_timestamp_error("hourly ordinal overflow")),
2934        PeriodFreq::Minutely => period
2935            .ordinal
2936            .checked_mul(Timedelta::NANOS_PER_MIN)
2937            .ok_or_else(|| period_timestamp_error("minutely ordinal overflow")),
2938        PeriodFreq::Secondly => period
2939            .ordinal
2940            .checked_mul(Timedelta::NANOS_PER_SEC)
2941            .ok_or_else(|| period_timestamp_error("secondly ordinal overflow")),
2942        _ => Err(period_timestamp_error("unsupported period frequency")),
2943    }
2944}
2945
2946fn period_next_start_nanos(period: Period) -> Result<i64, IndexError> {
2947    let next = Period {
2948        ordinal: period
2949            .ordinal
2950            .checked_add(1)
2951            .ok_or_else(|| period_timestamp_error("period ordinal overflow"))?,
2952        freq: period.freq,
2953    };
2954    period_start_nanos(next)
2955}
2956
2957fn period_end_nanos(period: Period) -> Result<i64, IndexError> {
2958    period_checked_add_nanos(period_next_start_nanos(period)?, -1)
2959}
2960
2961fn period_boundary_nanos(period: Period, boundary: PeriodBoundary) -> Result<i64, IndexError> {
2962    match boundary {
2963        PeriodBoundary::Start => period_start_nanos(period),
2964        PeriodBoundary::End => period_end_nanos(period),
2965    }
2966}
2967
2968fn parse_period_boundary_how(how: &str, context: &str) -> Result<PeriodBoundary, IndexError> {
2969    match how.trim().to_ascii_lowercase().as_str() {
2970        "" | "e" | "end" | "finish" => Ok(PeriodBoundary::End),
2971        "s" | "start" | "begin" | "b" => Ok(PeriodBoundary::Start),
2972        other => Err(IndexError::InvalidArgument(format!(
2973            "{context} how must be 'start' or 'end', got {other:?}"
2974        ))),
2975    }
2976}
2977
2978fn period_qyear(period: Period) -> Result<i32, IndexError> {
2979    let end_nanos = period_end_nanos(period)?;
2980    datetime_nanos_to_date(end_nanos)
2981        .map(|date| date.year())
2982        .map_err(period_date_error)
2983}
2984
2985#[derive(Debug, Clone, Copy)]
2986pub struct PeriodFields<'a> {
2987    pub year: &'a [i32],
2988    pub quarter: Option<&'a [u32]>,
2989    pub month: Option<&'a [u32]>,
2990    pub day: Option<&'a [u32]>,
2991    pub hour: Option<&'a [u32]>,
2992    pub minute: Option<&'a [u32]>,
2993    pub second: Option<&'a [u32]>,
2994    pub freq: Option<PeriodFreq>,
2995}
2996
2997impl<'a> PeriodFields<'a> {
2998    #[must_use]
2999    pub const fn new(year: &'a [i32]) -> Self {
3000        Self {
3001            year,
3002            quarter: None,
3003            month: None,
3004            day: None,
3005            hour: None,
3006            minute: None,
3007            second: None,
3008            freq: None,
3009        }
3010    }
3011}
3012
3013fn period_fields_error(message: impl Into<String>) -> IndexError {
3014    IndexError::InvalidArgument(format!(
3015        "PeriodIndex.from_fields failed: {}",
3016        message.into()
3017    ))
3018}
3019
3020fn period_fields_freq(fields: &PeriodFields<'_>) -> Result<PeriodFreq, IndexError> {
3021    let freq = fields
3022        .freq
3023        .or_else(|| fields.quarter.map(|_| PeriodFreq::Quarterly))
3024        .ok_or_else(|| {
3025            period_fields_error("freq is required unless quarter fields imply quarterly periods")
3026        })?;
3027    if fields.quarter.is_some() && freq != PeriodFreq::Quarterly {
3028        return Err(period_fields_error(
3029            "quarter fields require quarterly frequency",
3030        ));
3031    }
3032    Ok(freq)
3033}
3034
3035fn validate_period_field_len(
3036    name: &str,
3037    values: Option<&[u32]>,
3038    expected: usize,
3039) -> Result<(), IndexError> {
3040    if values.is_some_and(|items| items.len() != expected) {
3041        return Err(period_fields_error(format!(
3042            "Mismatched Period array lengths for {name}"
3043        )));
3044    }
3045    Ok(())
3046}
3047
3048fn validate_period_fields(fields: &PeriodFields<'_>) -> Result<(), IndexError> {
3049    let expected = fields.year.len();
3050    validate_period_field_len("quarter", fields.quarter, expected)?;
3051    validate_period_field_len("month", fields.month, expected)?;
3052    validate_period_field_len("day", fields.day, expected)?;
3053    validate_period_field_len("hour", fields.hour, expected)?;
3054    validate_period_field_len("minute", fields.minute, expected)?;
3055    validate_period_field_len("second", fields.second, expected)
3056}
3057
/// Reads the value at `position` from an optional field slice, falling back to
/// `default` when the slice is absent or too short.
fn period_field_value(values: Option<&[u32]>, position: usize, default: u32) -> u32 {
    match values {
        Some(items) => items.get(position).copied().unwrap_or(default),
        None => default,
    }
}
3063
3064fn required_period_field(
3065    values: Option<&[u32]>,
3066    name: &str,
3067    position: usize,
3068) -> Result<u32, IndexError> {
3069    values
3070        .and_then(|items| items.get(position).copied())
3071        .ok_or_else(|| period_fields_error(format!("{name} fields are required")))
3072}
3073
3074fn quarter_start_month(quarter: u32) -> Result<u32, IndexError> {
3075    if (1..=4).contains(&quarter) {
3076        Ok((quarter - 1) * 3 + 1)
3077    } else {
3078        Err(period_fields_error(format!(
3079            "quarter must be in 1..=4, got {quarter}"
3080        )))
3081    }
3082}
3083
3084fn period_from_fields_at(
3085    fields: &PeriodFields<'_>,
3086    freq: PeriodFreq,
3087    position: usize,
3088) -> Result<Period, IndexError> {
3089    let year = fields
3090        .year
3091        .get(position)
3092        .copied()
3093        .ok_or_else(|| period_fields_error("year fields are required"))?;
3094    let month = if freq == PeriodFreq::Quarterly {
3095        if let Some(quarters) = fields.quarter {
3096            let quarter = quarters
3097                .get(position)
3098                .copied()
3099                .ok_or_else(|| period_fields_error("quarter fields are required"))?;
3100            quarter_start_month(quarter)?
3101        } else {
3102            required_period_field(fields.month, "month", position)?
3103        }
3104    } else {
3105        if fields.quarter.is_some() && fields.month.is_none() {
3106            return Err(period_fields_error(
3107                "quarter fields require quarterly frequency unless month is also supplied",
3108            ));
3109        }
3110        required_period_field(fields.month, "month", position)?
3111    };
3112    let day = if matches!(
3113        freq,
3114        PeriodFreq::Annual | PeriodFreq::Quarterly | PeriodFreq::Monthly
3115    ) {
3116        1
3117    } else {
3118        period_field_value(fields.day, position, 1)
3119    };
3120    let hour = if matches!(
3121        freq,
3122        PeriodFreq::Hourly | PeriodFreq::Minutely | PeriodFreq::Secondly
3123    ) {
3124        period_field_value(fields.hour, position, 0)
3125    } else {
3126        0
3127    };
3128    let minute = if matches!(freq, PeriodFreq::Minutely | PeriodFreq::Secondly) {
3129        period_field_value(fields.minute, position, 0)
3130    } else {
3131        0
3132    };
3133    let second = if freq == PeriodFreq::Secondly {
3134        period_field_value(fields.second, position, 0)
3135    } else {
3136        0
3137    };
3138    let date = chrono::NaiveDate::from_ymd_opt(year, month, day)
3139        .ok_or_else(|| period_fields_error("invalid year/month/day combination"))?;
3140    let time = chrono::NaiveTime::from_hms_opt(hour, minute, second)
3141        .ok_or_else(|| period_fields_error("invalid hour/minute/second combination"))?;
3142    let nanos = date_and_time_to_nanos(date, time_to_nanos(time)).map_err(period_date_error)?;
3143    datetime_period_ordinal(nanos, freq).map(|ordinal| Period::new(ordinal, freq))
3144}
3145
3146fn ensure_index_kind(
3147    index: &Index,
3148    predicate: impl Fn(&IndexLabel) -> bool,
3149    kind: &str,
3150) -> Result<(), IndexError> {
3151    if index.labels().iter().all(predicate) {
3152        Ok(())
3153    } else {
3154        Err(IndexError::InvalidArgument(format!(
3155            "{kind} requires homogeneous {kind} labels"
3156        )))
3157    }
3158}
3159
3160/// Public pandas-style datetime index wrapper.
3161///
3162/// The canonical storage remains [`Index`] with `Datetime64` labels so existing
3163/// DataFrame/Series alignment code keeps one representation. This wrapper adds
3164/// the type-level public surface pandas users expect (`DatetimeIndex`) and a
3165/// small first slice of datetime accessors.
3166#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
3167pub struct DatetimeIndex {
3168    index: Index,
3169}
3170
3171impl DatetimeIndex {
3172    #[must_use]
3173    pub fn new(nanos: Vec<i64>) -> Self {
3174        Self {
3175            index: Index::from_datetime64(nanos),
3176        }
3177    }
3178
3179    pub fn from_index(index: Index) -> Result<Self, IndexError> {
3180        ensure_index_kind(
3181            &index,
3182            |label| matches!(label, IndexLabel::Datetime64(_)),
3183            "DatetimeIndex",
3184        )?;
3185        Ok(Self { index })
3186    }
3187
3188    #[must_use]
3189    pub fn as_index(&self) -> &Index {
3190        &self.index
3191    }
3192
3193    #[must_use]
3194    pub fn into_index(self) -> Index {
3195        self.index
3196    }
3197
3198    #[must_use]
3199    pub fn len(&self) -> usize {
3200        self.index.len()
3201    }
3202
3203    #[must_use]
3204    pub fn is_empty(&self) -> bool {
3205        self.index.is_empty()
3206    }
3207
3208    #[must_use]
3209    pub fn name(&self) -> Option<&str> {
3210        self.index.name()
3211    }
3212
3213    #[must_use]
3214    pub fn set_name(&self, name: &str) -> Self {
3215        Self {
3216            index: self.index.set_name(name),
3217        }
3218    }
3219
3220    #[must_use]
3221    pub fn set_names(&self, name: Option<&str>) -> Self {
3222        Self {
3223            index: self.index.set_names(name),
3224        }
3225    }
3226
3227    #[must_use]
3228    pub fn rename_index(&self, name: Option<&str>) -> Self {
3229        self.set_names(name)
3230    }
3231
3232    #[must_use]
3233    pub fn names(&self) -> Vec<Option<String>> {
3234        self.index.names()
3235    }
3236
3237    #[must_use]
3238    pub fn copy(&self) -> Self {
3239        self.clone()
3240    }
3241
3242    #[must_use]
3243    pub fn shape(&self) -> (usize,) {
3244        self.index.shape()
3245    }
3246
3247    #[must_use]
3248    pub fn size(&self) -> usize {
3249        self.index.size()
3250    }
3251
3252    #[must_use]
3253    pub fn empty(&self) -> bool {
3254        self.index.empty()
3255    }
3256
3257    #[must_use]
3258    pub fn dtype(&self) -> &'static str {
3259        "datetime64[ns]"
3260    }
3261
3262    #[must_use]
3263    pub fn dtypes(&self) -> Vec<&'static str> {
3264        vec![self.dtype()]
3265    }
3266
3267    #[must_use]
3268    pub fn memory_usage(&self, deep: bool) -> usize {
3269        self.index.memory_usage(deep)
3270    }
3271
3272    #[must_use]
3273    pub fn nbytes(&self) -> usize {
3274        self.index.nbytes()
3275    }
3276
3277    #[must_use]
3278    pub fn hasnans(&self) -> bool {
3279        self.index.hasnans()
3280    }
3281
3282    #[must_use]
3283    pub fn isna(&self) -> Vec<bool> {
3284        self.index.isna()
3285    }
3286
3287    #[must_use]
3288    pub fn notna(&self) -> Vec<bool> {
3289        self.index.notna()
3290    }
3291
3292    #[must_use]
3293    pub fn is_unique(&self) -> bool {
3294        self.index.is_unique()
3295    }
3296
3297    #[must_use]
3298    pub fn has_duplicates(&self) -> bool {
3299        self.index.has_duplicates()
3300    }
3301
3302    #[must_use]
3303    pub fn is_monotonic_increasing(&self) -> bool {
3304        self.index.is_monotonic_increasing()
3305    }
3306
3307    #[must_use]
3308    pub fn is_monotonic(&self) -> bool {
3309        self.index.is_monotonic()
3310    }
3311
3312    #[must_use]
3313    pub fn is_monotonic_decreasing(&self) -> bool {
3314        self.index.is_monotonic_decreasing()
3315    }
3316
3317    #[must_use]
3318    pub fn nunique(&self) -> usize {
3319        self.index.nunique()
3320    }
3321
3322    #[must_use]
3323    pub fn nunique_with_dropna(&self, dropna: bool) -> usize {
3324        self.index.nunique_with_dropna(dropna)
3325    }
3326
3327    #[must_use]
3328    pub fn ndim(&self) -> usize {
3329        self.index.ndim()
3330    }
3331
3332    pub fn item(&self) -> Result<Option<i64>, IndexError> {
3333        match self.index.item()? {
3334            IndexLabel::Datetime64(nanos) if nanos != i64::MIN => Ok(Some(nanos)),
3335            IndexLabel::Datetime64(_) => Ok(None),
3336            label => Err(IndexError::InvalidArgument(format!(
3337                "DatetimeIndex item must be datetime64, got {label}"
3338            ))),
3339        }
3340    }
3341
3342    #[must_use]
3343    pub fn is_(&self, other: &Self) -> bool {
3344        std::ptr::eq(self, other)
3345    }
3346
3347    #[must_use]
3348    pub fn equals(&self, other: &Self) -> bool {
3349        self.index.equals(&other.index)
3350    }
3351
3352    #[must_use]
3353    pub fn identical(&self, other: &Self) -> bool {
3354        self.index.identical(&other.index)
3355    }
3356
3357    #[must_use]
3358    pub fn holds_integer(&self) -> bool {
3359        false
3360    }
3361
3362    #[must_use]
3363    pub fn inferred_type(&self) -> &'static str {
3364        "datetime64"
3365    }
3366
3367    #[must_use]
3368    pub fn is_boolean(&self) -> bool {
3369        false
3370    }
3371
3372    #[must_use]
3373    pub fn is_categorical(&self) -> bool {
3374        false
3375    }
3376
3377    #[must_use]
3378    pub fn is_floating(&self) -> bool {
3379        false
3380    }
3381
3382    #[must_use]
3383    pub fn is_integer(&self) -> bool {
3384        false
3385    }
3386
3387    #[must_use]
3388    pub fn is_interval(&self) -> bool {
3389        false
3390    }
3391
3392    #[must_use]
3393    pub fn is_numeric(&self) -> bool {
3394        false
3395    }
3396
3397    #[must_use]
3398    pub fn is_object(&self) -> bool {
3399        false
3400    }
3401
3402    #[must_use]
3403    pub fn nanos(&self) -> Vec<Option<i64>> {
3404        self.index
3405            .labels()
3406            .iter()
3407            .map(|label| match label {
3408                IndexLabel::Datetime64(nanos) if *nanos != i64::MIN => Some(*nanos),
3409                IndexLabel::Int64(_)
3410                | IndexLabel::Utf8(_)
3411                | IndexLabel::Timedelta64(_)
3412                | IndexLabel::Datetime64(_)
3413                | IndexLabel::Null(_) => None,
3414            })
3415            .collect()
3416    }
3417
3418    #[must_use]
3419    pub fn values(&self) -> Vec<Option<i64>> {
3420        self.nanos()
3421    }
3422
3423    #[must_use]
3424    pub fn to_list(&self) -> Vec<Option<i64>> {
3425        self.nanos()
3426    }
3427
3428    #[must_use]
3429    pub fn tolist(&self) -> Vec<Option<i64>> {
3430        self.to_list()
3431    }
3432
3433    #[must_use]
3434    pub fn to_numpy(&self) -> Vec<Option<i64>> {
3435        self.nanos()
3436    }
3437
3438    #[must_use]
3439    pub fn array(&self) -> Vec<Option<i64>> {
3440        self.nanos()
3441    }
3442
3443    /// Underlying nanoseconds-since-epoch, matching `pd.DatetimeIndex.asi8`.
3444    /// NAT is preserved as `i64::MIN` to match the on-disk sentinel.
3445    #[must_use]
3446    pub fn asi8(&self) -> Vec<i64> {
3447        self.index
3448            .labels()
3449            .iter()
3450            .map(|label| match label {
3451                IndexLabel::Datetime64(nanos) => *nanos,
3452                IndexLabel::Int64(_)
3453                | IndexLabel::Utf8(_)
3454                | IndexLabel::Timedelta64(_)
3455                | IndexLabel::Null(_) => i64::MIN,
3456            })
3457            .collect()
3458    }
3459
3460    /// Convert datetime labels to period ordinals at the requested frequency,
3461    /// matching `pd.DatetimeIndex.to_period(freq)` for supported fixed
3462    /// calendar frequencies.
3463    pub fn to_period(&self, freq: &str) -> Result<PeriodIndex, IndexError> {
3464        let period_freq = PeriodFreq::parse(freq).ok_or_else(|| {
3465            IndexError::InvalidArgument(format!("to_period: unsupported frequency '{freq}'"))
3466        })?;
3467        let periods = self
3468            .index
3469            .labels()
3470            .iter()
3471            .map(|label| match label {
3472                IndexLabel::Datetime64(nanos) => datetime_nanos_to_period(*nanos, period_freq),
3473                other => Err(IndexError::InvalidArgument(format!(
3474                    "to_period requires DatetimeIndex labels, got {other:?}"
3475                ))),
3476            })
3477            .collect::<Result<Vec<_>, _>>()?;
3478        let mut out = PeriodIndex::new(periods);
3479        if let Some(name) = self.name() {
3480            out = out.set_name(name);
3481        }
3482        Ok(out)
3483    }
3484
3485    /// Format each timestamp using a chrono format string, matching
3486    /// `pd.DatetimeIndex.strftime(format)`. NAT propagates as `None`.
3487    #[must_use]
3488    pub fn strftime(&self, format: &str) -> Vec<Option<String>> {
3489        map_datetime_labels(self.index.labels(), |dt| dt.format(format).to_string())
3490    }
3491
3492    /// Position of the maximum label, matching `pd.DatetimeIndex.argmax()`.
3493    /// Pandas returns the *first* tied position and skips NAT entries; this
3494    /// method walks the labels itself to match that ordering exactly. Empty
3495    /// indexes (or all-NAT indexes) raise pandas-style `ValueError` mirrored
3496    /// as [`IndexError::InvalidArgument`].
3497    pub fn argmax(&self) -> Result<usize, IndexError> {
3498        let labels = self.index.labels();
3499        let mut best: Option<usize> = None;
3500        for (i, label) in labels.iter().enumerate() {
3501            let nanos = match label {
3502                IndexLabel::Datetime64(n) if *n != i64::MIN => *n,
3503                _ => continue,
3504            };
3505            best = Some(match best {
3506                Some(b) => match labels[b] {
3507                    IndexLabel::Datetime64(prev) if nanos > prev => i,
3508                    _ => b,
3509                },
3510                None => i,
3511            });
3512        }
3513        best.ok_or_else(|| {
3514            IndexError::InvalidArgument("attempt to get argmax of an empty sequence".to_owned())
3515        })
3516    }
3517
3518    /// Position of the minimum label, matching `pd.DatetimeIndex.argmin()`.
3519    /// Returns the first-tied position and skips NAT to match pandas semantics.
3520    pub fn argmin(&self) -> Result<usize, IndexError> {
3521        let labels = self.index.labels();
3522        let mut best: Option<usize> = None;
3523        for (i, label) in labels.iter().enumerate() {
3524            let nanos = match label {
3525                IndexLabel::Datetime64(n) if *n != i64::MIN => *n,
3526                _ => continue,
3527            };
3528            best = Some(match best {
3529                Some(b) => match labels[b] {
3530                    IndexLabel::Datetime64(prev) if nanos < prev => i,
3531                    _ => b,
3532                },
3533                None => i,
3534            });
3535        }
3536        best.ok_or_else(|| {
3537            IndexError::InvalidArgument("attempt to get argmin of an empty sequence".to_owned())
3538        })
3539    }
3540
3541    /// Positions that would sort the labels ascending, matching
3542    /// `pd.DatetimeIndex.argsort()`.
3543    #[must_use]
3544    pub fn argsort(&self) -> Vec<usize> {
3545        self.index.argsort()
3546    }
3547
3548    /// First-seen unique labels, matching `pd.DatetimeIndex.unique()`.
3549    /// Returns a new DatetimeIndex.
3550    pub fn unique(&self) -> Result<Self, IndexError> {
3551        Self::from_index(self.index.unique())
3552    }
3553
3554    /// Identity-stable factorization, matching `pd.DatetimeIndex.factorize()`.
3555    /// Returns `(codes, uniques)` where `uniques` is rebuilt as DatetimeIndex.
3556    pub fn factorize(&self) -> Result<(Vec<isize>, Self), IndexError> {
3557        let (codes, uniques) = self.index.factorize();
3558        Ok((codes, Self::from_index(uniques)?))
3559    }
3560
3561    /// Value counts, matching `pd.DatetimeIndex.value_counts()`.
3562    #[must_use]
3563    pub fn value_counts(&self) -> Vec<(IndexLabel, usize)> {
3564        self.index.value_counts()
3565    }
3566
3567    /// Duplicate mask per position, matching `pd.DatetimeIndex.duplicated(keep)`.
3568    #[must_use]
3569    pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
3570        self.index.duplicated(keep)
3571    }
3572
3573    /// Drop duplicate labels, matching `pd.DatetimeIndex.drop_duplicates()`.
3574    pub fn drop_duplicates(&self) -> Result<Self, IndexError> {
3575        Self::from_index(self.index.drop_duplicates())
3576    }
3577
3578    /// Pick labels at the given positions, matching `pd.DatetimeIndex.take()`.
3579    /// Out-of-bounds positions raise [`IndexError::OutOfBounds`].
3580    pub fn take(&self, positions: &[usize]) -> Result<Self, IndexError> {
3581        let labels = self.index.labels();
3582        for &p in positions {
3583            if p >= labels.len() {
3584                return Err(IndexError::OutOfBounds {
3585                    position: p,
3586                    length: labels.len(),
3587                });
3588            }
3589        }
3590        let nanos: Vec<i64> = positions
3591            .iter()
3592            .map(|&p| match labels[p] {
3593                IndexLabel::Datetime64(n) => n,
3594                _ => i64::MIN,
3595            })
3596            .collect();
3597        let mut out = Self::new(nanos);
3598        if let Some(name) = self.name() {
3599            out = out.set_name(name);
3600        }
3601        Ok(out)
3602    }
3603
3604    /// Repeat each label `repeats` times, matching `pd.DatetimeIndex.repeat()`.
3605    #[must_use]
3606    pub fn repeat(&self, repeats: usize) -> Self {
3607        let mut out = Vec::with_capacity(self.len() * repeats);
3608        for label in self.index.labels() {
3609            if let IndexLabel::Datetime64(n) = label {
3610                for _ in 0..repeats {
3611                    out.push(*n);
3612                }
3613            }
3614        }
3615        let mut result = Self::new(out);
3616        if let Some(name) = self.name() {
3617            result = result.set_name(name);
3618        }
3619        result
3620    }
3621
3622    /// Per-position membership mask, matching `pd.DatetimeIndex.isin(values)`.
3623    /// `values` is interpreted as a slice of nanoseconds-since-epoch; pass
3624    /// `i64::MIN` to test for NAT.
3625    #[must_use]
3626    pub fn isin(&self, values: &[i64]) -> Vec<bool> {
3627        let needle: FxHashSet<i64> = values.iter().copied().collect();
3628        self.index
3629            .labels()
3630            .iter()
3631            .map(|label| match label {
3632                IndexLabel::Datetime64(n) => needle.contains(n),
3633                _ => false,
3634            })
3635            .collect()
3636    }
3637
3638    /// Concatenate with another DatetimeIndex, matching
3639    /// `pd.DatetimeIndex.append(other)`. The index name is preserved when
3640    /// both operands share it; otherwise pandas drops the name.
3641    #[must_use]
3642    pub fn append(&self, other: &Self) -> Self {
3643        let mut nanos: Vec<i64> = self
3644            .index
3645            .labels()
3646            .iter()
3647            .filter_map(|label| match label {
3648                IndexLabel::Datetime64(n) => Some(*n),
3649                _ => None,
3650            })
3651            .collect();
3652        nanos.extend(other.index.labels().iter().filter_map(|label| match label {
3653            IndexLabel::Datetime64(n) => Some(*n),
3654            _ => None,
3655        }));
3656        let mut out = Self::new(nanos);
3657        if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
3658            out = out.set_name(name);
3659        }
3660        out
3661    }
3662
3663    /// Minimum non-NAT label, matching `pd.DatetimeIndex.min()`.
3664    /// Returns `None` for empty or all-NAT inputs to mirror pandas' NaT.
3665    #[must_use]
3666    pub fn min(&self) -> Option<i64> {
3667        self.index
3668            .labels()
3669            .iter()
3670            .filter_map(|label| match label {
3671                IndexLabel::Datetime64(n) if *n != i64::MIN => Some(*n),
3672                _ => None,
3673            })
3674            .min()
3675    }
3676
3677    /// Shift each label by `periods` units of `freq_nanos`, matching
3678    /// `pd.DatetimeIndex.shift(periods, freq)` once `freq` has been
3679    /// resolved to a nanosecond duration. NAT propagates as NAT;
3680    /// arithmetic overflow saturates.
3681    #[must_use]
3682    pub fn shift(&self, periods: i64, freq_nanos: i64) -> Self {
3683        let delta = periods.saturating_mul(freq_nanos);
3684        let nanos: Vec<i64> = self
3685            .index
3686            .labels()
3687            .iter()
3688            .map(|label| match label {
3689                IndexLabel::Datetime64(n) if *n != i64::MIN => n.saturating_add(delta),
3690                _ => i64::MIN,
3691            })
3692            .collect();
3693        let mut out = Self::new(nanos);
3694        if let Some(name) = self.name() {
3695            out = out.set_name(name);
3696        }
3697        out
3698    }
3699
3700    /// Positional first differences, matching `pd.DatetimeIndex.diff()`.
3701    /// Datetime deltas materialize as a TimedeltaIndex; NAT inputs propagate
3702    /// to NAT outputs, and signed `periods` follows pandas' forward/backward
3703    /// lookup direction.
3704    #[must_use]
3705    pub fn diff(&self, periods: i64) -> TimedeltaIndex {
3706        let labels = self.index.labels();
3707        optional_diffs_to_timedelta_index(
3708            positional_diff(labels.len(), periods, |current, previous| {
3709                match (&labels[current], &labels[previous]) {
3710                    (
3711                        IndexLabel::Datetime64(current_nanos),
3712                        IndexLabel::Datetime64(previous_nanos),
3713                    ) if *current_nanos != i64::MIN && *previous_nanos != i64::MIN => {
3714                        current_nanos.checked_sub(*previous_nanos)
3715                    }
3716                    _ => None,
3717                }
3718            }),
3719            self.name(),
3720        )
3721    }
3722
3723    fn round_fixed_freq(&self, freq: &str, mode: TemporalRoundMode) -> Result<Self, IndexError> {
3724        let unit_nanos = parse_fixed_temporal_freq(freq, "DatetimeIndex rounding")?;
3725        let nanos: Vec<i64> = self
3726            .index
3727            .labels()
3728            .iter()
3729            .map(|label| match label {
3730                IndexLabel::Datetime64(n) if *n != i64::MIN => {
3731                    round_nanos_to_unit(*n, unit_nanos, mode)
3732                }
3733                _ => i64::MIN,
3734            })
3735            .collect();
3736        let mut out = Self::new(nanos);
3737        if let Some(name) = self.name() {
3738            out = out.set_name(name);
3739        }
3740        Ok(out)
3741    }
3742
3743    /// Round timestamps down to a fixed pandas frequency.
3744    pub fn floor(&self, freq: &str) -> Result<Self, IndexError> {
3745        self.round_fixed_freq(freq, TemporalRoundMode::Floor)
3746    }
3747
3748    /// Round timestamps up to a fixed pandas frequency.
3749    pub fn ceil(&self, freq: &str) -> Result<Self, IndexError> {
3750        self.round_fixed_freq(freq, TemporalRoundMode::Ceil)
3751    }
3752
3753    /// Round timestamps to the nearest fixed pandas frequency, using half-even ties.
3754    pub fn round(&self, freq: &str) -> Result<Self, IndexError> {
3755        self.round_fixed_freq(freq, TemporalRoundMode::Round)
3756    }
3757
3758    /// Validate the frequency and return a clone, matching pandas DatetimeIndex.snap.
3759    pub fn snap(&self, freq: &str) -> Result<Self, IndexError> {
3760        parse_fixed_temporal_freq(freq, "DatetimeIndex.snap")?;
3761        Ok(self.clone())
3762    }
3763
3764    /// Average non-NAT label as nanoseconds-since-epoch, matching
3765    /// `pd.DatetimeIndex.mean()`. Empty / all-NAT returns `None`.
3766    /// Sum is computed in `i128` to avoid overflow.
3767    #[must_use]
3768    pub fn mean(&self) -> Option<i64> {
3769        let mut total: i128 = 0;
3770        let mut count: i128 = 0;
3771        for label in self.index.labels() {
3772            if let IndexLabel::Datetime64(n) = label
3773                && *n != i64::MIN
3774            {
3775                total += i128::from(*n);
3776                count += 1;
3777            }
3778        }
3779        if count == 0 {
3780            return None;
3781        }
3782        i64::try_from(total / count).ok()
3783    }
3784
3785    /// Sample variance over non-NAT labels in nanoseconds-squared,
3786    /// matching `pd.DatetimeIndex.var(ddof=1)`. Returns `None` for
3787    /// fewer than two non-NAT entries.
3788    #[must_use]
3789    pub fn var(&self) -> Option<f64> {
3790        let nanos: Vec<f64> = self
3791            .index
3792            .labels()
3793            .iter()
3794            .filter_map(|label| match label {
3795                IndexLabel::Datetime64(n) if *n != i64::MIN => Some(*n as f64),
3796                _ => None,
3797            })
3798            .collect();
3799        if nanos.len() < 2 {
3800            return None;
3801        }
3802        let mean = nanos.iter().sum::<f64>() / nanos.len() as f64;
3803        Some(nanos.iter().map(|n| (n - mean).powi(2)).sum::<f64>() / (nanos.len() as f64 - 1.0))
3804    }
3805
3806    /// Sample standard deviation of non-NAT labels in nanoseconds,
3807    /// matching `pd.DatetimeIndex.std(ddof=1)`. Returns `None` for
3808    /// fewer than two non-NAT entries.
3809    #[must_use]
3810    pub fn std(&self) -> Option<i64> {
3811        let nanos: Vec<f64> = self
3812            .index
3813            .labels()
3814            .iter()
3815            .filter_map(|label| match label {
3816                IndexLabel::Datetime64(n) if *n != i64::MIN => Some(*n as f64),
3817                _ => None,
3818            })
3819            .collect();
3820        if nanos.len() < 2 {
3821            return None;
3822        }
3823        let mean = nanos.iter().sum::<f64>() / nanos.len() as f64;
3824        let var =
3825            nanos.iter().map(|n| (n - mean).powi(2)).sum::<f64>() / (nanos.len() as f64 - 1.0);
3826        Some(var.sqrt() as i64)
3827    }
3828
3829    /// Median non-NAT label, matching `pd.DatetimeIndex.median()`. Empty
3830    /// returns None. For an even-length non-NAT subset, returns the
3831    /// average of the two middle values.
3832    #[must_use]
3833    pub fn median(&self) -> Option<i64> {
3834        let mut nanos: Vec<i64> = self
3835            .index
3836            .labels()
3837            .iter()
3838            .filter_map(|label| match label {
3839                IndexLabel::Datetime64(n) if *n != i64::MIN => Some(*n),
3840                _ => None,
3841            })
3842            .collect();
3843        if nanos.is_empty() {
3844            return None;
3845        }
3846        nanos.sort_unstable();
3847        let mid = nanos.len() / 2;
3848        if nanos.len() % 2 == 1 {
3849            Some(nanos[mid])
3850        } else {
3851            let total = i128::from(nanos[mid - 1]) + i128::from(nanos[mid]);
3852            i64::try_from(total / 2).ok()
3853        }
3854    }
3855
3856    /// Maximum non-NAT label, matching `pd.DatetimeIndex.max()`.
3857    #[must_use]
3858    pub fn max(&self) -> Option<i64> {
3859        self.index
3860            .labels()
3861            .iter()
3862            .filter_map(|label| match label {
3863                IndexLabel::Datetime64(n) if *n != i64::MIN => Some(*n),
3864                _ => None,
3865            })
3866            .max()
3867    }
3868
3869    /// Labels present in both indexes, matching
3870    /// `pd.DatetimeIndex.intersection(other)`. Preserves first-seen order
3871    /// from `self`.
3872    #[must_use]
3873    pub fn intersection(&self, other: &Self) -> Self {
3874        let other_set: FxHashSet<i64> = other
3875            .index
3876            .labels()
3877            .iter()
3878            .filter_map(|label| match label {
3879                IndexLabel::Datetime64(n) => Some(*n),
3880                _ => None,
3881            })
3882            .collect();
3883        let mut seen = FxHashSet::<i64>::default();
3884        let nanos: Vec<i64> = self
3885            .index
3886            .labels()
3887            .iter()
3888            .filter_map(|label| match label {
3889                IndexLabel::Datetime64(n) if other_set.contains(n) && seen.insert(*n) => Some(*n),
3890                _ => None,
3891            })
3892            .collect();
3893        let mut out = Self::new(nanos);
3894        if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
3895            out = out.set_name(name);
3896        }
3897        out
3898    }
3899
3900    /// Labels from self followed by labels from other not already present,
3901    /// matching `pd.DatetimeIndex.union(other)`.
3902    #[must_use]
3903    pub fn union(&self, other: &Self) -> Self {
3904        let mut seen = FxHashSet::<i64>::default();
3905        let mut nanos: Vec<i64> = Vec::new();
3906        for label in self
3907            .index
3908            .labels()
3909            .iter()
3910            .chain(other.index.labels().iter())
3911        {
3912            if let IndexLabel::Datetime64(n) = label
3913                && seen.insert(*n)
3914            {
3915                nanos.push(*n);
3916            }
3917        }
3918        let mut out = Self::new(nanos);
3919        if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
3920            out = out.set_name(name);
3921        }
3922        out
3923    }
3924
3925    /// Labels in self not in other, matching
3926    /// `pd.DatetimeIndex.difference(other)`.
3927    #[must_use]
3928    pub fn difference(&self, other: &Self) -> Self {
3929        let other_set: FxHashSet<i64> = other
3930            .index
3931            .labels()
3932            .iter()
3933            .filter_map(|label| match label {
3934                IndexLabel::Datetime64(n) => Some(*n),
3935                _ => None,
3936            })
3937            .collect();
3938        let mut seen = FxHashSet::<i64>::default();
3939        let nanos: Vec<i64> = self
3940            .index
3941            .labels()
3942            .iter()
3943            .filter_map(|label| match label {
3944                IndexLabel::Datetime64(n) if !other_set.contains(n) && seen.insert(*n) => Some(*n),
3945                _ => None,
3946            })
3947            .collect();
3948        let mut out = Self::new(nanos);
3949        // Per br-frankenpandas-6r1lq: difference is asymmetric — pandas
3950        // always preserves self.name (unlike union/intersection which use
3951        // shared_name).
3952        if let Some(name) = self.name() {
3953            out = out.set_name(name);
3954        }
3955        out
3956    }
3957
3958    /// Labels in either but not both, matching
3959    /// `pd.DatetimeIndex.symmetric_difference(other)`.
3960    #[must_use]
3961    pub fn symmetric_difference(&self, other: &Self) -> Self {
3962        let self_set: FxHashSet<i64> = self
3963            .index
3964            .labels()
3965            .iter()
3966            .filter_map(|label| match label {
3967                IndexLabel::Datetime64(n) => Some(*n),
3968                _ => None,
3969            })
3970            .collect();
3971        let other_set: FxHashSet<i64> = other
3972            .index
3973            .labels()
3974            .iter()
3975            .filter_map(|label| match label {
3976                IndexLabel::Datetime64(n) => Some(*n),
3977                _ => None,
3978            })
3979            .collect();
3980        let mut seen = FxHashSet::<i64>::default();
3981        let mut nanos: Vec<i64> = Vec::new();
3982        for label in self.index.labels() {
3983            if let IndexLabel::Datetime64(n) = label
3984                && !other_set.contains(n)
3985                && seen.insert(*n)
3986            {
3987                nanos.push(*n);
3988            }
3989        }
3990        for label in other.index.labels() {
3991            if let IndexLabel::Datetime64(n) = label
3992                && !self_set.contains(n)
3993                && seen.insert(*n)
3994            {
3995                nanos.push(*n);
3996            }
3997        }
3998        let mut out = Self::new(nanos);
3999        if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
4000            out = out.set_name(name);
4001        }
4002        out
4003    }
4004
4005    /// Sort labels ascending, matching `pd.DatetimeIndex.sort_values()`.
4006    /// NAT sorts first because the underlying sentinel is `i64::MIN`,
4007    /// matching pandas' `na_position='first'` default for datetime indexes.
4008    #[must_use]
4009    pub fn sort_values(&self) -> Self {
4010        let mut nanos: Vec<i64> = self
4011            .index
4012            .labels()
4013            .iter()
4014            .filter_map(|label| match label {
4015                IndexLabel::Datetime64(n) => Some(*n),
4016                _ => None,
4017            })
4018            .collect();
4019        nanos.sort_unstable();
4020        let mut out = Self::new(nanos);
4021        if let Some(name) = self.name() {
4022            out = out.set_name(name);
4023        }
4024        out
4025    }
4026
4027    /// Alias for `sort_values`, matching `pd.DatetimeIndex.sort()`.
4028    #[must_use]
4029    pub fn sort(&self) -> Self {
4030        self.sort_values()
4031    }
4032
4033    /// Remove the label at the given position, matching
4034    /// `pd.DatetimeIndex.delete(loc)`.
4035    pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
4036        let labels = self.index.labels();
4037        if loc >= labels.len() {
4038            return Err(IndexError::OutOfBounds {
4039                position: loc,
4040                length: labels.len(),
4041            });
4042        }
4043        let nanos: Vec<i64> = labels
4044            .iter()
4045            .enumerate()
4046            .filter(|(i, _)| *i != loc)
4047            .filter_map(|(_, label)| match label {
4048                IndexLabel::Datetime64(n) => Some(*n),
4049                _ => None,
4050            })
4051            .collect();
4052        let mut out = Self::new(nanos);
4053        if let Some(name) = self.name() {
4054            out = out.set_name(name);
4055        }
4056        Ok(out)
4057    }
4058
4059    /// Replace positions where `cond` is `false` with `other`, matching
4060    /// `pd.DatetimeIndex.where(cond, other)`. Pass `i64::MIN` to insert
4061    /// NAT.
4062    pub fn r#where(&self, cond: &[bool], other: i64) -> Result<Self, IndexError> {
4063        let labels = self.index.labels();
4064        if cond.len() != labels.len() {
4065            return Err(IndexError::LengthMismatch {
4066                expected: labels.len(),
4067                actual: cond.len(),
4068                context: "where: cond length must match index length".to_owned(),
4069            });
4070        }
4071        let nanos: Vec<i64> = labels
4072            .iter()
4073            .zip(cond.iter())
4074            .map(|(label, &keep)| {
4075                if keep {
4076                    match label {
4077                        IndexLabel::Datetime64(n) => *n,
4078                        _ => i64::MIN,
4079                    }
4080                } else {
4081                    other
4082                }
4083            })
4084            .collect();
4085        let mut out = Self::new(nanos);
4086        if let Some(name) = self.name() {
4087            out = out.set_name(name);
4088        }
4089        Ok(out)
4090    }
4091
4092    /// Replace positions where `mask` is `true` with `value`, matching
4093    /// `pd.DatetimeIndex.putmask(mask, value)`. The complement of `where`.
4094    pub fn putmask(&self, mask: &[bool], value: i64) -> Result<Self, IndexError> {
4095        let labels = self.index.labels();
4096        if mask.len() != labels.len() {
4097            return Err(IndexError::LengthMismatch {
4098                expected: labels.len(),
4099                actual: mask.len(),
4100                context: "putmask: mask length must match index length".to_owned(),
4101            });
4102        }
4103        let nanos: Vec<i64> = labels
4104            .iter()
4105            .zip(mask.iter())
4106            .map(|(label, &replace)| {
4107                if replace {
4108                    value
4109                } else {
4110                    match label {
4111                        IndexLabel::Datetime64(n) => *n,
4112                        _ => i64::MIN,
4113                    }
4114                }
4115            })
4116            .collect();
4117        let mut out = Self::new(nanos);
4118        if let Some(name) = self.name() {
4119            out = out.set_name(name);
4120        }
4121        Ok(out)
4122    }
4123
4124    /// Binary-search insertion position, matching
4125    /// `pd.DatetimeIndex.searchsorted(value, side)`. The needle is the
4126    /// nanoseconds-since-epoch value to locate; pandas behavior on NAT
4127    /// needles is to raise, mirrored as
4128    /// [`IndexError::InvalidArgument("searchsorted: needle cannot be missing")`].
4129    pub fn searchsorted(&self, value: i64, side: &str) -> Result<usize, IndexError> {
4130        self.index
4131            .searchsorted(&IndexLabel::Datetime64(value), side)
4132    }
4133
4134    /// Convert each label to a `chrono::DateTime<Utc>`, matching
4135    /// `pd.DatetimeIndex.to_pydatetime()`. NAT propagates as `None`.
4136    #[must_use]
4137    pub fn to_pydatetime(&self) -> Vec<Option<chrono::DateTime<chrono::Utc>>> {
4138        self.index
4139            .labels()
4140            .iter()
4141            .map(|label| match label {
4142                IndexLabel::Datetime64(nanos) => datetime_from_nanos(*nanos),
4143                _ => None,
4144            })
4145            .collect()
4146    }
4147
4148    /// Insert `value` at position `loc`, matching
4149    /// `pd.DatetimeIndex.insert(loc, value)`. `loc == len()` appends;
4150    /// `loc > len()` raises [`IndexError::OutOfBounds`].
4151    pub fn insert(&self, loc: usize, value: i64) -> Result<Self, IndexError> {
4152        let labels = self.index.labels();
4153        if loc > labels.len() {
4154            return Err(IndexError::OutOfBounds {
4155                position: loc,
4156                length: labels.len(),
4157            });
4158        }
4159        let mut nanos: Vec<i64> = labels
4160            .iter()
4161            .filter_map(|label| match label {
4162                IndexLabel::Datetime64(n) => Some(*n),
4163                _ => None,
4164            })
4165            .collect();
4166        nanos.insert(loc, value);
4167        let mut out = Self::new(nanos);
4168        if let Some(name) = self.name() {
4169            out = out.set_name(name);
4170        }
4171        Ok(out)
4172    }
4173
4174    /// Stringify each label, matching `pd.DatetimeIndex.format()`.
4175    /// Non-NAT labels render as the chrono RFC3339 timestamp; NAT
4176    /// renders as the `NaT` literal pandas uses.
4177    #[must_use]
4178    pub fn format(&self) -> Vec<String> {
4179        self.index
4180            .labels()
4181            .iter()
4182            .map(|label| match label {
4183                IndexLabel::Datetime64(nanos) => match datetime_from_nanos(*nanos) {
4184                    Some(dt) => dt.to_rfc3339(),
4185                    None => "NaT".to_owned(),
4186                },
4187                _ => "NaT".to_owned(),
4188            })
4189            .collect()
4190    }
4191
4192    /// Replace NAT positions with `value`, matching
4193    /// `pd.DatetimeIndex.fillna(value)`. Preserves the index name.
4194    #[must_use]
4195    pub fn fillna(&self, value: i64) -> Self {
4196        let nanos: Vec<i64> = self
4197            .index
4198            .labels()
4199            .iter()
4200            .map(|label| match label {
4201                IndexLabel::Datetime64(n) if *n != i64::MIN => *n,
4202                _ => value,
4203            })
4204            .collect();
4205        let mut out = Self::new(nanos);
4206        if let Some(name) = self.name() {
4207            out = out.set_name(name);
4208        }
4209        out
4210    }
4211
4212    /// Alias for [`isna`], matching `pd.DatetimeIndex.isnull()`.
4213    #[must_use]
4214    pub fn isnull(&self) -> Vec<bool> {
4215        self.isna()
4216    }
4217
4218    /// Alias for [`notna`], matching `pd.DatetimeIndex.notnull()`.
4219    #[must_use]
4220    pub fn notnull(&self) -> Vec<bool> {
4221        self.notna()
4222    }
4223
4224    /// Calendar date part of each label, matching `pd.DatetimeIndex.date`.
4225    #[must_use]
4226    pub fn date(&self) -> Vec<Option<chrono::NaiveDate>> {
4227        map_datetime_labels(self.index.labels(), |dt| dt.date_naive())
4228    }
4229
4230    /// Within-day clock time of each label, matching
4231    /// `pd.DatetimeIndex.time`.
4232    #[must_use]
4233    pub fn time(&self) -> Vec<Option<chrono::NaiveTime>> {
4234        map_datetime_labels(self.index.labels(), |dt| dt.time())
4235    }
4236
4237    /// Time component preserving timezone semantics, matching
4238    /// `pd.DatetimeIndex.timetz`. FrankenPandas currently stores
4239    /// timezone-naive UTC nanoseconds, so this matches [`Self::time`].
4240    #[must_use]
4241    pub fn timetz(&self) -> Vec<Option<chrono::NaiveTime>> {
4242        self.time()
4243    }
4244
4245    /// Convert each label to its Julian Date, matching
4246    /// `pd.DatetimeIndex.to_julian_date()`. The formula is
4247    /// `JD = unix_seconds / 86400 + 2440587.5` and the result is
4248    /// computed in f64; NAT propagates as `None`.
4249    #[must_use]
4250    pub fn to_julian_date(&self) -> Vec<Option<f64>> {
4251        const SECONDS_PER_DAY: f64 = 86_400.0;
4252        const UNIX_EPOCH_JD: f64 = 2_440_587.5;
4253        self.index
4254            .labels()
4255            .iter()
4256            .map(|label| match label {
4257                IndexLabel::Datetime64(nanos) if *nanos != i64::MIN => {
4258                    let secs = (*nanos as f64) / 1_000_000_000.0;
4259                    Some(secs / SECONDS_PER_DAY + UNIX_EPOCH_JD)
4260                }
4261                _ => None,
4262            })
4263            .collect()
4264    }
4265
4266    /// Annotate a tz-naive index with `tz`, matching
4267    /// `pd.DatetimeIndex.tz_localize(tz)`. FrankenPandas's storage is
4268    /// already UTC-naive so localizing to `"UTC"` is a no-op clone;
4269    /// every other timezone rejects until full tz metadata lands.
4270    pub fn tz_localize(&self, tz: &str) -> Result<Self, IndexError> {
4271        match tz {
4272            "UTC" | "utc" => Ok(self.clone()),
4273            other => Err(IndexError::InvalidArgument(format!(
4274                "tz_localize: only 'UTC' is supported until timezone metadata lands; got {other:?}"
4275            ))),
4276        }
4277    }
4278
4279    /// Convert a tz-aware index from its current zone to `tz`, matching
4280    /// `pd.DatetimeIndex.tz_convert(tz)`. FrankenPandas indexes are
4281    /// tz-naive (no source timezone) so this always rejects.
4282    pub fn tz_convert(&self, _tz: &str) -> Result<Self, IndexError> {
4283        Err(IndexError::InvalidArgument(
4284            "tz_convert: cannot convert tz-naive timestamps; call tz_localize('UTC') first"
4285                .to_owned(),
4286        ))
4287    }
4288
4289    /// Timezone label, matching `pd.DatetimeIndex.tz`. FrankenPandas
4290    /// stores naive UTC nanos so this always returns `None`; a
4291    /// follow-up bead will introduce timezone metadata.
4292    #[must_use]
4293    pub fn tz(&self) -> Option<String> {
4294        None
4295    }
4296
4297    /// Alias for [`tz`], matching `pd.DatetimeIndex.tzinfo`.
4298    #[must_use]
4299    pub fn tzinfo(&self) -> Option<String> {
4300        self.tz()
4301    }
4302
4303    /// Frequency string, matching `pd.DatetimeIndex.freq`. FrankenPandas
4304    /// does not infer datetime frequency yet so this returns `None`.
4305    #[must_use]
4306    pub fn freq(&self) -> Option<String> {
4307        None
4308    }
4309
4310    /// Frequency alias string, matching `pd.DatetimeIndex.freqstr`.
4311    #[must_use]
4312    pub fn freqstr(&self) -> Option<String> {
4313        self.freq()
4314    }
4315
4316    /// Inferred frequency, matching `pd.DatetimeIndex.inferred_freq`.
4317    #[must_use]
4318    pub fn inferred_freq(&self) -> Option<String> {
4319        None
4320    }
4321
4322    /// Cast to a different storage resolution, matching
4323    /// `pd.DatetimeIndex.as_unit(unit)`. FrankenPandas's storage is fixed
4324    /// at nanoseconds so only `"ns"` is supported as a no-op clone; other
4325    /// units reject with a typed compatibility error.
4326    pub fn as_unit(&self, unit: &str) -> Result<Self, IndexError> {
4327        match unit {
4328            "ns" => Ok(self.clone()),
4329            other => Err(IndexError::InvalidArgument(format!(
4330                "as_unit: only 'ns' is supported by FrankenPandas's Datetime64 storage; got {other:?}"
4331            ))),
4332        }
4333    }
4334
4335    /// Storage resolution unit, matching `pd.DatetimeIndex.unit`. Always
4336    /// `"ns"` because FrankenPandas stores Datetime64 as nanoseconds.
4337    #[must_use]
4338    pub fn unit(&self) -> &'static str {
4339        "ns"
4340    }
4341
    /// Resolution string, matching `pd.DatetimeIndex.resolution`. Always
    /// `"nanosecond"` because the underlying storage is fixed at ns.
    ///
    /// The returned string is a constant; the labels are not inspected.
    #[must_use]
    pub fn resolution(&self) -> &'static str {
        "nanosecond"
    }
4348
4349    /// First position of `value`, matching `pd.DatetimeIndex.get_loc(value)`.
4350    /// Pandas raises KeyError for missing values; this surface mirrors
4351    /// that with [`IndexError::InvalidArgument`].
4352    pub fn get_loc(&self, value: i64) -> Result<usize, IndexError> {
4353        // Delegate to Index::position, which binary-searches a monotonic
4354        // (AscendingDatetime64) index in O(log n) instead of the O(n) linear
4355        // scan, and falls back to the same first-match linear scan when unsorted
4356        // (br-frankenpandas-idxdup). Bit-identical: a Datetime64(value) needle
4357        // matches exactly the labels this scan accepted.
4358        self.index
4359            .position(&IndexLabel::Datetime64(value))
4360            .ok_or_else(|| {
4361                IndexError::InvalidArgument(format!("get_loc: {value} not in DatetimeIndex"))
4362            })
4363    }
4364
    /// Set the index name, matching `pd.DatetimeIndex.rename(name)`.
    /// Alias for `set_name`: the call is forwarded unchanged and the
    /// resulting index is returned; `self` is only borrowed.
    #[must_use]
    pub fn rename(&self, name: &str) -> Self {
        self.set_name(name)
    }
4371
4372    /// Reindex against `target`, matching
4373    /// `pd.DatetimeIndex.reindex(target)`. Returns
4374    /// `(target.clone(), indexer)` where indexer is the per-target
4375    /// position from get_indexer (with -1 for missing).
4376    #[must_use]
4377    pub fn reindex(&self, target: &Self) -> (Self, Vec<isize>) {
4378        let labels: Vec<i64> = target
4379            .index
4380            .labels()
4381            .iter()
4382            .filter_map(|label| match label {
4383                IndexLabel::Datetime64(n) => Some(*n),
4384                _ => None,
4385            })
4386            .collect();
4387        let indexer = self.get_indexer(&labels);
4388        (target.clone(), indexer)
4389    }
4390
4391    /// Locate every position matching each target, matching
4392    /// `pd.DatetimeIndex.get_indexer_non_unique(targets)`. Returns
4393    /// `(positions, missing)` where `positions` lists every source
4394    /// position matching any target (in target order) and `missing`
4395    /// lists target ordinals that had no match.
4396    #[must_use]
4397    pub fn get_indexer_non_unique(&self, targets: &[i64]) -> (Vec<isize>, Vec<usize>) {
4398        let labels = self.index.labels();
4399        let mut by_value = FxHashMap::<i64, Vec<usize>>::default();
4400        for (i, label) in labels.iter().enumerate() {
4401            if let IndexLabel::Datetime64(n) = label {
4402                by_value.entry(*n).or_default().push(i);
4403            }
4404        }
4405        let mut positions = Vec::<isize>::new();
4406        let mut missing = Vec::<usize>::new();
4407        for (idx, target) in targets.iter().enumerate() {
4408            if let Some(matches) = by_value.get(target) {
4409                positions.extend(
4410                    matches
4411                        .iter()
4412                        .map(|p| isize::try_from(*p).unwrap_or(isize::MAX)),
4413                );
4414            } else {
4415                positions.push(-1);
4416                missing.push(idx);
4417            }
4418        }
4419        (positions, missing)
4420    }
4421
    /// Alias for [`Self::get_indexer`], matching
    /// `pd.DatetimeIndex.get_indexer_for(targets)`.
    ///
    /// Forwards `targets` unchanged and returns the result as-is.
    #[must_use]
    pub fn get_indexer_for(&self, targets: &[i64]) -> Vec<isize> {
        self.get_indexer(targets)
    }
4428
4429    /// Locate each label in `targets`, matching
4430    /// `pd.DatetimeIndex.get_indexer(targets)`. Returns `Vec<isize>` where
4431    /// `-1` means "missing".
4432    #[must_use]
4433    pub fn get_indexer(&self, targets: &[i64]) -> Vec<isize> {
4434        let labels = self.index.labels();
4435        let mut positions = FxHashMap::<i64, isize>::default();
4436        for (i, label) in labels.iter().enumerate() {
4437            if let IndexLabel::Datetime64(n) = label {
4438                positions
4439                    .entry(*n)
4440                    .or_insert_with(|| isize::try_from(i).unwrap_or(isize::MAX));
4441            }
4442        }
4443        targets
4444            .iter()
4445            .map(|n| positions.get(n).copied().unwrap_or(-1))
4446            .collect()
4447    }
4448
    /// Position of the slice boundary for `label` and `side`, matching
    /// `pd.DatetimeIndex.get_slice_bound(label, side)`. Mirrors
    /// `searchsorted(label, side)`.
    ///
    /// Both arguments are passed through untouched, so the accepted `side`
    /// strings and any error are whatever `searchsorted` defines
    /// (`slice_locs` calls it with `"left"` and `"right"`).
    pub fn get_slice_bound(&self, label: i64, side: &str) -> Result<usize, IndexError> {
        self.searchsorted(label, side)
    }
4455
4456    /// Half-open positional range for a label slice, matching
4457    /// `pd.DatetimeIndex.slice_indexer(start, end)`. Wraps slice_locs
4458    /// in a `std::ops::Range<usize>`.
4459    pub fn slice_indexer(
4460        &self,
4461        start: i64,
4462        end: i64,
4463    ) -> Result<std::ops::Range<usize>, IndexError> {
4464        let (left, right) = self.slice_locs(start, end)?;
4465        Ok(left..right)
4466    }
4467
4468    /// Find positions of `[start, end]` for a label slice, matching
4469    /// `pd.DatetimeIndex.slice_locs(start, end)`. Requires the index to
4470    /// be monotonically increasing; non-monotonic input rejects.
4471    pub fn slice_locs(&self, start: i64, end: i64) -> Result<(usize, usize), IndexError> {
4472        if !self.is_monotonic_increasing() {
4473            return Err(IndexError::InvalidArgument(
4474                "slice_locs requires a monotonic increasing DatetimeIndex".to_owned(),
4475            ));
4476        }
4477        let left = self.searchsorted(start, "left")?;
4478        let right = self.searchsorted(end, "right")?;
4479        Ok((left, right))
4480    }
4481
    /// Convert to a flat [`Index`], matching
    /// `pd.DatetimeIndex.to_flat_index()`. Clone-as-Index because the
    /// underlying storage is already a flat Index of Datetime64 labels.
    ///
    /// Every call clones the wrapped index, so hold on to the result
    /// rather than calling this repeatedly.
    #[must_use]
    pub fn to_flat_index(&self) -> Index {
        self.index.clone()
    }
4489
    /// String accessor for the flat datetime labels.
    ///
    /// The accessor is built with `IndexStringAccessor::owned` over a fresh
    /// clone of the wrapped index (via `to_flat_index`).
    #[must_use]
    pub fn r#str(&self) -> IndexStringAccessor<'_> {
        IndexStringAccessor::owned(self.to_flat_index())
    }
4495
4496    /// One-column row materialization, matching `pd.DatetimeIndex.to_frame(index=False)`.
4497    #[must_use]
4498    pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
4499        self.to_flat_index().to_frame()
4500    }
4501
4502    /// Series-shaped materialization using datetime labels as both index and values.
4503    #[must_use]
4504    pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
4505        self.to_flat_index().to_series()
4506    }
4507
4508    /// Whether any datetime label coerces to true.
4509    #[must_use]
4510    pub fn any(&self) -> bool {
4511        self.to_flat_index().any()
4512    }
4513
4514    /// Whether all datetime labels coerce to true.
4515    #[must_use]
4516    pub fn all(&self) -> bool {
4517        self.to_flat_index().all()
4518    }
4519
4520    /// Get labels for a level. DatetimeIndex is flat and only accepts level 0.
4521    pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
4522        self.to_flat_index().get_level_values(level)
4523    }
4524
4525    /// Drop a level. DatetimeIndex is flat, so removing its only level is invalid.
4526    pub fn droplevel(&self, level: usize) -> Result<Index, IndexError> {
4527        self.to_flat_index().droplevel(level)
4528    }
4529
4530    /// Group equal datetime labels into position buckets.
4531    #[must_use]
4532    pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
4533        self.to_flat_index().groupby()
4534    }
4535
4536    /// Apply a function to each datetime label, returning a flat Index.
4537    #[must_use]
4538    pub fn map<F>(&self, func: F) -> Index
4539    where
4540        F: Fn(&IndexLabel) -> IndexLabel,
4541    {
4542        self.to_flat_index().map(func)
4543    }
4544
4545    /// Cast datetime labels to a pandas dtype string, returning a flat Index.
4546    pub fn astype(&self, dtype: &str) -> Result<Index, IndexError> {
4547        self.to_flat_index().astype(dtype)
4548    }
4549
4550    /// Nearest preceding-or-equal datetime label lookup.
4551    #[must_use]
4552    pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
4553        self.to_flat_index().asof(key)
4554    }
4555
4556    /// Locate nearest preceding-or-equal datetime positions for each target label.
4557    #[must_use]
4558    pub fn asof_locs(&self, where_index: &Index, mask: Option<&[bool]>) -> Vec<Option<usize>> {
4559        self.to_flat_index().asof_locs(where_index, mask)
4560    }
4561
4562    /// Drop datetime labels, returning a flat Index.
4563    #[must_use]
4564    pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Index {
4565        self.to_flat_index().drop(labels_to_drop)
4566    }
4567
4568    /// Join datetime labels with another flat Index.
4569    pub fn join(&self, other: &Index, how: &str) -> Result<Index, IndexError> {
4570        self.to_flat_index().join(other, how)
4571    }
4572
4573    /// Sort datetime labels and return the positional sorter.
4574    #[must_use]
4575    pub fn sortlevel(&self) -> (Index, Vec<usize>) {
4576        self.to_flat_index().sortlevel()
4577    }
4578
    /// Returns a clone, matching `pd.DatetimeIndex.view()`. FrankenPandas
    /// owns its label storage so view materializes a fresh clone instead
    /// of an aliasing reference.
    ///
    /// The result is independent of `self`.
    #[must_use]
    pub fn view(&self) -> Self {
        self.clone()
    }
4586
    /// Identity transpose for a 1D index, matching
    /// `pd.DatetimeIndex.transpose()`.
    ///
    /// Implemented as a plain clone of `self`.
    #[must_use]
    pub fn transpose(&self) -> Self {
        self.clone()
    }
4593
    /// Alias for [`Self::transpose`], matching `pd.DatetimeIndex.T`.
    // The upper-case name deliberately mirrors the pandas attribute, hence
    // the lint exemption below.
    #[allow(non_snake_case)]
    #[must_use]
    pub fn T(&self) -> Self {
        self.transpose()
    }
4600
    /// Flatten labels to nanoseconds-since-epoch with NAT preserved,
    /// matching `pd.DatetimeIndex.ravel()`.
    ///
    /// Forwards to `values()`, so the output is exactly what `values`
    /// produces.
    #[must_use]
    pub fn ravel(&self) -> Vec<Option<i64>> {
        self.values()
    }
4607
    /// Number of levels in this Index, matching `pd.DatetimeIndex.nlevels`.
    /// Always `1` because DatetimeIndex is a single-level index.
    ///
    /// The value is a constant; the labels are not inspected.
    #[must_use]
    pub fn nlevels(&self) -> usize {
        1
    }
4614
    /// Identity dtype-reinference for typed indexes, matching
    /// `pd.DatetimeIndex.infer_objects()`.
    ///
    /// Implemented as a plain clone of `self`; nothing is re-inferred.
    #[must_use]
    pub fn infer_objects(&self) -> Self {
        self.clone()
    }
4621
4622    /// Drop NAT labels, matching `pd.DatetimeIndex.dropna()`. Non-datetime
4623    /// labels (which the wrapper rejects on construction) and `i64::MIN`
4624    /// sentinels are removed; surviving labels keep their order.
4625    pub fn dropna(&self) -> Self {
4626        let surviving: Vec<i64> = self
4627            .index
4628            .labels()
4629            .iter()
4630            .filter_map(|label| match label {
4631                IndexLabel::Datetime64(nanos) if *nanos != i64::MIN => Some(*nanos),
4632                _ => None,
4633            })
4634            .collect();
4635        let mut filtered = Self::new(surviving);
4636        if let Some(name) = self.name() {
4637            filtered = filtered.set_name(name);
4638        }
4639        filtered
4640    }
4641
    /// Calendar year of each label (chrono `Datelike::year`), one entry
    /// per label in index order.
    ///
    /// `None` entries are decided by `map_datetime_labels`; presumably they
    /// mark NAT labels (confirm against that helper).
    #[must_use]
    pub fn year(&self) -> Vec<Option<i32>> {
        use chrono::Datelike;
        map_datetime_labels(self.index.labels(), |dt| dt.year())
    }
4647
    /// Calendar month of each label (chrono `Datelike::month`, 1..=12), one
    /// entry per label in index order.
    ///
    /// `None` entries are decided by `map_datetime_labels`; presumably they
    /// mark NAT labels (confirm against that helper).
    #[must_use]
    pub fn month(&self) -> Vec<Option<u32>> {
        use chrono::Datelike;
        map_datetime_labels(self.index.labels(), |dt| dt.month())
    }
4653
    /// Day of month of each label (chrono `Datelike::day`, starting at 1),
    /// one entry per label in index order.
    ///
    /// `None` entries are decided by `map_datetime_labels`; presumably they
    /// mark NAT labels (confirm against that helper).
    #[must_use]
    pub fn day(&self) -> Vec<Option<u32>> {
        use chrono::Datelike;
        map_datetime_labels(self.index.labels(), |dt| dt.day())
    }
4659
    /// Hour of day per label (0..=23), matching `pd.DatetimeIndex.hour`.
    ///
    /// Computed with chrono's `Timelike::hour`; `None` entries come from
    /// `map_datetime_labels`.
    #[must_use]
    pub fn hour(&self) -> Vec<Option<u32>> {
        use chrono::Timelike;
        map_datetime_labels(self.index.labels(), |dt| dt.hour())
    }
4666
    /// Minute of hour per label (0..=59), matching `pd.DatetimeIndex.minute`.
    ///
    /// Computed with chrono's `Timelike::minute`; `None` entries come from
    /// `map_datetime_labels`.
    #[must_use]
    pub fn minute(&self) -> Vec<Option<u32>> {
        use chrono::Timelike;
        map_datetime_labels(self.index.labels(), |dt| dt.minute())
    }
4673
    /// Second of minute per label (0..=59), matching `pd.DatetimeIndex.second`.
    ///
    /// Computed with chrono's `Timelike::second`; `None` entries come from
    /// `map_datetime_labels`.
    #[must_use]
    pub fn second(&self) -> Vec<Option<u32>> {
        use chrono::Timelike;
        map_datetime_labels(self.index.labels(), |dt| dt.second())
    }
4680
    /// Microsecond component (0..=999_999), matching `pd.DatetimeIndex.microsecond`.
    /// Computed from the within-second nanosecond bucket: `nanos / 1_000`.
    ///
    /// The division truncates, so the sub-microsecond remainder is dropped
    /// here; `nanosecond()` reports it separately.
    #[must_use]
    pub fn microsecond(&self) -> Vec<Option<u32>> {
        use chrono::Timelike;
        map_datetime_labels(self.index.labels(), |dt| dt.nanosecond() / 1_000)
    }
4688
    /// Nanosecond component (0..=999), matching `pd.DatetimeIndex.nanosecond`.
    /// Computed from the within-second nanosecond bucket: `nanos % 1_000`.
    ///
    /// This is the remainder left after the whole microseconds reported by
    /// `microsecond()`.
    #[must_use]
    pub fn nanosecond(&self) -> Vec<Option<u32>> {
        use chrono::Timelike;
        map_datetime_labels(self.index.labels(), |dt| dt.nanosecond() % 1_000)
    }
4696
4697    /// Integer positions whose clock time equals `time`, matching
4698    /// `pd.DatetimeIndex.indexer_at_time(time)`.
4699    pub fn indexer_at_time(&self, time: &str) -> Result<Vec<usize>, IndexError> {
4700        let target = parse_time_of_day_nanos(time, "DatetimeIndex.indexer_at_time")?;
4701        Ok(self
4702            .index
4703            .labels()
4704            .iter()
4705            .enumerate()
4706            .filter_map(|(position, label)| {
4707                (datetime_label_time_nanos(label) == Some(target)).then_some(position)
4708            })
4709            .collect())
4710    }
4711
4712    /// Integer positions whose clock time falls between `start_time` and
4713    /// `end_time`, matching `pd.DatetimeIndex.indexer_between_time`.
4714    /// Ranges that cross midnight use pandas' wrap-around semantics.
4715    pub fn indexer_between_time(
4716        &self,
4717        start_time: &str,
4718        end_time: &str,
4719        include_start: bool,
4720        include_end: bool,
4721    ) -> Result<Vec<usize>, IndexError> {
4722        let start =
4723            parse_time_of_day_nanos(start_time, "DatetimeIndex.indexer_between_time start_time")?;
4724        let end = parse_time_of_day_nanos(end_time, "DatetimeIndex.indexer_between_time end_time")?;
4725        Ok(self
4726            .index
4727            .labels()
4728            .iter()
4729            .enumerate()
4730            .filter_map(|(position, label)| {
4731                datetime_label_time_nanos(label)
4732                    .filter(|time| {
4733                        time_nanos_in_between(*time, start, end, include_start, include_end)
4734                    })
4735                    .map(|_| position)
4736            })
4737            .collect())
4738    }
4739
    /// ISO 8601 week-of-year (1..=53), matching `pd.DatetimeIndex.week`
    /// (a deprecated pandas alias preserved for parity).
    ///
    /// Computed with chrono's `iso_week().week()`; `None` entries come from
    /// `map_datetime_labels`.
    #[must_use]
    pub fn week(&self) -> Vec<Option<u32>> {
        use chrono::Datelike;
        map_datetime_labels(self.index.labels(), |dt| dt.iso_week().week())
    }
4747
4748    /// ISO calendar `(year, week, weekday)` triples, matching
4749    /// `pd.DatetimeIndex.isocalendar()`. Weekday uses pandas' Monday=1
4750    /// through Sunday=7 convention.
4751    #[must_use]
4752    pub fn isocalendar(&self) -> Vec<Option<(i32, u32, u32)>> {
4753        use chrono::Datelike;
4754        map_datetime_labels(self.index.labels(), |dt| {
4755            let iso = dt.iso_week();
4756            (iso.year(), iso.week(), dt.weekday().number_from_monday())
4757        })
4758    }
4759
    /// Alias for [`Self::week`], matching `pd.DatetimeIndex.weekofyear`.
    ///
    /// Forwards to `week` and returns its result unchanged.
    #[must_use]
    pub fn weekofyear(&self) -> Vec<Option<u32>> {
        self.week()
    }
4765
    /// Day of year (1..=366), matching `pd.DatetimeIndex.dayofyear`.
    ///
    /// Computed with chrono's `Datelike::ordinal`; `None` entries come from
    /// `map_datetime_labels`.
    #[must_use]
    pub fn dayofyear(&self) -> Vec<Option<u32>> {
        use chrono::Datelike;
        map_datetime_labels(self.index.labels(), |dt| dt.ordinal())
    }
4772
    /// Alias for [`Self::dayofyear`], matching `pd.DatetimeIndex.day_of_year`.
    ///
    /// Forwards to `dayofyear` and returns its result unchanged.
    #[must_use]
    pub fn day_of_year(&self) -> Vec<Option<u32>> {
        self.dayofyear()
    }
4778
    /// Weekday number (Monday=0..Sunday=6), matching
    /// `pd.DatetimeIndex.dayofweek`.
    ///
    /// Computed with chrono's `weekday().num_days_from_monday()`; `None`
    /// entries come from `map_datetime_labels`.
    #[must_use]
    pub fn dayofweek(&self) -> Vec<Option<u32>> {
        use chrono::Datelike;
        map_datetime_labels(self.index.labels(), |dt| {
            dt.weekday().num_days_from_monday()
        })
    }
4788
    /// Alias for [`Self::dayofweek`], matching `pd.DatetimeIndex.day_of_week`.
    ///
    /// Forwards to `dayofweek` and returns its result unchanged.
    #[must_use]
    pub fn day_of_week(&self) -> Vec<Option<u32>> {
        self.dayofweek()
    }
4794
    /// Alias for [`Self::dayofweek`], matching `pd.DatetimeIndex.weekday`.
    ///
    /// Forwards to `dayofweek` and returns its result unchanged.
    #[must_use]
    pub fn weekday(&self) -> Vec<Option<u32>> {
        self.dayofweek()
    }
4800
4801    /// Calendar quarter (1..=4), matching `pd.DatetimeIndex.quarter`.
4802    #[must_use]
4803    pub fn quarter(&self) -> Vec<Option<u32>> {
4804        use chrono::Datelike;
4805        map_datetime_labels(self.index.labels(), |dt| (dt.month() - 1) / 3 + 1)
4806    }
4807
4808    /// Whether the year is a leap year, matching
4809    /// `pd.DatetimeIndex.is_leap_year`.
4810    #[must_use]
4811    pub fn is_leap_year(&self) -> Vec<Option<bool>> {
4812        use chrono::Datelike;
4813        map_datetime_labels(self.index.labels(), |dt| {
4814            chrono::NaiveDate::from_ymd_opt(dt.year(), 1, 1).is_some_and(|d| d.leap_year())
4815        })
4816    }
4817
    /// Number of days in the calendar month of each label,
    /// matching `pd.DatetimeIndex.days_in_month`.
    ///
    /// The per-label count comes from `days_in_calendar_month(year, month)`;
    /// `None` entries come from `map_datetime_labels`.
    #[must_use]
    pub fn days_in_month(&self) -> Vec<Option<u32>> {
        use chrono::Datelike;
        map_datetime_labels(self.index.labels(), |dt| {
            days_in_calendar_month(dt.year(), dt.month())
        })
    }
4827
    /// Alias for [`Self::days_in_month`], matching `pd.DatetimeIndex.daysinmonth`.
    ///
    /// Forwards to `days_in_month` and returns its result unchanged.
    #[must_use]
    pub fn daysinmonth(&self) -> Vec<Option<u32>> {
        self.days_in_month()
    }
4833
    /// Whether the day is the first of the month, matching
    /// `pd.DatetimeIndex.is_month_start`.
    ///
    /// Only the day-of-month is examined (`day() == 1`); the time of day is
    /// ignored.
    #[must_use]
    pub fn is_month_start(&self) -> Vec<Option<bool>> {
        use chrono::Datelike;
        map_datetime_labels(self.index.labels(), |dt| dt.day() == 1)
    }
4841
4842    /// Whether the day is the last of the month, matching
4843    /// `pd.DatetimeIndex.is_month_end`.
4844    #[must_use]
4845    pub fn is_month_end(&self) -> Vec<Option<bool>> {
4846        use chrono::Datelike;
4847        map_datetime_labels(self.index.labels(), |dt| {
4848            dt.day() == days_in_calendar_month(dt.year(), dt.month())
4849        })
4850    }
4851
4852    /// Whether the timestamp is the first day of a quarter, matching
4853    /// `pd.DatetimeIndex.is_quarter_start`. Quarter starts: Jan/Apr/Jul/Oct day 1.
4854    #[must_use]
4855    pub fn is_quarter_start(&self) -> Vec<Option<bool>> {
4856        use chrono::Datelike;
4857        map_datetime_labels(self.index.labels(), |dt| {
4858            matches!(dt.month(), 1 | 4 | 7 | 10) && dt.day() == 1
4859        })
4860    }
4861
4862    /// Whether the timestamp is the last day of a quarter, matching
4863    /// `pd.DatetimeIndex.is_quarter_end`. Quarter ends: Mar/Jun/Sep/Dec last day.
4864    #[must_use]
4865    pub fn is_quarter_end(&self) -> Vec<Option<bool>> {
4866        use chrono::Datelike;
4867        map_datetime_labels(self.index.labels(), |dt| {
4868            matches!(dt.month(), 3 | 6 | 9 | 12)
4869                && dt.day() == days_in_calendar_month(dt.year(), dt.month())
4870        })
4871    }
4872
4873    /// Whether the timestamp is January 1, matching
4874    /// `pd.DatetimeIndex.is_year_start`.
4875    #[must_use]
4876    pub fn is_year_start(&self) -> Vec<Option<bool>> {
4877        use chrono::Datelike;
4878        map_datetime_labels(self.index.labels(), |dt| dt.month() == 1 && dt.day() == 1)
4879    }
4880
4881    /// Whether the timestamp is December 31, matching
4882    /// `pd.DatetimeIndex.is_year_end`.
4883    #[must_use]
4884    pub fn is_year_end(&self) -> Vec<Option<bool>> {
4885        use chrono::Datelike;
4886        map_datetime_labels(self.index.labels(), |dt| dt.month() == 12 && dt.day() == 31)
4887    }
4888
    /// Full English month name, matching `pd.DatetimeIndex.month_name()`.
    ///
    /// Names come from `month_name_english` and are always English; no
    /// locale parameter is accepted.
    #[must_use]
    pub fn month_name(&self) -> Vec<Option<String>> {
        use chrono::Datelike;
        map_datetime_labels(self.index.labels(), |dt| {
            month_name_english(dt.month()).to_owned()
        })
    }
4897
    /// Full English weekday name, matching `pd.DatetimeIndex.day_name()`.
    ///
    /// Names come from `weekday_name_english` and are always English; no
    /// locale parameter is accepted.
    #[must_use]
    pub fn day_name(&self) -> Vec<Option<String>> {
        use chrono::Datelike;
        map_datetime_labels(self.index.labels(), |dt| {
            weekday_name_english(dt.weekday()).to_owned()
        })
    }
4906
4907    /// Truncate every timestamp to midnight UTC, matching
4908    /// `pd.DatetimeIndex.normalize()`. NAT labels propagate.
4909    #[must_use]
4910    pub fn normalize(&self) -> Self {
4911        let nanos: Vec<i64> = self
4912            .index
4913            .labels()
4914            .iter()
4915            .map(|label| match label {
4916                IndexLabel::Datetime64(nanos) if *nanos != i64::MIN => {
4917                    let secs_per_day: i64 = 86_400;
4918                    let nanos_per_day: i64 = secs_per_day * 1_000_000_000;
4919                    nanos.div_euclid(nanos_per_day) * nanos_per_day
4920                }
4921                _ => i64::MIN,
4922            })
4923            .collect();
4924        let mut normalized = Self::new(nanos);
4925        if let Some(name) = self.name() {
4926            normalized = normalized.set_name(name);
4927        }
4928        normalized
4929    }
4930
4931    /// Whether every label is at midnight UTC (NAT counts as normalized),
4932    /// matching `pd.DatetimeIndex.is_normalized`.
4933    #[must_use]
4934    pub fn is_normalized(&self) -> bool {
4935        let nanos_per_day: i64 = 86_400 * 1_000_000_000;
4936        self.index.labels().iter().all(|label| match label {
4937            IndexLabel::Datetime64(nanos) => {
4938                *nanos == i64::MIN || nanos.rem_euclid(nanos_per_day) == 0
4939            }
4940            _ => true,
4941        })
4942    }
4943}
4944
/// English name for a 1-based calendar month.
///
/// Returns the empty string for any value outside `1..=12`.
fn month_name_english(month: u32) -> &'static str {
    const NAMES: [&str; 12] = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ];
    // checked_sub rejects 0; the bounds-checked lookup rejects anything above 12.
    month
        .checked_sub(1)
        .and_then(|zero_based| usize::try_from(zero_based).ok())
        .and_then(|slot| NAMES.get(slot))
        .copied()
        .unwrap_or("")
}
4962
4963fn weekday_name_english(weekday: chrono::Weekday) -> &'static str {
4964    match weekday {
4965        chrono::Weekday::Mon => "Monday",
4966        chrono::Weekday::Tue => "Tuesday",
4967        chrono::Weekday::Wed => "Wednesday",
4968        chrono::Weekday::Thu => "Thursday",
4969        chrono::Weekday::Fri => "Friday",
4970        chrono::Weekday::Sat => "Saturday",
4971        chrono::Weekday::Sun => "Sunday",
4972    }
4973}
4974
4975fn days_in_calendar_month(year: i32, month: u32) -> u32 {
4976    let next_month = if month == 12 { 1 } else { month + 1 };
4977    let next_year = if month == 12 { year + 1 } else { year };
4978    let first_of_next = chrono::NaiveDate::from_ymd_opt(next_year, next_month, 1);
4979    let first_of_this = chrono::NaiveDate::from_ymd_opt(year, month, 1);
4980    match (first_of_next, first_of_this) {
4981        (Some(next), Some(this)) => (next - this).num_days() as u32,
4982        _ => 0,
4983    }
4984}
4985
/// Public pandas-style timedelta index wrapper.
///
/// A thin newtype over a flat [`Index`]; the methods treat its labels as
/// `IndexLabel::Timedelta64` nanosecond durations.
// NOTE(review): the derived `Deserialize` builds the struct without going
// through `from_index`, so it does not run the label-kind check — confirm
// whether that is intended.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TimedeltaIndex {
    // Wrapped flat index holding the timedelta labels.
    index: Index,
}
4991
4992impl TimedeltaIndex {
    /// Build a timedelta index from raw nanosecond durations.
    ///
    /// The values are handed to `Index::from_timedelta64` unchanged.
    #[must_use]
    pub fn new(nanos: Vec<i64>) -> Self {
        Self {
            index: Index::from_timedelta64(nanos),
        }
    }
4999
    /// Wrap an existing flat [`Index`] as a `TimedeltaIndex`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `ensure_index_kind` when the
    /// `Timedelta64` label check fails.
    pub fn from_index(index: Index) -> Result<Self, IndexError> {
        // Validate before taking ownership so a rejected index is never wrapped.
        ensure_index_kind(
            &index,
            |label| matches!(label, IndexLabel::Timedelta64(_)),
            "TimedeltaIndex",
        )?;
        Ok(Self { index })
    }
5008
    /// Borrow the wrapped flat [`Index`] without copying it.
    #[must_use]
    pub fn as_index(&self) -> &Index {
        &self.index
    }
5013
    /// Consume the wrapper and return the wrapped flat [`Index`].
    #[must_use]
    pub fn into_index(self) -> Index {
        self.index
    }
5018
    /// Number of labels, as reported by the wrapped index's `len`.
    #[must_use]
    pub fn len(&self) -> usize {
        self.index.len()
    }
5023
    /// Whether the index holds no labels, as reported by the wrapped
    /// index's `is_empty`.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.index.is_empty()
    }
5028
    /// Name of the index, if one is set (forwarded from the wrapped index).
    #[must_use]
    pub fn name(&self) -> Option<&str> {
        self.index.name()
    }
5033
    /// Return a new `TimedeltaIndex` wrapping the result of the wrapped
    /// index's `set_name(name)`. `self` is only borrowed.
    #[must_use]
    pub fn set_name(&self, name: &str) -> Self {
        Self {
            index: self.index.set_name(name),
        }
    }
5040
    /// Return a new `TimedeltaIndex` wrapping the result of the wrapped
    /// index's `set_names(name)`.
    ///
    /// NOTE(review): presumably `None` clears the name — confirm against
    /// `Index::set_names`.
    #[must_use]
    pub fn set_names(&self, name: Option<&str>) -> Self {
        Self {
            index: self.index.set_names(name),
        }
    }
5047
    /// Alias for [`Self::set_names`]; forwards `name` unchanged.
    #[must_use]
    pub fn rename_index(&self, name: Option<&str>) -> Self {
        self.set_names(name)
    }
5052
    /// Name list of the index, as reported by the wrapped index's `names`.
    #[must_use]
    pub fn names(&self) -> Vec<Option<String>> {
        self.index.names()
    }
5057
    /// Independent copy of this index; implemented as `clone()`.
    #[must_use]
    pub fn copy(&self) -> Self {
        self.clone()
    }
5062
    /// One-element shape tuple, as reported by the wrapped index's `shape`.
    #[must_use]
    pub fn shape(&self) -> (usize,) {
        self.index.shape()
    }
5067
    /// Element count, as reported by the wrapped index's `size`.
    #[must_use]
    pub fn size(&self) -> usize {
        self.index.size()
    }
5072
    /// pandas-style `empty` flag, as reported by the wrapped index's `empty`.
    #[must_use]
    pub fn empty(&self) -> bool {
        self.index.empty()
    }
5077
    /// dtype string of the index. Always `"timedelta64[ns]"`; the labels
    /// are not inspected.
    #[must_use]
    pub fn dtype(&self) -> &'static str {
        "timedelta64[ns]"
    }
5082
    /// Single-element list containing [`Self::dtype`].
    #[must_use]
    pub fn dtypes(&self) -> Vec<&'static str> {
        vec![self.dtype()]
    }
5087
    /// Memory footprint as reported by the wrapped index's `memory_usage`;
    /// `deep` is passed through unchanged.
    #[must_use]
    pub fn memory_usage(&self, deep: bool) -> usize {
        self.index.memory_usage(deep)
    }
5092
    /// Byte size as reported by the wrapped index's `nbytes`.
    #[must_use]
    pub fn nbytes(&self) -> usize {
        self.index.nbytes()
    }
5097
    /// Whether any label is missing, as reported by the wrapped index's
    /// `hasnans`.
    #[must_use]
    pub fn hasnans(&self) -> bool {
        self.index.hasnans()
    }
5102
    /// Per-label missing-value mask, as reported by the wrapped index's
    /// `isna`.
    #[must_use]
    pub fn isna(&self) -> Vec<bool> {
        self.index.isna()
    }
5107
    /// Per-label present-value mask, as reported by the wrapped index's
    /// `notna`.
    #[must_use]
    pub fn notna(&self) -> Vec<bool> {
        self.index.notna()
    }
5112
    /// Whether all labels are distinct, as reported by the wrapped index's
    /// `is_unique`.
    #[must_use]
    pub fn is_unique(&self) -> bool {
        self.index.is_unique()
    }
5117
    /// Whether any label repeats, as reported by the wrapped index's
    /// `has_duplicates`.
    #[must_use]
    pub fn has_duplicates(&self) -> bool {
        self.index.has_duplicates()
    }
5122
    /// Whether the labels are monotonically increasing, as reported by the
    /// wrapped index's `is_monotonic_increasing`.
    #[must_use]
    pub fn is_monotonic_increasing(&self) -> bool {
        self.index.is_monotonic_increasing()
    }
5127
    /// Monotonicity flag, as reported by the wrapped index's `is_monotonic`.
    ///
    /// NOTE(review): presumably equivalent to `is_monotonic_increasing`, as
    /// in older pandas — confirm against `Index::is_monotonic`.
    #[must_use]
    pub fn is_monotonic(&self) -> bool {
        self.index.is_monotonic()
    }
5132
    /// Whether the labels are monotonically decreasing, as reported by the
    /// wrapped index's `is_monotonic_decreasing`.
    #[must_use]
    pub fn is_monotonic_decreasing(&self) -> bool {
        self.index.is_monotonic_decreasing()
    }
5137
    /// Count of distinct labels, as reported by the wrapped index's
    /// `nunique`.
    #[must_use]
    pub fn nunique(&self) -> usize {
        self.index.nunique()
    }
5142
    /// Count of distinct labels with explicit missing-value handling;
    /// `dropna` is passed through to the wrapped index's
    /// `nunique_with_dropna`.
    #[must_use]
    pub fn nunique_with_dropna(&self, dropna: bool) -> usize {
        self.index.nunique_with_dropna(dropna)
    }
5147
    /// Number of dimensions, as reported by the wrapped index's `ndim`.
    #[must_use]
    pub fn ndim(&self) -> usize {
        self.index.ndim()
    }
5152
5153    pub fn item(&self) -> Result<Option<i64>, IndexError> {
5154        match self.index.item()? {
5155            IndexLabel::Timedelta64(nanos) if nanos != Timedelta::NAT => Ok(Some(nanos)),
5156            IndexLabel::Timedelta64(_) => Ok(None),
5157            label => Err(IndexError::InvalidArgument(format!(
5158                "TimedeltaIndex item must be timedelta64, got {label}"
5159            ))),
5160        }
5161    }
5162
    /// Identity check: `true` only when `self` and `other` are the same
    /// object in memory (pointer equality), not merely equal in content.
    #[must_use]
    pub fn is_(&self, other: &Self) -> bool {
        std::ptr::eq(self, other)
    }
5167
    /// Content comparison, delegated to the wrapped index's `equals`.
    #[must_use]
    pub fn equals(&self, other: &Self) -> bool {
        self.index.equals(&other.index)
    }
5172
    /// Strict comparison, delegated to the wrapped index's `identical`.
    ///
    /// NOTE(review): presumably also compares metadata such as the name,
    /// as pandas' `identical` does — confirm against `Index::identical`.
    #[must_use]
    pub fn identical(&self, other: &Self) -> bool {
        self.index.identical(&other.index)
    }
5177
    /// pandas-style `holds_integer` probe. Always `false` for a timedelta
    /// index.
    #[must_use]
    pub fn holds_integer(&self) -> bool {
        false
    }
5182
    /// pandas-style inferred type string. Always `"timedelta64"`.
    #[must_use]
    pub fn inferred_type(&self) -> &'static str {
        "timedelta64"
    }
5187
    /// pandas-style `is_boolean` probe. Always `false` for a timedelta index.
    #[must_use]
    pub fn is_boolean(&self) -> bool {
        false
    }
5192
    /// pandas-style `is_categorical` probe. Always `false` for a timedelta
    /// index.
    #[must_use]
    pub fn is_categorical(&self) -> bool {
        false
    }
5197
    /// pandas-style `is_floating` probe. Always `false` for a timedelta index.
    #[must_use]
    pub fn is_floating(&self) -> bool {
        false
    }
5202
    /// pandas-style `is_integer` probe. Always `false` for a timedelta index.
    #[must_use]
    pub fn is_integer(&self) -> bool {
        false
    }
5207
    /// pandas-style `is_interval` probe. Always `false` for a timedelta index.
    #[must_use]
    pub fn is_interval(&self) -> bool {
        false
    }
5212
    /// pandas-style `is_numeric` probe. Always `false` for a timedelta index.
    #[must_use]
    pub fn is_numeric(&self) -> bool {
        false
    }
5217
    /// pandas-style `is_object` probe. Always `false` for a timedelta index.
    #[must_use]
    pub fn is_object(&self) -> bool {
        false
    }
5222
    /// Raw nanosecond duration of each label, in index order.
    ///
    /// Built with `map_timedelta_labels` and an identity mapping; NAT
    /// propagates as `None` (see `components`).
    #[must_use]
    pub fn nanos(&self) -> Vec<Option<i64>> {
        map_timedelta_labels(self.index.labels(), |nanos| nanos)
    }
5227
    /// pandas-style `values`; alias for [`Self::nanos`].
    #[must_use]
    pub fn values(&self) -> Vec<Option<i64>> {
        self.nanos()
    }
5232
    /// pandas-style `to_list`; alias for [`Self::nanos`].
    #[must_use]
    pub fn to_list(&self) -> Vec<Option<i64>> {
        self.nanos()
    }
5237
    /// pandas-style `tolist`; alias for [`Self::to_list`].
    #[must_use]
    pub fn tolist(&self) -> Vec<Option<i64>> {
        self.to_list()
    }
5242
    /// pandas-style `to_numpy`; alias for [`Self::nanos`] (returns a plain
    /// `Vec`, not an array type).
    #[must_use]
    pub fn to_numpy(&self) -> Vec<Option<i64>> {
        self.nanos()
    }
5247
    /// pandas-style `array`; alias for [`Self::nanos`].
    #[must_use]
    pub fn array(&self) -> Vec<Option<i64>> {
        self.nanos()
    }
5252
    /// Whole-day component of each duration.
    ///
    /// Uses Euclidean (flooring) division by `Timedelta::NANOS_PER_DAY`, so
    /// a negative duration rounds toward negative infinity and pairs with
    /// the non-negative remainder reported by `seconds`.
    #[must_use]
    pub fn days(&self) -> Vec<Option<i64>> {
        map_timedelta_labels(self.index.labels(), |nanos| {
            nanos.div_euclid(Timedelta::NANOS_PER_DAY)
        })
    }
5259
    /// Whole seconds within the day for each duration.
    ///
    /// Takes the non-negative Euclidean remainder after removing whole
    /// days, then truncates it to seconds, so the value is never negative
    /// even for negative durations.
    #[must_use]
    pub fn seconds(&self) -> Vec<Option<i64>> {
        map_timedelta_labels(self.index.labels(), |nanos| {
            nanos.rem_euclid(Timedelta::NANOS_PER_DAY) / Timedelta::NANOS_PER_SEC
        })
    }
5266
    /// Total duration of each label in seconds as `f64`, computed by
    /// `Timedelta::total_seconds` on the raw nanosecond value.
    #[must_use]
    pub fn total_seconds(&self) -> Vec<Option<f64>> {
        map_timedelta_labels(self.index.labels(), Timedelta::total_seconds)
    }
5271
    /// Calendar-style component rows, matching `pd.TimedeltaIndex.components`.
    /// NAT propagates as `None`.
    ///
    /// Each row is produced by `timedelta_components_for_index`, applied per
    /// label through `map_timedelta_labels`.
    #[must_use]
    pub fn components(&self) -> Vec<Option<TimedeltaComponents>> {
        map_timedelta_labels(self.index.labels(), timedelta_components_for_index)
    }
5278
5279    /// Underlying nanosecond duration, matching `pd.TimedeltaIndex.asi8`.
5280    /// `Timedelta::NAT` is preserved at the sentinel value.
5281    #[must_use]
5282    pub fn asi8(&self) -> Vec<i64> {
5283        self.index
5284            .labels()
5285            .iter()
5286            .map(|label| match label {
5287                IndexLabel::Timedelta64(nanos) => *nanos,
5288                IndexLabel::Int64(_)
5289                | IndexLabel::Utf8(_)
5290                | IndexLabel::Datetime64(_)
5291                | IndexLabel::Null(_) => Timedelta::NAT,
5292            })
5293            .collect()
5294    }
5295
5296    /// Microseconds-within-second component (0..=999_999), matching
5297    /// `pd.TimedeltaIndex.microseconds`.
5298    #[must_use]
5299    pub fn microseconds(&self) -> Vec<Option<i64>> {
5300        map_timedelta_labels(self.index.labels(), |nanos| {
5301            nanos.rem_euclid(Timedelta::NANOS_PER_SEC) / 1_000
5302        })
5303    }
5304
5305    /// Nanoseconds-within-microsecond component (0..=999), matching
5306    /// `pd.TimedeltaIndex.nanoseconds`.
5307    #[must_use]
5308    pub fn nanoseconds(&self) -> Vec<Option<i64>> {
5309        map_timedelta_labels(self.index.labels(), |nanos| nanos.rem_euclid(1_000))
5310    }
5311
5312    /// Position of the maximum label, matching `pd.TimedeltaIndex.argmax()`.
5313    /// Skips NAT and returns the first-tied position to match pandas
5314    /// `skipna=True` default.
5315    pub fn argmax(&self) -> Result<usize, IndexError> {
5316        let labels = self.index.labels();
5317        let mut best: Option<usize> = None;
5318        for (i, label) in labels.iter().enumerate() {
5319            let nanos = match label {
5320                IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => *n,
5321                _ => continue,
5322            };
5323            best = Some(match best {
5324                Some(b) => match labels[b] {
5325                    IndexLabel::Timedelta64(prev) if nanos > prev => i,
5326                    _ => b,
5327                },
5328                None => i,
5329            });
5330        }
5331        best.ok_or_else(|| {
5332            IndexError::InvalidArgument("attempt to get argmax of an empty sequence".to_owned())
5333        })
5334    }
5335
5336    /// Position of the minimum label, matching `pd.TimedeltaIndex.argmin()`.
5337    pub fn argmin(&self) -> Result<usize, IndexError> {
5338        let labels = self.index.labels();
5339        let mut best: Option<usize> = None;
5340        for (i, label) in labels.iter().enumerate() {
5341            let nanos = match label {
5342                IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => *n,
5343                _ => continue,
5344            };
5345            best = Some(match best {
5346                Some(b) => match labels[b] {
5347                    IndexLabel::Timedelta64(prev) if nanos < prev => i,
5348                    _ => b,
5349                },
5350                None => i,
5351            });
5352        }
5353        best.ok_or_else(|| {
5354            IndexError::InvalidArgument("attempt to get argmin of an empty sequence".to_owned())
5355        })
5356    }
5357
    /// Positions that would sort the labels ascending, matching
    /// `pd.TimedeltaIndex.argsort()`.
    ///
    /// Pure delegation to `Index::argsort` on the wrapped index; ordering
    /// and tie-breaking are whatever that method implements.
    #[must_use]
    pub fn argsort(&self) -> Vec<usize> {
        self.index.argsort()
    }
5364
    /// First-seen unique labels, matching `pd.TimedeltaIndex.unique()`.
    ///
    /// Deduplication is done by `Index::unique`; the result is re-wrapped
    /// with `Self::from_index`.
    ///
    /// # Errors
    ///
    /// Propagates whatever `Self::from_index` reports for the deduplicated
    /// index.
    pub fn unique(&self) -> Result<Self, IndexError> {
        Self::from_index(self.index.unique())
    }
5369
5370    /// Factorization, matching `pd.TimedeltaIndex.factorize()`. NAT inputs
5371    /// receive `-1` codes; uniques excludes NAT.
5372    pub fn factorize(&self) -> Result<(Vec<isize>, Self), IndexError> {
5373        let (codes, uniques) = self.index.factorize();
5374        Ok((codes, Self::from_index(uniques)?))
5375    }
5376
    /// Value counts, matching `pd.TimedeltaIndex.value_counts()`. NAT is
    /// dropped by default to match pandas.
    ///
    /// Pure delegation to `Index::value_counts`; the pair order and the NAT
    /// handling described above are properties of that method.
    #[must_use]
    pub fn value_counts(&self) -> Vec<(IndexLabel, usize)> {
        self.index.value_counts()
    }
5383
    /// Duplicate mask per position, matching
    /// `pd.TimedeltaIndex.duplicated(keep)`.
    ///
    /// `keep` is forwarded unchanged to `Index::duplicated`, which decides
    /// which occurrence of a repeated label is left unmarked.
    #[must_use]
    pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
        self.index.duplicated(keep)
    }
5390
    /// Drop duplicate labels, matching `pd.TimedeltaIndex.drop_duplicates()`.
    ///
    /// Deduplication is done by `Index::drop_duplicates`; the result is
    /// re-wrapped with `Self::from_index`.
    ///
    /// # Errors
    ///
    /// Propagates whatever `Self::from_index` reports for the deduplicated
    /// index.
    pub fn drop_duplicates(&self) -> Result<Self, IndexError> {
        Self::from_index(self.index.drop_duplicates())
    }
5395
5396    /// Replace positions where `cond` is `false` with `other`, matching
5397    /// `pd.TimedeltaIndex.where(cond, other)`. Pass `Timedelta::NAT` to
5398    /// insert NAT.
5399    pub fn r#where(&self, cond: &[bool], other: i64) -> Result<Self, IndexError> {
5400        let labels = self.index.labels();
5401        if cond.len() != labels.len() {
5402            return Err(IndexError::LengthMismatch {
5403                expected: labels.len(),
5404                actual: cond.len(),
5405                context: "where: cond length must match index length".to_owned(),
5406            });
5407        }
5408        let nanos: Vec<i64> = labels
5409            .iter()
5410            .zip(cond.iter())
5411            .map(|(label, &keep)| {
5412                if keep {
5413                    match label {
5414                        IndexLabel::Timedelta64(n) => *n,
5415                        _ => Timedelta::NAT,
5416                    }
5417                } else {
5418                    other
5419                }
5420            })
5421            .collect();
5422        let mut out = Self::new(nanos);
5423        if let Some(name) = self.name() {
5424            out = out.set_name(name);
5425        }
5426        Ok(out)
5427    }
5428
5429    /// Replace positions where `mask` is `true` with `value`, matching
5430    /// `pd.TimedeltaIndex.putmask(mask, value)`.
5431    pub fn putmask(&self, mask: &[bool], value: i64) -> Result<Self, IndexError> {
5432        let labels = self.index.labels();
5433        if mask.len() != labels.len() {
5434            return Err(IndexError::LengthMismatch {
5435                expected: labels.len(),
5436                actual: mask.len(),
5437                context: "putmask: mask length must match index length".to_owned(),
5438            });
5439        }
5440        let nanos: Vec<i64> = labels
5441            .iter()
5442            .zip(mask.iter())
5443            .map(|(label, &replace)| {
5444                if replace {
5445                    value
5446                } else {
5447                    match label {
5448                        IndexLabel::Timedelta64(n) => *n,
5449                        _ => Timedelta::NAT,
5450                    }
5451                }
5452            })
5453            .collect();
5454        let mut out = Self::new(nanos);
5455        if let Some(name) = self.name() {
5456            out = out.set_name(name);
5457        }
5458        Ok(out)
5459    }
5460
5461    /// Binary-search insertion position, matching
5462    /// `pd.TimedeltaIndex.searchsorted(value, side)`. The needle is a
5463    /// nanosecond duration; NAT needles raise.
5464    pub fn searchsorted(&self, value: i64, side: &str) -> Result<usize, IndexError> {
5465        self.index
5466            .searchsorted(&IndexLabel::Timedelta64(value), side)
5467    }
5468
5469    /// Insert `value` at position `loc`, matching
5470    /// `pd.TimedeltaIndex.insert(loc, value)`. `loc == len()` appends;
5471    /// `loc > len()` raises [`IndexError::OutOfBounds`].
5472    pub fn insert(&self, loc: usize, value: i64) -> Result<Self, IndexError> {
5473        let labels = self.index.labels();
5474        if loc > labels.len() {
5475            return Err(IndexError::OutOfBounds {
5476                position: loc,
5477                length: labels.len(),
5478            });
5479        }
5480        let mut nanos: Vec<i64> = labels
5481            .iter()
5482            .filter_map(|label| match label {
5483                IndexLabel::Timedelta64(n) => Some(*n),
5484                _ => None,
5485            })
5486            .collect();
5487        nanos.insert(loc, value);
5488        let mut out = Self::new(nanos);
5489        if let Some(name) = self.name() {
5490            out = out.set_name(name);
5491        }
5492        Ok(out)
5493    }
5494
5495    /// Stringify each label, matching `pd.TimedeltaIndex.format()`.
5496    /// Non-NAT labels render as a signed nanosecond integer; NAT renders
5497    /// as the `NaT` literal.
5498    #[must_use]
5499    pub fn format(&self) -> Vec<String> {
5500        self.index
5501            .labels()
5502            .iter()
5503            .map(|label| match label {
5504                IndexLabel::Timedelta64(nanos) if *nanos != Timedelta::NAT => nanos.to_string(),
5505                _ => "NaT".to_owned(),
5506            })
5507            .collect()
5508    }
5509
5510    /// Replace NAT positions with `value`, matching
5511    /// `pd.TimedeltaIndex.fillna(value)`. Preserves the index name.
5512    #[must_use]
5513    pub fn fillna(&self, value: i64) -> Self {
5514        let nanos: Vec<i64> = self
5515            .index
5516            .labels()
5517            .iter()
5518            .map(|label| match label {
5519                IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => *n,
5520                _ => value,
5521            })
5522            .collect();
5523        let mut out = Self::new(nanos);
5524        if let Some(name) = self.name() {
5525            out = out.set_name(name);
5526        }
5527        out
5528    }
5529
    /// Alias for [`Self::isna`], matching `pd.TimedeltaIndex.isnull()`.
    ///
    /// Returns exactly the mask produced by `isna()`.
    #[must_use]
    pub fn isnull(&self) -> Vec<bool> {
        self.isna()
    }
5535
    /// Alias for [`Self::notna`], matching `pd.TimedeltaIndex.notnull()`.
    ///
    /// Returns exactly the mask produced by `notna()`.
    #[must_use]
    pub fn notnull(&self) -> Vec<bool> {
        self.notna()
    }
5541
5542    /// Convert each label to a `chrono::Duration`, matching
5543    /// `pd.TimedeltaIndex.to_pytimedelta()`. NAT propagates as `None`.
5544    #[must_use]
5545    pub fn to_pytimedelta(&self) -> Vec<Option<chrono::Duration>> {
5546        self.index
5547            .labels()
5548            .iter()
5549            .map(|label| match label {
5550                IndexLabel::Timedelta64(nanos) if *nanos != Timedelta::NAT => {
5551                    Some(chrono::Duration::nanoseconds(*nanos))
5552                }
5553                _ => None,
5554            })
5555            .collect()
5556    }
5557
    /// Frequency string, matching `pd.TimedeltaIndex.freq`. FrankenPandas
    /// does not infer timedelta frequency yet so this returns `None`.
    ///
    /// The result is the constant `None`; the labels are not inspected.
    #[must_use]
    pub fn freq(&self) -> Option<String> {
        None
    }
5564
    /// Frequency alias string, matching `pd.TimedeltaIndex.freqstr`.
    ///
    /// Forwards to [`Self::freq`] and returns its value unchanged.
    #[must_use]
    pub fn freqstr(&self) -> Option<String> {
        self.freq()
    }
5570
    /// Inferred frequency, matching `pd.TimedeltaIndex.inferred_freq`.
    ///
    /// No inference is attempted: the result is the constant `None` and the
    /// labels are not inspected.
    #[must_use]
    pub fn inferred_freq(&self) -> Option<String> {
        None
    }
5576
    /// Cast to a different storage resolution, matching
    /// `pd.TimedeltaIndex.as_unit(unit)`. Only `"ns"` is supported.
    ///
    /// For `"ns"` the result is a clone of `self`.
    ///
    /// # Errors
    ///
    /// Returns [`IndexError::InvalidArgument`] for any unit other than
    /// `"ns"`; the message quotes the rejected unit.
    pub fn as_unit(&self, unit: &str) -> Result<Self, IndexError> {
        match unit {
            "ns" => Ok(self.clone()),
            other => Err(IndexError::InvalidArgument(format!(
                "as_unit: only 'ns' is supported by FrankenPandas's Timedelta64 storage; got {other:?}"
            ))),
        }
    }
5587
    /// Storage resolution unit, matching `pd.TimedeltaIndex.unit`. Always
    /// `"ns"`.
    ///
    /// Consistent with [`Self::as_unit`], which accepts only `"ns"`.
    #[must_use]
    pub fn unit(&self) -> &'static str {
        "ns"
    }
5594
    /// Resolution string, matching `pd.TimedeltaIndex.resolution`.
    /// Always `"nanosecond"`.
    ///
    /// The value is a constant; the labels are not inspected.
    #[must_use]
    pub fn resolution(&self) -> &'static str {
        "nanosecond"
    }
5601
5602    /// First position of `value`, matching `pd.TimedeltaIndex.get_loc(value)`.
5603    pub fn get_loc(&self, value: i64) -> Result<usize, IndexError> {
5604        // Binary-search a monotonic (AscendingTimedelta64) index via
5605        // Index::position; same first-match linear fallback when unsorted
5606        // (br-frankenpandas-idxdup).
5607        self.index
5608            .position(&IndexLabel::Timedelta64(value))
5609            .ok_or_else(|| {
5610                IndexError::InvalidArgument(format!("get_loc: {value} not in TimedeltaIndex"))
5611            })
5612    }
5613
    /// Set the index name, matching `pd.TimedeltaIndex.rename(name)`.
    ///
    /// Forwards to `set_name` and returns the value it produces.
    #[must_use]
    pub fn rename(&self, name: &str) -> Self {
        self.set_name(name)
    }
5619
5620    /// Reindex against `target`, matching
5621    /// `pd.TimedeltaIndex.reindex(target)`.
5622    #[must_use]
5623    pub fn reindex(&self, target: &Self) -> (Self, Vec<isize>) {
5624        let labels: Vec<i64> = target
5625            .index
5626            .labels()
5627            .iter()
5628            .filter_map(|label| match label {
5629                IndexLabel::Timedelta64(n) => Some(*n),
5630                _ => None,
5631            })
5632            .collect();
5633        let indexer = self.get_indexer(&labels);
5634        (target.clone(), indexer)
5635    }
5636
5637    /// Locate every position matching each target, matching
5638    /// `pd.TimedeltaIndex.get_indexer_non_unique(targets)`.
5639    #[must_use]
5640    pub fn get_indexer_non_unique(&self, targets: &[i64]) -> (Vec<isize>, Vec<usize>) {
5641        let labels = self.index.labels();
5642        let mut by_value = FxHashMap::<i64, Vec<usize>>::default();
5643        for (i, label) in labels.iter().enumerate() {
5644            if let IndexLabel::Timedelta64(n) = label {
5645                by_value.entry(*n).or_default().push(i);
5646            }
5647        }
5648        let mut positions = Vec::<isize>::new();
5649        let mut missing = Vec::<usize>::new();
5650        for (idx, target) in targets.iter().enumerate() {
5651            if let Some(matches) = by_value.get(target) {
5652                positions.extend(
5653                    matches
5654                        .iter()
5655                        .map(|p| isize::try_from(*p).unwrap_or(isize::MAX)),
5656                );
5657            } else {
5658                positions.push(-1);
5659                missing.push(idx);
5660            }
5661        }
5662        (positions, missing)
5663    }
5664
    /// Alias for [`Self::get_indexer`], matching
    /// `pd.TimedeltaIndex.get_indexer_for(targets)`.
    ///
    /// Returns exactly what `get_indexer(targets)` returns.
    #[must_use]
    pub fn get_indexer_for(&self, targets: &[i64]) -> Vec<isize> {
        self.get_indexer(targets)
    }
5671
5672    /// Locate each label in `targets`, matching
5673    /// `pd.TimedeltaIndex.get_indexer(targets)`.
5674    #[must_use]
5675    pub fn get_indexer(&self, targets: &[i64]) -> Vec<isize> {
5676        let labels = self.index.labels();
5677        let mut positions = FxHashMap::<i64, isize>::default();
5678        for (i, label) in labels.iter().enumerate() {
5679            if let IndexLabel::Timedelta64(n) = label {
5680                positions
5681                    .entry(*n)
5682                    .or_insert_with(|| isize::try_from(i).unwrap_or(isize::MAX));
5683            }
5684        }
5685        targets
5686            .iter()
5687            .map(|n| positions.get(n).copied().unwrap_or(-1))
5688            .collect()
5689    }
5690
    /// Position of the slice boundary for `label` and `side`, matching
    /// `pd.TimedeltaIndex.get_slice_bound(label, side)`. Mirrors
    /// `searchsorted(label, side)`.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Self::searchsorted`], to which both
    /// arguments are forwarded unchanged.
    pub fn get_slice_bound(&self, label: i64, side: &str) -> Result<usize, IndexError> {
        self.searchsorted(label, side)
    }
5697
5698    /// Half-open positional range for a label slice, matching
5699    /// `pd.TimedeltaIndex.slice_indexer(start, end)`.
5700    pub fn slice_indexer(
5701        &self,
5702        start: i64,
5703        end: i64,
5704    ) -> Result<std::ops::Range<usize>, IndexError> {
5705        let (left, right) = self.slice_locs(start, end)?;
5706        Ok(left..right)
5707    }
5708
5709    /// Find positions of `[start, end]` for a label slice, matching
5710    /// `pd.TimedeltaIndex.slice_locs(start, end)`. Requires the index to
5711    /// be monotonically increasing.
5712    pub fn slice_locs(&self, start: i64, end: i64) -> Result<(usize, usize), IndexError> {
5713        if !self.is_monotonic_increasing() {
5714            return Err(IndexError::InvalidArgument(
5715                "slice_locs requires a monotonic increasing TimedeltaIndex".to_owned(),
5716            ));
5717        }
5718        let left = self.searchsorted(start, "left")?;
5719        let right = self.searchsorted(end, "right")?;
5720        Ok((left, right))
5721    }
5722
    /// Convert to a flat [`Index`], matching
    /// `pd.TimedeltaIndex.to_flat_index()`.
    ///
    /// Returns a clone of the wrapped index; `self` is left untouched.
    #[must_use]
    pub fn to_flat_index(&self) -> Index {
        self.index.clone()
    }
5729
    /// String accessor for the flat timedelta labels.
    ///
    /// The accessor is built with `IndexStringAccessor::owned` from the
    /// clone returned by [`Self::to_flat_index`].
    #[must_use]
    pub fn r#str(&self) -> IndexStringAccessor<'_> {
        IndexStringAccessor::owned(self.to_flat_index())
    }
5735
    /// One-column row materialization, matching `pd.TimedeltaIndex.to_frame(index=False)`.
    ///
    /// Delegates to `Index::to_frame` on the clone returned by
    /// [`Self::to_flat_index`].
    #[must_use]
    pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
        self.to_flat_index().to_frame()
    }
5741
    /// Series-shaped materialization using timedelta labels as both index and values.
    ///
    /// Delegates to `Index::to_series` on the clone returned by
    /// [`Self::to_flat_index`].
    #[must_use]
    pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
        self.to_flat_index().to_series()
    }
5747
    /// Whether any timedelta label coerces to true.
    ///
    /// Delegates to `Index::any` on the clone returned by
    /// [`Self::to_flat_index`]; truthiness rules are defined there.
    #[must_use]
    pub fn any(&self) -> bool {
        self.to_flat_index().any()
    }
5753
    /// Whether all timedelta labels coerce to true.
    ///
    /// Delegates to `Index::all` on the clone returned by
    /// [`Self::to_flat_index`]; truthiness rules are defined there.
    #[must_use]
    pub fn all(&self) -> bool {
        self.to_flat_index().all()
    }
5759
    /// Get labels for a level. TimedeltaIndex is flat and only accepts level 0.
    ///
    /// # Errors
    ///
    /// Propagates the error from `Index::get_level_values`, which receives
    /// `level` unchanged and performs the validation.
    pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
        self.to_flat_index().get_level_values(level)
    }
5764
    /// Drop a level. TimedeltaIndex is flat, so removing its only level is invalid.
    ///
    /// # Errors
    ///
    /// Propagates the error from `Index::droplevel`, which receives `level`
    /// unchanged and performs the validation.
    pub fn droplevel(&self, level: usize) -> Result<Index, IndexError> {
        self.to_flat_index().droplevel(level)
    }
5769
    /// Group equal timedelta labels into position buckets.
    ///
    /// Delegates to `Index::groupby` on the clone returned by
    /// [`Self::to_flat_index`]; keys are labels and values are positions.
    #[must_use]
    pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
        self.to_flat_index().groupby()
    }
5775
    /// Apply a function to each timedelta label, returning a flat Index.
    ///
    /// `func` is handed to `Index::map` on the clone returned by
    /// [`Self::to_flat_index`]. The result is a plain [`Index`], so `func`
    /// may produce labels of any kind.
    #[must_use]
    pub fn map<F>(&self, func: F) -> Index
    where
        F: Fn(&IndexLabel) -> IndexLabel,
    {
        self.to_flat_index().map(func)
    }
5784
    /// Cast timedelta labels to a pandas dtype string, returning a flat Index.
    ///
    /// # Errors
    ///
    /// Propagates the error from `Index::astype`, which receives `dtype`
    /// unchanged and decides which dtype strings are accepted.
    pub fn astype(&self, dtype: &str) -> Result<Index, IndexError> {
        self.to_flat_index().astype(dtype)
    }
5789
    /// Nearest preceding-or-equal timedelta label lookup.
    ///
    /// Delegates to `Index::asof` on the clone returned by
    /// [`Self::to_flat_index`]; `None` is whatever that method returns when
    /// it finds no qualifying label.
    #[must_use]
    pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
        self.to_flat_index().asof(key)
    }
5795
    /// Locate nearest preceding-or-equal timedelta positions for each target label.
    ///
    /// Both arguments are forwarded unchanged to `Index::asof_locs` on the
    /// clone returned by [`Self::to_flat_index`]; the meaning of `mask` is
    /// defined by that method.
    #[must_use]
    pub fn asof_locs(&self, where_index: &Index, mask: Option<&[bool]>) -> Vec<Option<usize>> {
        self.to_flat_index().asof_locs(where_index, mask)
    }
5801
    /// Drop timedelta labels, returning a flat Index.
    ///
    /// Delegates to `Index::drop` on the clone returned by
    /// [`Self::to_flat_index`]; handling of labels that are not present is
    /// defined by that method.
    #[must_use]
    pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Index {
        self.to_flat_index().drop(labels_to_drop)
    }
5807
    /// Join timedelta labels with another flat Index.
    ///
    /// # Errors
    ///
    /// Propagates the error from `Index::join`, which receives `other` and
    /// `how` unchanged and decides which `how` strings are accepted.
    pub fn join(&self, other: &Index, how: &str) -> Result<Index, IndexError> {
        self.to_flat_index().join(other, how)
    }
5812
    /// Sort timedelta labels and return the positional sorter.
    ///
    /// Delegates to `Index::sortlevel` on the clone returned by
    /// [`Self::to_flat_index`]; the sorted labels come back as a flat
    /// [`Index`].
    #[must_use]
    pub fn sortlevel(&self) -> (Index, Vec<usize>) {
        self.to_flat_index().sortlevel()
    }
5818
    /// Returns a clone, matching `pd.TimedeltaIndex.view()`.
    ///
    /// The result is produced by `Clone::clone` on `self`.
    #[must_use]
    pub fn view(&self) -> Self {
        self.clone()
    }
5824
    /// Identity transpose for a 1D index, matching
    /// `pd.TimedeltaIndex.transpose()`.
    ///
    /// Returns a clone of `self`.
    #[must_use]
    pub fn transpose(&self) -> Self {
        self.clone()
    }
5831
    /// Alias for [`Self::transpose`], matching `pd.TimedeltaIndex.T`.
    // The upper-case name mirrors the pandas attribute, hence the lint
    // exemption below.
    #[allow(non_snake_case)]
    #[must_use]
    pub fn T(&self) -> Self {
        self.transpose()
    }
5838
    /// Flatten labels to nanosecond durations with NAT preserved,
    /// matching `pd.TimedeltaIndex.ravel()`.
    ///
    /// Forwards to [`Self::values`] and returns its result unchanged.
    #[must_use]
    pub fn ravel(&self) -> Vec<Option<i64>> {
        self.values()
    }
5845
    /// Number of levels, matching `pd.TimedeltaIndex.nlevels`. Always `1`.
    ///
    /// The value is a constant; the labels are not inspected.
    #[must_use]
    pub fn nlevels(&self) -> usize {
        1
    }
5851
    /// Identity dtype-reinference for typed indexes, matching
    /// `pd.TimedeltaIndex.infer_objects()`.
    ///
    /// No inference runs: the result is a clone of `self`.
    #[must_use]
    pub fn infer_objects(&self) -> Self {
        self.clone()
    }
5858
5859    /// Drop NAT labels, matching `pd.TimedeltaIndex.dropna()`.
5860    pub fn dropna(&self) -> Self {
5861        let surviving: Vec<i64> = self
5862            .index
5863            .labels()
5864            .iter()
5865            .filter_map(|label| match label {
5866                IndexLabel::Timedelta64(nanos) if *nanos != Timedelta::NAT => Some(*nanos),
5867                _ => None,
5868            })
5869            .collect();
5870        let mut filtered = Self::new(surviving);
5871        if let Some(name) = self.name() {
5872            filtered = filtered.set_name(name);
5873        }
5874        filtered
5875    }
5876
5877    /// Pick labels at the given positions, matching
5878    /// `pd.TimedeltaIndex.take()`. Out-of-bounds positions raise
5879    /// [`IndexError::OutOfBounds`].
5880    pub fn take(&self, positions: &[usize]) -> Result<Self, IndexError> {
5881        let labels = self.index.labels();
5882        for &p in positions {
5883            if p >= labels.len() {
5884                return Err(IndexError::OutOfBounds {
5885                    position: p,
5886                    length: labels.len(),
5887                });
5888            }
5889        }
5890        let nanos: Vec<i64> = positions
5891            .iter()
5892            .map(|&p| match labels[p] {
5893                IndexLabel::Timedelta64(n) => n,
5894                _ => Timedelta::NAT,
5895            })
5896            .collect();
5897        let mut out = Self::new(nanos);
5898        if let Some(name) = self.name() {
5899            out = out.set_name(name);
5900        }
5901        Ok(out)
5902    }
5903
5904    /// Repeat each label `repeats` times, matching
5905    /// `pd.TimedeltaIndex.repeat()`.
5906    #[must_use]
5907    pub fn repeat(&self, repeats: usize) -> Self {
5908        let mut out = Vec::with_capacity(self.len() * repeats);
5909        for label in self.index.labels() {
5910            if let IndexLabel::Timedelta64(n) = label {
5911                for _ in 0..repeats {
5912                    out.push(*n);
5913                }
5914            }
5915        }
5916        let mut result = Self::new(out);
5917        if let Some(name) = self.name() {
5918            result = result.set_name(name);
5919        }
5920        result
5921    }
5922
5923    /// Per-position membership mask, matching
5924    /// `pd.TimedeltaIndex.isin(values)`. `values` is interpreted as a slice
5925    /// of nanosecond durations; pass `Timedelta::NAT` to test for NAT.
5926    #[must_use]
5927    pub fn isin(&self, values: &[i64]) -> Vec<bool> {
5928        let needle: FxHashSet<i64> = values.iter().copied().collect();
5929        self.index
5930            .labels()
5931            .iter()
5932            .map(|label| match label {
5933                IndexLabel::Timedelta64(n) => needle.contains(n),
5934                _ => false,
5935            })
5936            .collect()
5937    }
5938
5939    /// Concatenate with another TimedeltaIndex, matching
5940    /// `pd.TimedeltaIndex.append(other)`. Preserves the index name when both
5941    /// operands share it; otherwise pandas drops the name.
5942    #[must_use]
5943    pub fn append(&self, other: &Self) -> Self {
5944        let mut nanos: Vec<i64> = self
5945            .index
5946            .labels()
5947            .iter()
5948            .filter_map(|label| match label {
5949                IndexLabel::Timedelta64(n) => Some(*n),
5950                _ => None,
5951            })
5952            .collect();
5953        nanos.extend(other.index.labels().iter().filter_map(|label| match label {
5954            IndexLabel::Timedelta64(n) => Some(*n),
5955            _ => None,
5956        }));
5957        let mut out = Self::new(nanos);
5958        if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
5959            out = out.set_name(name);
5960        }
5961        out
5962    }
5963
5964    /// Minimum non-NAT label, matching `pd.TimedeltaIndex.min()`.
5965    #[must_use]
5966    pub fn min(&self) -> Option<i64> {
5967        self.index
5968            .labels()
5969            .iter()
5970            .filter_map(|label| match label {
5971                IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => Some(*n),
5972                _ => None,
5973            })
5974            .min()
5975    }
5976
5977    /// Shift each label by `periods` units of `freq_nanos`, matching
5978    /// `pd.TimedeltaIndex.shift(periods, freq)`. NAT propagates as NAT.
5979    #[must_use]
5980    pub fn shift(&self, periods: i64, freq_nanos: i64) -> Self {
5981        let delta = periods.saturating_mul(freq_nanos);
5982        let nanos: Vec<i64> = self
5983            .index
5984            .labels()
5985            .iter()
5986            .map(|label| match label {
5987                IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => n.saturating_add(delta),
5988                _ => Timedelta::NAT,
5989            })
5990            .collect();
5991        let mut out = Self::new(nanos);
5992        if let Some(name) = self.name() {
5993            out = out.set_name(name);
5994        }
5995        out
5996    }
5997
5998    /// Positional first differences, matching `pd.TimedeltaIndex.diff()`.
5999    /// NAT inputs propagate and signed `periods` follows pandas' lookup
6000    /// direction.
6001    #[must_use]
6002    pub fn diff(&self, periods: i64) -> Self {
6003        let labels = self.index.labels();
6004        optional_diffs_to_timedelta_index(
6005            positional_diff(labels.len(), periods, |current, previous| {
6006                match (&labels[current], &labels[previous]) {
6007                    (
6008                        IndexLabel::Timedelta64(current_nanos),
6009                        IndexLabel::Timedelta64(previous_nanos),
6010                    ) if *current_nanos != Timedelta::NAT && *previous_nanos != Timedelta::NAT => {
6011                        current_nanos.checked_sub(*previous_nanos)
6012                    }
6013                    _ => None,
6014                }
6015            }),
6016            self.name(),
6017        )
6018    }
6019
6020    fn round_fixed_freq(&self, freq: &str, mode: TemporalRoundMode) -> Result<Self, IndexError> {
6021        let unit_nanos = parse_fixed_temporal_freq(freq, "TimedeltaIndex rounding")?;
6022        let nanos: Vec<i64> = self
6023            .index
6024            .labels()
6025            .iter()
6026            .map(|label| match label {
6027                IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => {
6028                    round_nanos_to_unit(*n, unit_nanos, mode)
6029                }
6030                _ => Timedelta::NAT,
6031            })
6032            .collect();
6033        let mut out = Self::new(nanos);
6034        if let Some(name) = self.name() {
6035            out = out.set_name(name);
6036        }
6037        Ok(out)
6038    }
6039
    /// Round timedeltas down to a fixed pandas frequency.
    ///
    /// Thin wrapper over `round_fixed_freq` with `TemporalRoundMode::Floor`.
    ///
    /// # Errors
    ///
    /// Propagates the error `round_fixed_freq` returns for a rejected `freq`.
    pub fn floor(&self, freq: &str) -> Result<Self, IndexError> {
        self.round_fixed_freq(freq, TemporalRoundMode::Floor)
    }
6044
    /// Round timedeltas up to a fixed pandas frequency.
    ///
    /// Thin wrapper over `round_fixed_freq` with `TemporalRoundMode::Ceil`.
    ///
    /// # Errors
    ///
    /// Propagates the error `round_fixed_freq` returns for a rejected `freq`.
    pub fn ceil(&self, freq: &str) -> Result<Self, IndexError> {
        self.round_fixed_freq(freq, TemporalRoundMode::Ceil)
    }
6049
    /// Round timedeltas to the nearest fixed pandas frequency, using half-even ties.
    ///
    /// Thin wrapper over `round_fixed_freq` with `TemporalRoundMode::Round`;
    /// the tie-breaking rule is implemented by that mode.
    ///
    /// # Errors
    ///
    /// Propagates the error `round_fixed_freq` returns for a rejected `freq`.
    pub fn round(&self, freq: &str) -> Result<Self, IndexError> {
        self.round_fixed_freq(freq, TemporalRoundMode::Round)
    }
6054
6055    /// Average non-NAT label as nanosecond duration, matching
6056    /// `pd.TimedeltaIndex.mean()`. Empty / all-NAT returns `None`.
6057    #[must_use]
6058    pub fn mean(&self) -> Option<i64> {
6059        let mut total: i128 = 0;
6060        let mut count: i128 = 0;
6061        for label in self.index.labels() {
6062            if let IndexLabel::Timedelta64(n) = label
6063                && *n != Timedelta::NAT
6064            {
6065                total += i128::from(*n);
6066                count += 1;
6067            }
6068        }
6069        if count == 0 {
6070            return None;
6071        }
6072        i64::try_from(total / count).ok()
6073    }
6074
6075    /// Sum of non-NAT labels as nanosecond duration, matching
6076    /// `pd.TimedeltaIndex.sum()`. Returns `Some(0)` for empty inputs to
6077    /// match pandas. Sum is computed in `i128` to avoid overflow before
6078    /// narrowing back to `i64`.
6079    #[must_use]
6080    pub fn sum(&self) -> Option<i64> {
6081        let mut total: i128 = 0;
6082        for label in self.index.labels() {
6083            if let IndexLabel::Timedelta64(n) = label
6084                && *n != Timedelta::NAT
6085            {
6086                total += i128::from(*n);
6087            }
6088        }
6089        i64::try_from(total).ok()
6090    }
6091
6092    /// Sample variance over non-NAT labels in nanoseconds-squared,
6093    /// matching `pd.TimedeltaIndex.var(ddof=1)`. Returns `None` for
6094    /// fewer than two non-NAT entries.
6095    #[must_use]
6096    pub fn var(&self) -> Option<f64> {
6097        let nanos: Vec<f64> = self
6098            .index
6099            .labels()
6100            .iter()
6101            .filter_map(|label| match label {
6102                IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => Some(*n as f64),
6103                _ => None,
6104            })
6105            .collect();
6106        if nanos.len() < 2 {
6107            return None;
6108        }
6109        let mean = nanos.iter().sum::<f64>() / nanos.len() as f64;
6110        Some(nanos.iter().map(|n| (n - mean).powi(2)).sum::<f64>() / (nanos.len() as f64 - 1.0))
6111    }
6112
6113    /// Sample standard deviation of non-NAT labels in nanoseconds,
6114    /// matching `pd.TimedeltaIndex.std(ddof=1)`. Returns `None` for
6115    /// fewer than two non-NAT entries.
6116    #[must_use]
6117    pub fn std(&self) -> Option<i64> {
6118        let nanos: Vec<f64> = self
6119            .index
6120            .labels()
6121            .iter()
6122            .filter_map(|label| match label {
6123                IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => Some(*n as f64),
6124                _ => None,
6125            })
6126            .collect();
6127        if nanos.len() < 2 {
6128            return None;
6129        }
6130        let mean = nanos.iter().sum::<f64>() / nanos.len() as f64;
6131        let var =
6132            nanos.iter().map(|n| (n - mean).powi(2)).sum::<f64>() / (nanos.len() as f64 - 1.0);
6133        Some(var.sqrt() as i64)
6134    }
6135
6136    /// Median non-NAT label, matching `pd.TimedeltaIndex.median()`.
6137    #[must_use]
6138    pub fn median(&self) -> Option<i64> {
6139        let mut nanos: Vec<i64> = self
6140            .index
6141            .labels()
6142            .iter()
6143            .filter_map(|label| match label {
6144                IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => Some(*n),
6145                _ => None,
6146            })
6147            .collect();
6148        if nanos.is_empty() {
6149            return None;
6150        }
6151        nanos.sort_unstable();
6152        let mid = nanos.len() / 2;
6153        if nanos.len() % 2 == 1 {
6154            Some(nanos[mid])
6155        } else {
6156            let total = i128::from(nanos[mid - 1]) + i128::from(nanos[mid]);
6157            i64::try_from(total / 2).ok()
6158        }
6159    }
6160
6161    /// Maximum non-NAT label, matching `pd.TimedeltaIndex.max()`.
6162    #[must_use]
6163    pub fn max(&self) -> Option<i64> {
6164        self.index
6165            .labels()
6166            .iter()
6167            .filter_map(|label| match label {
6168                IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => Some(*n),
6169                _ => None,
6170            })
6171            .max()
6172    }
6173
6174    /// Labels present in both indexes, matching
6175    /// `pd.TimedeltaIndex.intersection(other)`.
6176    #[must_use]
6177    pub fn intersection(&self, other: &Self) -> Self {
6178        let other_set: FxHashSet<i64> = other
6179            .index
6180            .labels()
6181            .iter()
6182            .filter_map(|label| match label {
6183                IndexLabel::Timedelta64(n) => Some(*n),
6184                _ => None,
6185            })
6186            .collect();
6187        let mut seen = FxHashSet::<i64>::default();
6188        let nanos: Vec<i64> = self
6189            .index
6190            .labels()
6191            .iter()
6192            .filter_map(|label| match label {
6193                IndexLabel::Timedelta64(n) if other_set.contains(n) && seen.insert(*n) => Some(*n),
6194                _ => None,
6195            })
6196            .collect();
6197        let mut out = Self::new(nanos);
6198        if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
6199            out = out.set_name(name);
6200        }
6201        out
6202    }
6203
6204    /// Labels from self followed by labels from other not already present,
6205    /// matching `pd.TimedeltaIndex.union(other)`.
6206    #[must_use]
6207    pub fn union(&self, other: &Self) -> Self {
6208        let mut seen = FxHashSet::<i64>::default();
6209        let mut nanos: Vec<i64> = Vec::new();
6210        for label in self
6211            .index
6212            .labels()
6213            .iter()
6214            .chain(other.index.labels().iter())
6215        {
6216            if let IndexLabel::Timedelta64(n) = label
6217                && seen.insert(*n)
6218            {
6219                nanos.push(*n);
6220            }
6221        }
6222        let mut out = Self::new(nanos);
6223        if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
6224            out = out.set_name(name);
6225        }
6226        out
6227    }
6228
6229    /// Labels in self not in other, matching
6230    /// `pd.TimedeltaIndex.difference(other)`.
6231    #[must_use]
6232    pub fn difference(&self, other: &Self) -> Self {
6233        let other_set: FxHashSet<i64> = other
6234            .index
6235            .labels()
6236            .iter()
6237            .filter_map(|label| match label {
6238                IndexLabel::Timedelta64(n) => Some(*n),
6239                _ => None,
6240            })
6241            .collect();
6242        let mut seen = FxHashSet::<i64>::default();
6243        let nanos: Vec<i64> = self
6244            .index
6245            .labels()
6246            .iter()
6247            .filter_map(|label| match label {
6248                IndexLabel::Timedelta64(n) if !other_set.contains(n) && seen.insert(*n) => Some(*n),
6249                _ => None,
6250            })
6251            .collect();
6252        let mut out = Self::new(nanos);
6253        // Per br-frankenpandas-6r1lq: difference preserves self.name only
6254        // (asymmetric op).
6255        if let Some(name) = self.name() {
6256            out = out.set_name(name);
6257        }
6258        out
6259    }
6260
6261    /// Labels in either but not both, matching
6262    /// `pd.TimedeltaIndex.symmetric_difference(other)`.
6263    #[must_use]
6264    pub fn symmetric_difference(&self, other: &Self) -> Self {
6265        let self_set: FxHashSet<i64> = self
6266            .index
6267            .labels()
6268            .iter()
6269            .filter_map(|label| match label {
6270                IndexLabel::Timedelta64(n) => Some(*n),
6271                _ => None,
6272            })
6273            .collect();
6274        let other_set: FxHashSet<i64> = other
6275            .index
6276            .labels()
6277            .iter()
6278            .filter_map(|label| match label {
6279                IndexLabel::Timedelta64(n) => Some(*n),
6280                _ => None,
6281            })
6282            .collect();
6283        let mut seen = FxHashSet::<i64>::default();
6284        let mut nanos: Vec<i64> = Vec::new();
6285        for label in self.index.labels() {
6286            if let IndexLabel::Timedelta64(n) = label
6287                && !other_set.contains(n)
6288                && seen.insert(*n)
6289            {
6290                nanos.push(*n);
6291            }
6292        }
6293        for label in other.index.labels() {
6294            if let IndexLabel::Timedelta64(n) = label
6295                && !self_set.contains(n)
6296                && seen.insert(*n)
6297            {
6298                nanos.push(*n);
6299            }
6300        }
6301        let mut out = Self::new(nanos);
6302        if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
6303            out = out.set_name(name);
6304        }
6305        out
6306    }
6307
6308    /// Sort labels ascending, matching `pd.TimedeltaIndex.sort_values()`.
6309    /// NAT sorts first (Timedelta::NAT sentinel) to match pandas
6310    /// `na_position='first'` default.
6311    #[must_use]
6312    pub fn sort_values(&self) -> Self {
6313        let mut nanos: Vec<i64> = self
6314            .index
6315            .labels()
6316            .iter()
6317            .filter_map(|label| match label {
6318                IndexLabel::Timedelta64(n) => Some(*n),
6319                _ => None,
6320            })
6321            .collect();
6322        nanos.sort_unstable();
6323        let mut out = Self::new(nanos);
6324        if let Some(name) = self.name() {
6325            out = out.set_name(name);
6326        }
6327        out
6328    }
6329
    /// Alias for [`sort_values`](Self::sort_values): returns a new sorted
    /// index and leaves `self` untouched.
    ///
    /// NOTE(review): pandas' own `Index.sort()` raises `TypeError` and points
    /// callers at `sort_values()`, so this alias is a convenience rather than
    /// strict parity — confirm that is intended.
    #[must_use]
    pub fn sort(&self) -> Self {
        self.sort_values()
    }
6335
6336    /// Remove the label at the given position, matching
6337    /// `pd.TimedeltaIndex.delete(loc)`.
6338    pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
6339        let labels = self.index.labels();
6340        if loc >= labels.len() {
6341            return Err(IndexError::OutOfBounds {
6342                position: loc,
6343                length: labels.len(),
6344            });
6345        }
6346        let nanos: Vec<i64> = labels
6347            .iter()
6348            .enumerate()
6349            .filter(|(i, _)| *i != loc)
6350            .filter_map(|(_, label)| match label {
6351                IndexLabel::Timedelta64(n) => Some(*n),
6352                _ => None,
6353            })
6354            .collect();
6355        let mut out = Self::new(nanos);
6356        if let Some(name) = self.name() {
6357            out = out.set_name(name);
6358        }
6359        Ok(out)
6360    }
6361}
6362
/// Public pandas-style period index wrapper.
///
/// `Period` already lives in `fp-types`; this wrapper gives callers a typed
/// index container while DataFrame integration can still materialize through
/// string labels until a dedicated Period `IndexLabel` variant lands.
///
/// The derived `PartialEq` compares both the stored periods and the name; use
/// [`PeriodIndex::equals`] for a comparison that ignores the name.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PeriodIndex {
    /// Period labels in positional order.
    values: Vec<Period>,
    /// Optional index name; `None` when the index is unnamed.
    name: Option<String>,
}
6373
6374impl PeriodIndex {
    /// Wraps the given periods, in order, as an unnamed index.
    ///
    /// No validation is performed here; in particular the periods are not
    /// checked for a common frequency.
    #[must_use]
    pub fn new(values: Vec<Period>) -> Self {
        Self { values, name: None }
    }
6379
6380    /// Construct a PeriodIndex from raw ordinal values and a frequency,
6381    /// matching `pd.PeriodIndex.from_ordinals(ordinals, freq)`.
6382    #[must_use]
6383    pub fn from_ordinals(ordinals: &[i64], freq: PeriodFreq) -> Self {
6384        let values: Vec<Period> = ordinals
6385            .iter()
6386            .map(|&ordinal| Period::new(ordinal, freq))
6387            .collect();
6388        Self { values, name: None }
6389    }
6390
6391    pub fn from_fields(fields: PeriodFields<'_>) -> Result<Self, IndexError> {
6392        validate_period_fields(&fields)?;
6393        let freq = period_fields_freq(&fields)?;
6394        let values = (0..fields.year.len())
6395            .map(|position| period_from_fields_at(&fields, freq, position))
6396            .collect::<Result<Vec<_>, _>>()?;
6397        Ok(Self { values, name: None })
6398    }
6399
    /// Builds an unnamed index from the periods produced by
    /// `fp_types::period_range(start, periods)`.
    ///
    /// NOTE(review): presumably `periods` consecutive periods beginning at
    /// `start` — confirm against `fp_types::period_range`.
    #[must_use]
    pub fn from_range(start: Period, periods: usize) -> Self {
        Self::new(fp_types::period_range(start, periods))
    }
6404
    /// Borrows the stored periods in positional order.
    #[must_use]
    pub fn values(&self) -> &[Period] {
        &self.values
    }
6409
    /// Number of periods in the index.
    #[must_use]
    pub fn len(&self) -> usize {
        self.values.len()
    }
6414
    /// `true` when the index holds no periods.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.values.is_empty()
    }
6419
    /// Borrows the index name, or `None` when the index is unnamed.
    #[must_use]
    pub fn name(&self) -> Option<&str> {
        self.name.as_deref()
    }
6424
6425    #[must_use]
6426    pub fn set_name(&self, name: &str) -> Self {
6427        let mut out = self.clone();
6428        out.name = Some(name.to_owned());
6429        out
6430    }
6431
6432    #[must_use]
6433    pub fn set_names(&self, name: Option<&str>) -> Self {
6434        let mut out = self.clone();
6435        out.name = name.map(str::to_owned);
6436        out
6437    }
6438
    /// Alias for [`set_names`](Self::set_names): returns a renamed (or, with
    /// `None`, unnamed) copy.
    #[must_use]
    pub fn rename_index(&self, name: Option<&str>) -> Self {
        self.set_names(name)
    }
6443
    /// The index name wrapped in a one-element list (a flat index has exactly
    /// one level); the element is `None` when the index is unnamed.
    #[must_use]
    pub fn names(&self) -> Vec<Option<String>> {
        vec![self.name.clone()]
    }
6448
    /// Returns an independent clone of the index (periods and name).
    #[must_use]
    pub fn copy(&self) -> Self {
        self.clone()
    }
6453
    /// One-element shape tuple `(len,)`.
    #[must_use]
    pub fn shape(&self) -> (usize,) {
        (self.len(),)
    }
6458
    /// Number of periods; same value as [`len`](Self::len).
    #[must_use]
    pub fn size(&self) -> usize {
        self.len()
    }
6463
    /// `true` when the index holds no periods; same value as
    /// [`is_empty`](Self::is_empty).
    #[must_use]
    pub fn empty(&self) -> bool {
        self.is_empty()
    }
6468
6469    #[must_use]
6470    pub fn dtype(&self) -> String {
6471        self.freq().map_or_else(
6472            || "period[unknown]".to_owned(),
6473            |freq| format!("period[{freq}]"),
6474        )
6475    }
6476
    /// The [`dtype`](Self::dtype) string wrapped in a one-element list.
    #[must_use]
    pub fn dtypes(&self) -> Vec<String> {
        vec![self.dtype()]
    }
6481
    /// Whether any period label is missing.
    ///
    /// FrankenPandas `Period` currently has no NaT sentinel, so this is
    /// always false until native period missing values are introduced. The
    /// stored periods are not inspected.
    #[must_use]
    pub fn hasnans(&self) -> bool {
        false
    }
6490
    /// Missing-value mask, matching `pd.PeriodIndex.isna()`.
    ///
    /// Returns one `false` per period: the stored periods are not inspected,
    /// so no position is ever reported as missing.
    #[must_use]
    pub fn isna(&self) -> Vec<bool> {
        vec![false; self.len()]
    }
6496
    /// Alias for [`isna`](Self::isna), matching `pd.PeriodIndex.isnull()`.
    /// Returns the same all-`false` mask.
    #[must_use]
    pub fn isnull(&self) -> Vec<bool> {
        self.isna()
    }
6502
    /// Non-missing mask, matching `pd.PeriodIndex.notna()`.
    ///
    /// Returns one `true` per period: the stored periods are not inspected,
    /// so every position is reported as present.
    #[must_use]
    pub fn notna(&self) -> Vec<bool> {
        vec![true; self.len()]
    }
6508
    /// Alias for [`notna`](Self::notna), matching `pd.PeriodIndex.notnull()`.
    /// Returns the same all-`true` mask.
    #[must_use]
    pub fn notnull(&self) -> Vec<bool> {
        self.notna()
    }
6514
    /// Drop missing labels, matching `pd.PeriodIndex.dropna()`.
    ///
    /// With no native Period NaT sentinel, this is a name-preserving clone:
    /// nothing is ever removed.
    #[must_use]
    pub fn dropna(&self) -> Self {
        self.clone()
    }
6522
6523    #[must_use]
6524    pub fn memory_usage(&self, deep: bool) -> usize {
6525        let name_bytes = if deep {
6526            self.name.as_ref().map_or(0, String::len)
6527        } else {
6528            0
6529        };
6530        self.values.len() * std::mem::size_of::<Period>() + name_bytes
6531    }
6532
    /// Bytes occupied by the stored periods; equivalent to
    /// [`memory_usage(false)`](Self::memory_usage), so the name is excluded.
    #[must_use]
    pub fn nbytes(&self) -> usize {
        self.memory_usage(false)
    }
6537
6538    fn compare_periods(left: &Period, right: &Period) -> std::cmp::Ordering {
6539        left.cmp_same_freq(right).unwrap_or_else(|| {
6540            left.freq
6541                .cmp(&right.freq)
6542                .then(left.ordinal.cmp(&right.ordinal))
6543        })
6544    }
6545
6546    #[must_use]
6547    pub fn is_unique(&self) -> bool {
6548        let unique: FxHashSet<&Period> = self.values.iter().collect();
6549        unique.len() == self.values.len()
6550    }
6551
    /// `true` when at least one period occurs more than once; the negation of
    /// [`is_unique`](Self::is_unique).
    #[must_use]
    pub fn has_duplicates(&self) -> bool {
        !self.is_unique()
    }
6556
6557    #[must_use]
6558    pub fn is_monotonic_increasing(&self) -> bool {
6559        self.values
6560            .windows(2)
6561            .all(|window| Self::compare_periods(&window[0], &window[1]).is_le())
6562    }
6563
    /// Alias for [`is_monotonic_increasing`](Self::is_monotonic_increasing).
    #[must_use]
    pub fn is_monotonic(&self) -> bool {
        self.is_monotonic_increasing()
    }
6568
6569    #[must_use]
6570    pub fn is_monotonic_decreasing(&self) -> bool {
6571        self.values
6572            .windows(2)
6573            .all(|window| Self::compare_periods(&window[0], &window[1]).is_ge())
6574    }
6575
6576    #[must_use]
6577    pub fn nunique(&self) -> usize {
6578        self.values.iter().collect::<FxHashSet<_>>().len()
6579    }
6580
    /// Number of dimensions; always `1` for a flat index.
    #[must_use]
    pub fn ndim(&self) -> usize {
        1
    }
6585
6586    pub fn item(&self) -> Result<Period, IndexError> {
6587        if self.values.len() == 1 {
6588            Ok(self.values[0])
6589        } else {
6590            Err(IndexError::InvalidArgument(format!(
6591                "item requires exactly one label, got {}",
6592                self.values.len()
6593            )))
6594        }
6595    }
6596
    /// Identity check: `true` only when `self` and `other` are the very same
    /// object in memory (pointer equality). Two separate indexes with equal
    /// contents are not identical in this sense.
    #[must_use]
    pub fn is_(&self, other: &Self) -> bool {
        std::ptr::eq(self, other)
    }
6601
    /// `true` when both indexes hold equal periods in the same order. The
    /// index names are not compared; see [`identical`](Self::identical) for a
    /// name-aware comparison.
    #[must_use]
    pub fn equals(&self, other: &Self) -> bool {
        self.values == other.values
    }
6606
6607    #[must_use]
6608    pub fn identical(&self, other: &Self) -> bool {
6609        self.equals(other) && self.name == other.name
6610    }
6611
    /// pandas-style type probe; always `false` for a period index.
    #[must_use]
    pub fn holds_integer(&self) -> bool {
        false
    }
6616
    /// pandas-style inferred type label; always the string `"period"`.
    #[must_use]
    pub fn inferred_type(&self) -> &'static str {
        "period"
    }
6621
    /// pandas-style type probe; always `false` for a period index.
    #[must_use]
    pub fn is_boolean(&self) -> bool {
        false
    }
6626
    /// pandas-style type probe; always `false` for a period index.
    #[must_use]
    pub fn is_categorical(&self) -> bool {
        false
    }
6631
    /// pandas-style type probe; always `false` for a period index.
    #[must_use]
    pub fn is_floating(&self) -> bool {
        false
    }
6636
    /// pandas-style type probe; always `false` for a period index.
    #[must_use]
    pub fn is_integer(&self) -> bool {
        false
    }
6641
    /// pandas-style type probe; always `false` for a period index.
    #[must_use]
    pub fn is_interval(&self) -> bool {
        false
    }
6646
    /// pandas-style type probe; always `false` for a period index.
    #[must_use]
    pub fn is_numeric(&self) -> bool {
        false
    }
6651
    /// pandas-style type probe; always `false` for a period index.
    #[must_use]
    pub fn is_object(&self) -> bool {
        false
    }
6656
6657    #[must_use]
6658    pub fn freq(&self) -> Option<PeriodFreq> {
6659        self.values.first().map(|period| period.freq)
6660    }
6661
6662    /// Raw period ordinals, matching `pd.PeriodIndex.asi8`.
6663    #[must_use]
6664    pub fn asi8(&self) -> Vec<i64> {
6665        self.values.iter().map(|period| period.ordinal).collect()
6666    }
6667
6668    #[must_use]
6669    pub fn to_list(&self) -> Vec<Period> {
6670        self.values.clone()
6671    }
6672
    /// Alias for [`to_list`](Self::to_list).
    #[must_use]
    pub fn tolist(&self) -> Vec<Period> {
        self.to_list()
    }
6677
    /// Owned copy of the stored periods, in positional order. The Rust
    /// surface returns a plain `Vec<Period>` under the pandas-style name.
    #[must_use]
    pub fn to_numpy(&self) -> Vec<Period> {
        self.values.clone()
    }
6682
    /// Owned copy of the stored periods, in positional order. The Rust
    /// surface returns a plain `Vec<Period>` under the pandas-style name.
    #[must_use]
    pub fn array(&self) -> Vec<Period> {
        self.values.clone()
    }
6687
6688    #[must_use]
6689    pub fn to_index(&self) -> Index {
6690        Index::from_utf8(self.values.iter().map(Period::to_string).collect())
6691            .set_names(self.name.as_deref())
6692    }
6693
6694    /// First-seen unique periods, matching `pd.PeriodIndex.unique()`.
6695    /// Preserves the index name.
6696    #[must_use]
6697    pub fn unique(&self) -> Self {
6698        let mut seen = FxHashSet::<&Period>::default();
6699        let mut uniques = Vec::<Period>::new();
6700        for period in &self.values {
6701            if seen.insert(period) {
6702                uniques.push(*period);
6703            }
6704        }
6705        Self {
6706            values: uniques,
6707            name: self.name.clone(),
6708        }
6709    }
6710
6711    /// Per-position duplicate mask, matching
6712    /// `pd.PeriodIndex.duplicated(keep)`.
6713    #[must_use]
6714    pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
6715        let mut result = vec![false; self.values.len()];
6716        match keep {
6717            DuplicateKeep::First => {
6718                let mut seen = FxHashSet::<&Period>::default();
6719                for (i, period) in self.values.iter().enumerate() {
6720                    if !seen.insert(period) {
6721                        result[i] = true;
6722                    }
6723                }
6724            }
6725            DuplicateKeep::Last => {
6726                let mut seen = FxHashSet::<&Period>::default();
6727                for (i, period) in self.values.iter().enumerate().rev() {
6728                    if !seen.insert(period) {
6729                        result[i] = true;
6730                    }
6731                }
6732            }
6733            DuplicateKeep::None => {
6734                let mut counts = FxHashMap::<&Period, usize>::default();
6735                for period in &self.values {
6736                    *counts.entry(period).or_insert(0) += 1;
6737                }
6738                for (i, period) in self.values.iter().enumerate() {
6739                    if counts.get(period).copied().unwrap_or(0) > 1 {
6740                        result[i] = true;
6741                    }
6742                }
6743            }
6744        }
6745        result
6746    }
6747
    /// Drop duplicate periods (keep first), matching
    /// `pd.PeriodIndex.drop_duplicates()`.
    ///
    /// Delegates to [`unique`](Self::unique), so first-seen order and the
    /// index name are preserved.
    #[must_use]
    pub fn drop_duplicates(&self) -> Self {
        self.unique()
    }
6754
6755    /// Value counts, matching `pd.PeriodIndex.value_counts()`. Pandas
6756    /// sorts descending by count.
6757    #[must_use]
6758    pub fn value_counts(&self) -> Vec<(Period, usize)> {
6759        let mut order = Vec::<&Period>::new();
6760        let mut counts = FxHashMap::<&Period, usize>::default();
6761        for period in &self.values {
6762            let entry = counts.entry(period).or_insert_with(|| {
6763                order.push(period);
6764                0
6765            });
6766            *entry += 1;
6767        }
6768        let mut pairs: Vec<(Period, usize)> = order.iter().map(|p| (**p, counts[*p])).collect();
6769        pairs.sort_by_key(|entry| std::cmp::Reverse(entry.1));
6770        pairs
6771    }
6772
6773    /// Pick periods at the given positions, matching
6774    /// `pd.PeriodIndex.take()`. Out-of-bounds positions raise
6775    /// [`IndexError::OutOfBounds`].
6776    pub fn take(&self, positions: &[usize]) -> Result<Self, IndexError> {
6777        for &p in positions {
6778            if p >= self.values.len() {
6779                return Err(IndexError::OutOfBounds {
6780                    position: p,
6781                    length: self.values.len(),
6782                });
6783            }
6784        }
6785        let taken: Vec<Period> = positions.iter().map(|&p| self.values[p]).collect();
6786        Ok(Self {
6787            values: taken,
6788            name: self.name.clone(),
6789        })
6790    }
6791
6792    /// Repeat each period `repeats` times, matching
6793    /// `pd.PeriodIndex.repeat()`.
6794    #[must_use]
6795    pub fn repeat(&self, repeats: usize) -> Self {
6796        let mut out = Vec::with_capacity(self.values.len() * repeats);
6797        for &period in &self.values {
6798            for _ in 0..repeats {
6799                out.push(period);
6800            }
6801        }
6802        Self {
6803            values: out,
6804            name: self.name.clone(),
6805        }
6806    }
6807
    /// Positional first differences in period-frequency units.
    ///
    /// Pandas returns frequency offset objects for `PeriodIndex.diff()`. The
    /// Rust surface exposes the same semantic payload as ordinal deltas while
    /// preserving null slots for positions without a comparison partner or
    /// mixed-frequency pairs.
    ///
    /// NOTE(review): which `current` / `previous` positions are paired for a
    /// given `periods` is decided by `positional_diff` (defined elsewhere);
    /// this method only supplies the per-pair delta via `Period::diff`.
    #[must_use]
    pub fn diff(&self, periods: i64) -> Vec<Option<i64>> {
        positional_diff(self.values.len(), periods, |current, previous| {
            self.values[current].diff(&self.values[previous])
        })
    }
6820
    /// Convert periods to a new frequency using pandas' default end boundary.
    ///
    /// Matches `pd.PeriodIndex.asfreq(freq)` for supported target frequencies.
    /// Equivalent to [`asfreq_with_how(freq, "end")`](Self::asfreq_with_how).
    ///
    /// # Errors
    ///
    /// Propagates every error of [`asfreq_with_how`](Self::asfreq_with_how).
    pub fn asfreq(&self, freq: &str) -> Result<Self, IndexError> {
        self.asfreq_with_how(freq, "end")
    }
6827
6828    /// Convert periods to a new frequency at the requested boundary.
6829    ///
6830    /// Supported `how` values mirror pandas' common aliases:
6831    /// `start` / `s` / `begin` and `end` / `e` / `finish`.
6832    pub fn asfreq_with_how(&self, freq: &str, how: &str) -> Result<Self, IndexError> {
6833        let target_freq = PeriodFreq::parse(freq).ok_or_else(|| {
6834            IndexError::InvalidArgument(format!("asfreq: unsupported frequency '{freq}'"))
6835        })?;
6836        let boundary = parse_period_boundary_how(how, "asfreq")?;
6837        let values = self
6838            .values
6839            .iter()
6840            .copied()
6841            .map(|period| {
6842                let nanos = period_boundary_nanos(period, boundary)?;
6843                datetime_period_ordinal_at_boundary(nanos, target_freq, boundary)
6844                    .map(|ordinal| Period::new(ordinal, target_freq))
6845            })
6846            .collect::<Result<Vec<_>, _>>()?;
6847        Ok(Self {
6848            values,
6849            name: self.name.clone(),
6850        })
6851    }
6852
6853    fn to_timestamp_boundary(&self, boundary: PeriodBoundary) -> Result<DatetimeIndex, IndexError> {
6854        let nanos = self
6855            .values
6856            .iter()
6857            .copied()
6858            .map(|period| period_boundary_nanos(period, boundary))
6859            .collect::<Result<Vec<_>, _>>()?;
6860        let mut out = DatetimeIndex::new(nanos);
6861        if let Some(name) = self.name() {
6862            out = out.set_name(name);
6863        }
6864        Ok(out)
6865    }
6866
    /// Timestamp at each period's start boundary.
    ///
    /// Matches `pd.PeriodIndex.start_time`. The returned index carries the
    /// same name as `self`.
    ///
    /// # Errors
    ///
    /// Propagates any [`IndexError`] raised while resolving a period's start
    /// boundary.
    pub fn start_time(&self) -> Result<DatetimeIndex, IndexError> {
        self.to_timestamp_boundary(PeriodBoundary::Start)
    }
6873
    /// Timestamp at each period's inclusive end boundary.
    ///
    /// Matches `pd.PeriodIndex.end_time`. The returned index carries the same
    /// name as `self`.
    ///
    /// # Errors
    ///
    /// Propagates any [`IndexError`] raised while resolving a period's end
    /// boundary.
    pub fn end_time(&self) -> Result<DatetimeIndex, IndexError> {
        self.to_timestamp_boundary(PeriodBoundary::End)
    }
6880
6881    /// Convert periods to timestamp labels at the requested boundary.
6882    ///
6883    /// Supported `how` values mirror pandas' common aliases:
6884    /// `start` / `s` / `begin` and `end` / `e` / `finish`.
6885    pub fn to_timestamp(&self, how: &str) -> Result<DatetimeIndex, IndexError> {
6886        match how.trim().to_ascii_lowercase().as_str() {
6887            "" | "s" | "start" | "begin" | "b" => self.start_time(),
6888            "e" | "end" | "finish" => self.end_time(),
6889            other => Err(IndexError::InvalidArgument(format!(
6890                "to_timestamp how must be 'start' or 'end', got {other:?}"
6891            ))),
6892        }
6893    }
6894
6895    /// Fiscal year for each period's ending boundary.
6896    ///
6897    /// For the currently supported unanchored frequencies this is the
6898    /// calendar year of `end_time`, matching pandas' `PeriodIndex.qyear`.
6899    pub fn qyear(&self) -> Result<Vec<i32>, IndexError> {
6900        self.values
6901            .iter()
6902            .copied()
6903            .map(period_qyear)
6904            .collect::<Result<Vec<_>, _>>()
6905    }
6906
6907    // ── Per br-frankenpandas-qigpe: date-part accessors (19 methods) ──
6908
6909    /// Year component for each period, matching `pd.PeriodIndex.year`.
6910    pub fn year(&self) -> Result<Vec<Option<i32>>, IndexError> {
6911        Ok(self.start_time()?.year())
6912    }
6913
6914    /// Month component (1-12), matching `pd.PeriodIndex.month`.
6915    pub fn month(&self) -> Result<Vec<Option<u32>>, IndexError> {
6916        Ok(self.start_time()?.month())
6917    }
6918
6919    /// Day of month (1-31), matching `pd.PeriodIndex.day`.
6920    pub fn day(&self) -> Result<Vec<Option<u32>>, IndexError> {
6921        Ok(self.start_time()?.day())
6922    }
6923
6924    /// Hour (0-23), matching `pd.PeriodIndex.hour`.
6925    pub fn hour(&self) -> Result<Vec<Option<u32>>, IndexError> {
6926        Ok(self.start_time()?.hour())
6927    }
6928
6929    /// Minute (0-59), matching `pd.PeriodIndex.minute`.
6930    pub fn minute(&self) -> Result<Vec<Option<u32>>, IndexError> {
6931        Ok(self.start_time()?.minute())
6932    }
6933
6934    /// Second (0-59), matching `pd.PeriodIndex.second`.
6935    pub fn second(&self) -> Result<Vec<Option<u32>>, IndexError> {
6936        Ok(self.start_time()?.second())
6937    }
6938
6939    /// Quarter (1-4), matching `pd.PeriodIndex.quarter`.
6940    pub fn quarter(&self) -> Result<Vec<Option<u32>>, IndexError> {
6941        Ok(self.start_time()?.quarter())
6942    }
6943
6944    /// Day of week (0=Monday, 6=Sunday), matching `pd.PeriodIndex.weekday`.
6945    pub fn weekday(&self) -> Result<Vec<Option<u32>>, IndexError> {
6946        Ok(self.start_time()?.weekday())
6947    }
6948
    /// Day of week (0=Monday, 6=Sunday), alias for weekday.
    /// Matches `pd.PeriodIndex.dayofweek`.
    ///
    /// Delegates to [`weekday`](Self::weekday) and returns its result,
    /// including any error, unchanged.
    pub fn dayofweek(&self) -> Result<Vec<Option<u32>>, IndexError> {
        self.weekday()
    }
6954
    /// Day of week (0=Monday, 6=Sunday), alias for weekday.
    /// Matches `pd.PeriodIndex.day_of_week`.
    ///
    /// Delegates to [`weekday`](Self::weekday) and returns its result,
    /// including any error, unchanged.
    pub fn day_of_week(&self) -> Result<Vec<Option<u32>>, IndexError> {
        self.weekday()
    }
6960
6961    /// Day of year (1-366), matching `pd.PeriodIndex.dayofyear`.
6962    pub fn dayofyear(&self) -> Result<Vec<Option<u32>>, IndexError> {
6963        Ok(self.start_time()?.dayofyear())
6964    }
6965
    /// Day of year (1-366), alias for dayofyear.
    /// Matches `pd.PeriodIndex.day_of_year`.
    ///
    /// Delegates to [`dayofyear`](Self::dayofyear) and returns its result,
    /// including any error, unchanged.
    pub fn day_of_year(&self) -> Result<Vec<Option<u32>>, IndexError> {
        self.dayofyear()
    }
6971
6972    /// Days in month (28-31), matching `pd.PeriodIndex.days_in_month`.
6973    pub fn days_in_month(&self) -> Result<Vec<Option<u32>>, IndexError> {
6974        Ok(self.start_time()?.daysinmonth())
6975    }
6976
    /// Days in month (28-31), alias for days_in_month.
    /// Matches `pd.PeriodIndex.daysinmonth`.
    ///
    /// Delegates to [`days_in_month`](Self::days_in_month) and returns its
    /// result, including any error, unchanged.
    pub fn daysinmonth(&self) -> Result<Vec<Option<u32>>, IndexError> {
        self.days_in_month()
    }
6982
6983    /// ISO week number (1-53), matching `pd.PeriodIndex.week`.
6984    pub fn week(&self) -> Result<Vec<Option<u32>>, IndexError> {
6985        Ok(self.start_time()?.week())
6986    }
6987
    /// ISO week number (1-53), alias for week.
    /// Matches `pd.PeriodIndex.weekofyear`.
    ///
    /// Delegates to [`week`](Self::week) and returns its result, including
    /// any error, unchanged.
    pub fn weekofyear(&self) -> Result<Vec<Option<u32>>, IndexError> {
        self.week()
    }
6993
6994    /// Whether year is a leap year, matching `pd.PeriodIndex.is_leap_year`.
6995    pub fn is_leap_year(&self) -> Result<Vec<Option<bool>>, IndexError> {
6996        Ok(self.start_time()?.is_leap_year())
6997    }
6998
6999    /// Frequency resolution string, matching `pd.PeriodIndex.resolution`.
7000    #[must_use]
7001    pub fn resolution(&self) -> Option<&'static str> {
7002        self.values.first().map(|p| p.freq.resolution())
7003    }
7004
7005    /// Format each period as a string with strftime, matching `pd.PeriodIndex.strftime`.
7006    pub fn strftime(&self, fmt: &str) -> Result<Vec<Option<String>>, IndexError> {
7007        Ok(self.start_time()?.strftime(fmt))
7008    }
7009
7010    fn ensure_homogeneous_freq(&self) -> Result<Option<PeriodFreq>, IndexError> {
7011        let mut iter = self.values.iter();
7012        let Some(first) = iter.next() else {
7013            return Ok(None);
7014        };
7015        for period in iter {
7016            if period.freq != first.freq {
7017                return Err(IndexError::InvalidArgument(format!(
7018                    "PeriodIndex has mixed frequencies: {:?} and {:?}",
7019                    first.freq, period.freq
7020                )));
7021            }
7022        }
7023        Ok(Some(first.freq))
7024    }
7025
7026    fn ensure_compatible_freq(&self, other: &Self) -> Result<(), IndexError> {
7027        if let (Some(left), Some(right)) = (self.values.first(), other.values.first())
7028            && left.freq != right.freq
7029        {
7030            return Err(IndexError::InvalidArgument(format!(
7031                "set operation: incompatible frequencies {:?} vs {:?}",
7032                left.freq, right.freq
7033            )));
7034        }
7035        self.ensure_homogeneous_freq()?;
7036        other.ensure_homogeneous_freq()?;
7037        Ok(())
7038    }
7039
7040    /// Periods present in both, matching
7041    /// `pd.PeriodIndex.intersection(other)`. Mixed-freq rejects.
7042    pub fn intersection(&self, other: &Self) -> Result<Self, IndexError> {
7043        self.ensure_compatible_freq(other)?;
7044        let other_set: FxHashSet<&Period> = other.values.iter().collect();
7045        let mut seen = FxHashSet::<&Period>::default();
7046        let values: Vec<Period> = self
7047            .values
7048            .iter()
7049            .filter(|p| other_set.contains(p) && seen.insert(p))
7050            .copied()
7051            .collect();
7052        Ok(Self {
7053            values,
7054            name: if self.name == other.name {
7055                self.name.clone()
7056            } else {
7057                None
7058            },
7059        })
7060    }
7061
7062    /// Self periods then other periods not seen, matching
7063    /// `pd.PeriodIndex.union(other)`. Mixed-freq rejects.
7064    pub fn union(&self, other: &Self) -> Result<Self, IndexError> {
7065        self.ensure_compatible_freq(other)?;
7066        let mut seen = FxHashSet::<Period>::default();
7067        let values: Vec<Period> = self
7068            .values
7069            .iter()
7070            .chain(other.values.iter())
7071            .filter(|p| seen.insert(**p))
7072            .copied()
7073            .collect();
7074        Ok(Self {
7075            values,
7076            name: if self.name == other.name {
7077                self.name.clone()
7078            } else {
7079                None
7080            },
7081        })
7082    }
7083
7084    /// Self periods not in other, matching
7085    /// `pd.PeriodIndex.difference(other)`. Mixed-freq rejects.
7086    pub fn difference(&self, other: &Self) -> Result<Self, IndexError> {
7087        self.ensure_compatible_freq(other)?;
7088        let other_set: FxHashSet<&Period> = other.values.iter().collect();
7089        let mut seen = FxHashSet::<&Period>::default();
7090        let values: Vec<Period> = self
7091            .values
7092            .iter()
7093            .filter(|p| !other_set.contains(p) && seen.insert(p))
7094            .copied()
7095            .collect();
7096        Ok(Self {
7097            values,
7098            name: if self.name == other.name {
7099                self.name.clone()
7100            } else {
7101                None
7102            },
7103        })
7104    }
7105
7106    /// Periods in either but not both, matching
7107    /// `pd.PeriodIndex.symmetric_difference(other)`. Mixed-freq rejects.
7108    pub fn symmetric_difference(&self, other: &Self) -> Result<Self, IndexError> {
7109        self.ensure_compatible_freq(other)?;
7110        let self_set: FxHashSet<&Period> = self.values.iter().collect();
7111        let other_set: FxHashSet<&Period> = other.values.iter().collect();
7112        let mut seen = FxHashSet::<Period>::default();
7113        let mut values = Vec::<Period>::new();
7114        for p in &self.values {
7115            if !other_set.contains(p) && seen.insert(*p) {
7116                values.push(*p);
7117            }
7118        }
7119        for p in &other.values {
7120            if !self_set.contains(p) && seen.insert(*p) {
7121                values.push(*p);
7122            }
7123        }
7124        Ok(Self {
7125            values,
7126            name: if self.name == other.name {
7127                self.name.clone()
7128            } else {
7129                None
7130            },
7131        })
7132    }
7133
7134    /// Sort periods by ordinal ascending, matching
7135    /// `pd.PeriodIndex.sort_values()`. Mixed-frequency rejects.
7136    pub fn sort_values(&self) -> Result<Self, IndexError> {
7137        self.ensure_homogeneous_freq()?;
7138        let mut periods = self.values.clone();
7139        periods.sort_by_key(|period| period.ordinal);
7140        Ok(Self {
7141            values: periods,
7142            name: self.name.clone(),
7143        })
7144    }
7145
    /// Alias for [`Self::sort_values`], matching `pd.PeriodIndex.sort()`.
    ///
    /// # Errors
    /// Returns whatever error `sort_values` returns.
    pub fn sort(&self) -> Result<Self, IndexError> {
        self.sort_values()
    }
7150
7151    /// Position of the maximum ordinal, matching
7152    /// `pd.PeriodIndex.argmax()`. Mixed-freq input rejects; empty
7153    /// raises pandas-style "attempt to get argmax of an empty
7154    /// sequence".
7155    pub fn argmax(&self) -> Result<usize, IndexError> {
7156        self.ensure_homogeneous_freq()?;
7157        if self.values.is_empty() {
7158            return Err(IndexError::InvalidArgument(
7159                "attempt to get argmax of an empty sequence".to_owned(),
7160            ));
7161        }
7162        let mut best = 0;
7163        for (i, period) in self.values.iter().enumerate().skip(1) {
7164            if period.ordinal > self.values[best].ordinal {
7165                best = i;
7166            }
7167        }
7168        Ok(best)
7169    }
7170
7171    /// Position of the minimum ordinal, matching
7172    /// `pd.PeriodIndex.argmin()`. Mixed-freq input rejects; empty
7173    /// raises pandas-style "attempt to get argmin of an empty
7174    /// sequence".
7175    pub fn argmin(&self) -> Result<usize, IndexError> {
7176        self.ensure_homogeneous_freq()?;
7177        if self.values.is_empty() {
7178            return Err(IndexError::InvalidArgument(
7179                "attempt to get argmin of an empty sequence".to_owned(),
7180            ));
7181        }
7182        let mut best = 0;
7183        for (i, period) in self.values.iter().enumerate().skip(1) {
7184            if period.ordinal < self.values[best].ordinal {
7185                best = i;
7186            }
7187        }
7188        Ok(best)
7189    }
7190
7191    /// Positions that would sort the index by ordinal ascending,
7192    /// matching `pd.PeriodIndex.argsort()`. Mixed-freq input rejects.
7193    pub fn argsort(&self) -> Result<Vec<usize>, IndexError> {
7194        self.ensure_homogeneous_freq()?;
7195        let mut positions: Vec<usize> = (0..self.values.len()).collect();
7196        positions.sort_by_key(|&i| self.values[i].ordinal);
7197        Ok(positions)
7198    }
7199
7200    /// Period with the mean ordinal, matching `pd.PeriodIndex.mean()`.
7201    /// Mixed-frequency input rejects.
7202    pub fn mean(&self) -> Result<Option<Period>, IndexError> {
7203        let freq = match self.ensure_homogeneous_freq()? {
7204            Some(f) => f,
7205            None => return Ok(None),
7206        };
7207        let total: i128 = self.values.iter().map(|p| i128::from(p.ordinal)).sum();
7208        let count = self.values.len() as i128;
7209        let avg = i64::try_from(total / count)
7210            .map_err(|_| IndexError::InvalidArgument("mean: ordinal overflow".to_owned()))?;
7211        Ok(Some(Period::new(avg, freq)))
7212    }
7213
7214    /// Period with the median ordinal, matching `pd.PeriodIndex.median()`.
7215    /// For an even-length subset, returns the period at floor(median) of
7216    /// the two middle ordinals.
7217    pub fn median(&self) -> Result<Option<Period>, IndexError> {
7218        let freq = match self.ensure_homogeneous_freq()? {
7219            Some(f) => f,
7220            None => return Ok(None),
7221        };
7222        let mut ordinals: Vec<i64> = self.values.iter().map(|p| p.ordinal).collect();
7223        ordinals.sort_unstable();
7224        let mid = ordinals.len() / 2;
7225        let median = if ordinals.len() % 2 == 1 {
7226            ordinals[mid]
7227        } else {
7228            let total = i128::from(ordinals[mid - 1]) + i128::from(ordinals[mid]);
7229            i64::try_from(total / 2)
7230                .map_err(|_| IndexError::InvalidArgument("median: ordinal overflow".to_owned()))?
7231        };
7232        Ok(Some(Period::new(median, freq)))
7233    }
7234
7235    /// Period with the smallest ordinal, matching `pd.PeriodIndex.min()`.
7236    /// Mixed-frequency input rejects because pandas requires same-freq
7237    /// comparisons; empty returns `Ok(None)` to mirror the pandas NaT result.
7238    pub fn min(&self) -> Result<Option<Period>, IndexError> {
7239        self.ensure_homogeneous_freq()?;
7240        Ok(self
7241            .values
7242            .iter()
7243            .copied()
7244            .min_by_key(|period| period.ordinal))
7245    }
7246
7247    /// Period with the largest ordinal, matching `pd.PeriodIndex.max()`.
7248    pub fn max(&self) -> Result<Option<Period>, IndexError> {
7249        self.ensure_homogeneous_freq()?;
7250        Ok(self
7251            .values
7252            .iter()
7253            .copied()
7254            .max_by_key(|period| period.ordinal))
7255    }
7256
7257    /// Binary-search insertion position, matching
7258    /// `pd.PeriodIndex.searchsorted(value, side)`. Mixed-frequency lookups
7259    /// reject because pandas requires same-freq comparisons. side must be
7260    /// `"left"` or `"right"`.
7261    pub fn searchsorted(&self, value: Period, side: &str) -> Result<usize, IndexError> {
7262        if side != "left" && side != "right" {
7263            return Err(IndexError::InvalidArgument(format!(
7264                "searchsorted: side must be 'left' or 'right', got {side:?}"
7265            )));
7266        }
7267        if let Some(first) = self.values.first()
7268            && first.freq != value.freq
7269        {
7270            return Err(IndexError::InvalidArgument(format!(
7271                "searchsorted: needle frequency {:?} does not match index frequency {:?}",
7272                value.freq, first.freq
7273            )));
7274        }
7275        let mut lo = 0usize;
7276        let mut hi = self.values.len();
7277        while lo < hi {
7278            let mid = lo + (hi - lo) / 2;
7279            let cmp = self.values[mid].ordinal.cmp(&value.ordinal);
7280            use std::cmp::Ordering;
7281            let go_right = matches!(
7282                (cmp, side),
7283                (Ordering::Less, _) | (Ordering::Equal, "right")
7284            );
7285            if go_right {
7286                lo = mid + 1;
7287            } else {
7288                hi = mid;
7289            }
7290        }
7291        Ok(lo)
7292    }
7293
7294    /// Half-open positional range for a period slice, matching
7295    /// `pd.PeriodIndex.slice_indexer(start, end)`.
7296    pub fn slice_indexer(
7297        &self,
7298        start: Period,
7299        end: Period,
7300    ) -> Result<std::ops::Range<usize>, IndexError> {
7301        let (left, right) = self.slice_locs(start, end)?;
7302        Ok(left..right)
7303    }
7304
7305    /// Find positions of `[start, end]` for a period slice, matching
7306    /// `pd.PeriodIndex.slice_locs(start, end)`. Requires the index to
7307    /// be sorted ascending and the start/end periods to share its
7308    /// frequency.
7309    pub fn slice_locs(&self, start: Period, end: Period) -> Result<(usize, usize), IndexError> {
7310        if !self.is_monotonic_increasing() {
7311            return Err(IndexError::InvalidArgument(
7312                "slice_locs requires a monotonic increasing PeriodIndex".to_owned(),
7313            ));
7314        }
7315        let left = self.searchsorted(start, "left")?;
7316        let right = self.searchsorted(end, "right")?;
7317        Ok((left, right))
7318    }
7319
7320    /// First position of `period`, matching
7321    /// `pd.PeriodIndex.get_loc(period)`.
7322    pub fn get_loc(&self, period: Period) -> Result<usize, IndexError> {
7323        self.values
7324            .iter()
7325            .position(|p| *p == period)
7326            .ok_or_else(|| {
7327                IndexError::InvalidArgument(format!("get_loc: period {period} not in PeriodIndex"))
7328            })
7329    }
7330
    /// Return a copy carrying `name`, matching `pd.PeriodIndex.rename(name)`.
    ///
    /// Forwards to `set_name`.
    #[must_use]
    pub fn rename(&self, name: &str) -> Self {
        self.set_name(name)
    }
7336
7337    /// Reindex against `target`, matching
7338    /// `pd.PeriodIndex.reindex(target)`.
7339    #[must_use]
7340    pub fn reindex(&self, target: &Self) -> (Self, Vec<isize>) {
7341        let indexer = self.get_indexer(target.values());
7342        (target.clone(), indexer)
7343    }
7344
7345    /// Locate every position matching each target, matching
7346    /// `pd.PeriodIndex.get_indexer_non_unique(targets)`.
7347    #[must_use]
7348    pub fn get_indexer_non_unique(&self, targets: &[Period]) -> (Vec<isize>, Vec<usize>) {
7349        let mut by_value = FxHashMap::<Period, Vec<usize>>::default();
7350        for (i, period) in self.values.iter().enumerate() {
7351            by_value.entry(*period).or_default().push(i);
7352        }
7353        let mut positions = Vec::<isize>::new();
7354        let mut missing = Vec::<usize>::new();
7355        for (idx, target) in targets.iter().enumerate() {
7356            if let Some(matches) = by_value.get(target) {
7357                positions.extend(
7358                    matches
7359                        .iter()
7360                        .map(|p| isize::try_from(*p).unwrap_or(isize::MAX)),
7361                );
7362            } else {
7363                positions.push(-1);
7364                missing.push(idx);
7365            }
7366        }
7367        (positions, missing)
7368    }
7369
    /// Alias for [`Self::get_indexer`], matching
    /// `pd.PeriodIndex.get_indexer_for(targets)`.
    ///
    /// Each target maps to its first matching position, or `-1` when absent.
    ///
    /// NOTE(review): pandas routes non-unique indexes through
    /// `get_indexer_non_unique`; this alias always reports the first match
    /// only. Confirm that is intended.
    #[must_use]
    pub fn get_indexer_for(&self, targets: &[Period]) -> Vec<isize> {
        self.get_indexer(targets)
    }
7376
7377    /// Locate each target period in the index, matching
7378    /// `pd.PeriodIndex.get_indexer(targets)`. Returns `Vec<isize>`
7379    /// where `-1` means "missing".
7380    #[must_use]
7381    pub fn get_indexer(&self, targets: &[Period]) -> Vec<isize> {
7382        let mut positions = FxHashMap::<Period, isize>::default();
7383        for (i, period) in self.values.iter().enumerate() {
7384            positions
7385                .entry(*period)
7386                .or_insert_with(|| isize::try_from(i).unwrap_or(isize::MAX));
7387        }
7388        targets
7389            .iter()
7390            .map(|p| positions.get(p).copied().unwrap_or(-1))
7391            .collect()
7392    }
7393
7394    /// Replace positions where `cond` is `false` with `other`, matching
7395    /// `pd.PeriodIndex.where(cond, other)`. The replacement period must
7396    /// share the index frequency.
7397    pub fn r#where(&self, cond: &[bool], other: Period) -> Result<Self, IndexError> {
7398        if cond.len() != self.values.len() {
7399            return Err(IndexError::LengthMismatch {
7400                expected: self.values.len(),
7401                actual: cond.len(),
7402                context: "where: cond length must match index length".to_owned(),
7403            });
7404        }
7405        if let Some(first) = self.values.first()
7406            && first.freq != other.freq
7407        {
7408            return Err(IndexError::InvalidArgument(format!(
7409                "where: replacement frequency {:?} does not match index frequency {:?}",
7410                other.freq, first.freq
7411            )));
7412        }
7413        let values: Vec<Period> = self
7414            .values
7415            .iter()
7416            .zip(cond.iter())
7417            .map(|(period, &keep)| if keep { *period } else { other })
7418            .collect();
7419        Ok(Self {
7420            values,
7421            name: self.name.clone(),
7422        })
7423    }
7424
7425    /// Replace positions where `mask` is `true` with `value`, matching
7426    /// `pd.PeriodIndex.putmask(mask, value)`.
7427    pub fn putmask(&self, mask: &[bool], value: Period) -> Result<Self, IndexError> {
7428        if mask.len() != self.values.len() {
7429            return Err(IndexError::LengthMismatch {
7430                expected: self.values.len(),
7431                actual: mask.len(),
7432                context: "putmask: mask length must match index length".to_owned(),
7433            });
7434        }
7435        if let Some(first) = self.values.first()
7436            && first.freq != value.freq
7437        {
7438            return Err(IndexError::InvalidArgument(format!(
7439                "putmask: replacement frequency {:?} does not match index frequency {:?}",
7440                value.freq, first.freq
7441            )));
7442        }
7443        let values: Vec<Period> = self
7444            .values
7445            .iter()
7446            .zip(mask.iter())
7447            .map(|(period, &replace)| if replace { value } else { *period })
7448            .collect();
7449        Ok(Self {
7450            values,
7451            name: self.name.clone(),
7452        })
7453    }
7454
7455    /// Insert `period` at position `loc`, matching
7456    /// `pd.PeriodIndex.insert(loc, period)`.
7457    pub fn insert(&self, loc: usize, period: Period) -> Result<Self, IndexError> {
7458        if loc > self.values.len() {
7459            return Err(IndexError::OutOfBounds {
7460                position: loc,
7461                length: self.values.len(),
7462            });
7463        }
7464        let mut periods = self.values.clone();
7465        periods.insert(loc, period);
7466        Ok(Self {
7467            values: periods,
7468            name: self.name.clone(),
7469        })
7470    }
7471
7472    /// Shift each period by `n` units of its own frequency, matching
7473    /// `pd.PeriodIndex.shift(periods)`. Mixed-frequency input rejects
7474    /// via the existing ensure_homogeneous_freq guard.
7475    pub fn shift(&self, n: i64) -> Result<Self, IndexError> {
7476        self.ensure_homogeneous_freq()?;
7477        let values: Vec<Period> = self.values.iter().map(|p| p.shift(n)).collect();
7478        Ok(Self {
7479            values,
7480            name: self.name.clone(),
7481        })
7482    }
7483
    /// Period labels are already discrete; pandas PeriodIndex.round returns a clone.
    ///
    /// The `_freq` argument is accepted for signature parity and ignored.
    #[must_use]
    pub fn round(&self, _freq: &str) -> Self {
        self.clone()
    }
7489
7490    /// Whether period ordinals form a contiguous run, matching
7491    /// `pd.PeriodIndex.is_full`. Empty and single-element indexes are
7492    /// trivially full. Mixed-frequency input returns `false` because the
7493    /// concept is undefined.
7494    #[must_use]
7495    pub fn is_full(&self) -> bool {
7496        if self.values.len() <= 1 {
7497            return true;
7498        }
7499        // Mixed-freq: not full.
7500        let first_freq = self.values[0].freq;
7501        if self.values.iter().any(|p| p.freq != first_freq) {
7502            return false;
7503        }
7504        let mut sorted: Vec<i64> = self.values.iter().map(|p| p.ordinal).collect();
7505        sorted.sort_unstable();
7506        sorted.windows(2).all(|w| w[1] - w[0] == 1)
7507    }
7508
7509    /// Stringify each period via Display, matching `pd.PeriodIndex.format()`.
7510    #[must_use]
7511    pub fn format(&self) -> Vec<String> {
7512        self.values.iter().map(Period::to_string).collect()
7513    }
7514
7515    /// Frequency alias, matching `pd.PeriodIndex.freqstr`. Returns `None`
7516    /// for an empty index; otherwise the Display form of the freq.
7517    #[must_use]
7518    pub fn freqstr(&self) -> Option<String> {
7519        self.freq().map(|f| f.to_string())
7520    }
7521
7522    /// Inferred frequency, matching `pd.PeriodIndex.inferred_freq`. Pandas
7523    /// returns the freq when all periods share it, otherwise `None`. Mixed
7524    /// frequency is detected via `ensure_homogeneous_freq` (the same guard
7525    /// used by min/max).
7526    #[must_use]
7527    pub fn inferred_freq(&self) -> Option<String> {
7528        match self.ensure_homogeneous_freq() {
7529            Ok(Some(freq)) => Some(freq.to_string()),
7530            Ok(None) | Err(_) => None,
7531        }
7532    }
7533
    /// Convert to a flat [`Index`] of period strings, matching
    /// `pd.PeriodIndex.to_flat_index()`.
    ///
    /// Forwards to `to_index`.
    #[must_use]
    pub fn to_flat_index(&self) -> Index {
        self.to_index()
    }
7540
    /// String accessor for rendered period labels.
    ///
    /// The accessor owns a freshly built flat index (see
    /// [`Self::to_flat_index`]) rather than borrowing from `self`.
    #[must_use]
    pub fn r#str(&self) -> IndexStringAccessor<'_> {
        IndexStringAccessor::owned(self.to_flat_index())
    }
7546
    /// One-column row materialization, matching `pd.PeriodIndex.to_frame(index=False)`.
    ///
    /// Computed on the flat index from [`Self::to_flat_index`].
    #[must_use]
    pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
        self.to_flat_index().to_frame()
    }
7552
    /// Series-shaped materialization using period labels as both index and values.
    ///
    /// Computed on the flat index from [`Self::to_flat_index`].
    #[must_use]
    pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
        self.to_flat_index().to_series()
    }
7558
    /// Whether any period label coerces to true.
    ///
    /// Evaluated by the flat index's `any()`.
    #[must_use]
    pub fn any(&self) -> bool {
        self.to_flat_index().any()
    }
7564
    /// Whether all period labels coerce to true.
    ///
    /// Evaluated by the flat index's `all()`.
    #[must_use]
    pub fn all(&self) -> bool {
        self.to_flat_index().all()
    }
7570
    /// Get labels for a level. PeriodIndex is flat and only accepts level 0.
    ///
    /// # Errors
    /// Returns whatever error the flat index's `get_level_values` returns
    /// for `level`.
    pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
        self.to_flat_index().get_level_values(level)
    }
7575
    /// Drop a level. PeriodIndex is flat, so removing its only level is invalid.
    ///
    /// # Errors
    /// Returns whatever error the flat index's `droplevel` returns for
    /// `level`.
    pub fn droplevel(&self, level: usize) -> Result<Index, IndexError> {
        self.to_flat_index().droplevel(level)
    }
7580
    /// Group equal period labels into position buckets.
    ///
    /// Keys are the flat-index labels; grouping is done by the flat index's
    /// `groupby()`.
    #[must_use]
    pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
        self.to_flat_index().groupby()
    }
7586
    /// Apply a function to each period label, returning a flat Index.
    ///
    /// `func` receives the labels of the flat index (see
    /// [`Self::to_flat_index`]), not `Period` values.
    #[must_use]
    pub fn map<F>(&self, func: F) -> Index
    where
        F: Fn(&IndexLabel) -> IndexLabel,
    {
        self.to_flat_index().map(func)
    }
7595
7596    /// Cast period labels to a pandas dtype string, returning a flat Index.
7597    pub fn astype(&self, dtype: &str) -> Result<Index, IndexError> {
7598        match dtype {
7599            "int" | "int64" => Ok(Index::from_i64(
7600                self.values.iter().map(|period| period.ordinal).collect(),
7601            )
7602            .set_names(self.name())),
7603            "datetime64[ns]" => Ok(Index::from_datetime64(
7604                self.values
7605                    .iter()
7606                    .copied()
7607                    .map(period_start_nanos)
7608                    .collect::<Result<Vec<_>, _>>()?,
7609            )
7610            .set_names(self.name())),
7611            _ => self.to_flat_index().astype(dtype),
7612        }
7613    }
7614
    /// Nearest preceding-or-equal period label lookup.
    ///
    /// The lookup runs on the flat index (see [`Self::to_flat_index`]), so
    /// `key` is compared against flat-index labels.
    #[must_use]
    pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
        self.to_flat_index().asof(key)
    }
7620
    /// Locate nearest preceding-or-equal period positions for each target label.
    ///
    /// `where_index` and `mask` are passed unchanged to the flat index's
    /// `asof_locs`.
    #[must_use]
    pub fn asof_locs(&self, where_index: &Index, mask: Option<&[bool]>) -> Vec<Option<usize>> {
        self.to_flat_index().asof_locs(where_index, mask)
    }
7626
    /// Drop period labels, returning a flat Index.
    ///
    /// `labels_to_drop` is matched against flat-index labels by the flat
    /// index's `drop`.
    #[must_use]
    pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Index {
        self.to_flat_index().drop(labels_to_drop)
    }
7632
    /// Join period labels with another flat Index.
    ///
    /// `how` is passed unchanged to the flat index's `join`.
    ///
    /// # Errors
    /// Returns whatever error the flat index's `join` returns.
    pub fn join(&self, other: &Index, how: &str) -> Result<Index, IndexError> {
        self.to_flat_index().join(other, how)
    }
7637
    /// Sort period labels and return the positional sorter.
    ///
    /// Sorting is done by the flat index's `sortlevel()`, so the returned
    /// index is a flat [`Index`], not a `PeriodIndex`.
    #[must_use]
    pub fn sortlevel(&self) -> (Index, Vec<usize>) {
        self.to_flat_index().sortlevel()
    }
7643
    /// Returns a clone, matching `pd.PeriodIndex.view()`.
    ///
    /// The result is an independent copy, not a view sharing storage.
    #[must_use]
    pub fn view(&self) -> Self {
        self.clone()
    }
7649
    /// Identity transpose for a 1D index, matching
    /// `pd.PeriodIndex.transpose()`. Returns a clone of `self`.
    #[must_use]
    pub fn transpose(&self) -> Self {
        self.clone()
    }
7656
    /// Alias for [`Self::transpose`], matching `pd.PeriodIndex.T`.
    ///
    /// The upper-case name mirrors the pandas attribute, hence the
    /// `non_snake_case` allowance.
    #[allow(non_snake_case)]
    #[must_use]
    pub fn T(&self) -> Self {
        self.transpose()
    }
7663
    /// Flatten periods to a `Vec<Period>`, matching
    /// `pd.PeriodIndex.ravel()`. The result is a copy of the stored values.
    #[must_use]
    pub fn ravel(&self) -> Vec<Period> {
        self.values.clone()
    }
7670
    /// Number of levels, matching `pd.PeriodIndex.nlevels`. Always `1`,
    /// regardless of the index contents.
    #[must_use]
    pub fn nlevels(&self) -> usize {
        1
    }
7676
    /// Identity dtype-reinference for typed indexes, matching
    /// `pd.PeriodIndex.infer_objects()`. Returns a clone of `self`.
    #[must_use]
    pub fn infer_objects(&self) -> Self {
        self.clone()
    }
7683
7684    /// Per-position membership mask, matching `pd.PeriodIndex.isin(values)`.
7685    #[must_use]
7686    pub fn isin(&self, values: &[Period]) -> Vec<bool> {
7687        let needle: FxHashSet<Period> = values.iter().copied().collect();
7688        self.values.iter().map(|p| needle.contains(p)).collect()
7689    }
7690
7691    /// Concatenate with another PeriodIndex, matching
7692    /// `pd.PeriodIndex.append(other)`. Preserves the index name when both
7693    /// operands share it; otherwise the name is dropped.
7694    #[must_use]
7695    pub fn append(&self, other: &Self) -> Self {
7696        let mut periods = self.values.clone();
7697        periods.extend_from_slice(&other.values);
7698        let name = if self.name == other.name {
7699            self.name.clone()
7700        } else {
7701            None
7702        };
7703        Self {
7704            values: periods,
7705            name,
7706        }
7707    }
7708
7709    /// Remove the period at the given position, matching
7710    /// `pd.PeriodIndex.delete(loc)`.
7711    pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
7712        if loc >= self.values.len() {
7713            return Err(IndexError::OutOfBounds {
7714                position: loc,
7715                length: self.values.len(),
7716            });
7717        }
7718        let mut periods = self.values.clone();
7719        periods.remove(loc);
7720        Ok(Self {
7721            values: periods,
7722            name: self.name.clone(),
7723        })
7724    }
7725
7726    /// Factorize, matching `pd.PeriodIndex.factorize()`. Returns
7727    /// `(codes, uniques)` with isize codes — Period currently has no
7728    /// missing-value sentinel so all codes are non-negative.
7729    #[must_use]
7730    pub fn factorize(&self) -> (Vec<isize>, Self) {
7731        let mut positions = FxHashMap::<&Period, isize>::default();
7732        let mut uniques = Vec::<Period>::new();
7733        let mut codes = Vec::with_capacity(self.values.len());
7734        for period in &self.values {
7735            if let Some(code) = positions.get(period) {
7736                codes.push(*code);
7737            } else {
7738                let code = isize::try_from(uniques.len()).unwrap_or(isize::MAX);
7739                positions.insert(period, code);
7740                uniques.push(*period);
7741                codes.push(code);
7742            }
7743        }
7744        (
7745            codes,
7746            Self {
7747                values: uniques,
7748                name: self.name.clone(),
7749            },
7750        )
7751    }
7752}
7753
/// Public pandas-style range index wrapper.
///
/// Describes the integer sequence `start, start + step, ...` up to but not
/// including `stop`, without storing the individual labels.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RangeIndex {
    // First label of the range.
    start: i64,
    // Exclusive bound: the range ends before reaching this value.
    stop: i64,
    // Distance between consecutive labels; `RangeIndex::new` rejects zero.
    step: i64,
    // Optional index name.
    name: Option<String>,
}
7762
7763impl RangeIndex {
7764    pub fn new(start: i64, stop: i64, step: i64) -> Result<Self, IndexError> {
7765        if step == 0 {
7766            return Err(IndexError::InvalidArgument(
7767                "RangeIndex step must be non-zero".to_owned(),
7768            ));
7769        }
7770        Ok(Self {
7771            start,
7772            stop,
7773            step,
7774            name: None,
7775        })
7776    }
7777
7778    pub fn from_len(len: usize) -> Result<Self, IndexError> {
7779        let stop = i64::try_from(len).map_err(|_| {
7780            IndexError::InvalidArgument("RangeIndex length exceeds i64 range".to_owned())
7781        })?;
7782        Self::new(0, stop, 1)
7783    }
7784
    /// First label of the range (the stored `start` field).
    #[must_use]
    pub const fn start(&self) -> i64 {
        self.start
    }
7789
    /// Exclusive end bound of the range (the stored `stop` field).
    #[must_use]
    pub const fn stop(&self) -> i64 {
        self.stop
    }
7794
    /// Distance between consecutive labels (the stored `step` field).
    #[must_use]
    pub const fn step(&self) -> i64 {
        self.step
    }
7799
7800    #[must_use]
7801    pub fn len(&self) -> usize {
7802        let start = i128::from(self.start);
7803        let stop = i128::from(self.stop);
7804        let step = i128::from(self.step);
7805        let len = if step > 0 {
7806            if start >= stop {
7807                0
7808            } else {
7809                (stop - start + step - 1) / step
7810            }
7811        } else if start <= stop {
7812            0
7813        } else {
7814            let positive_step = -step;
7815            (start - stop + positive_step - 1) / positive_step
7816        };
7817        usize::try_from(len).unwrap_or(usize::MAX)
7818    }
7819
    /// Whether the range contains no labels (`len() == 0`).
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
7824
    /// Index name, borrowed as `&str`, or `None` when unnamed.
    #[must_use]
    pub fn name(&self) -> Option<&str> {
        self.name.as_deref()
    }
7829
7830    #[must_use]
7831    pub fn set_name(&self, name: &str) -> Self {
7832        let mut out = self.clone();
7833        out.name = Some(name.to_owned());
7834        out
7835    }
7836
7837    #[must_use]
7838    pub fn set_names(&self, name: Option<&str>) -> Self {
7839        let mut out = self.clone();
7840        out.name = name.map(str::to_owned);
7841        out
7842    }
7843
    /// Alias for [`Self::set_names`]: returns a copy with the name replaced
    /// (or cleared when `name` is `None`).
    #[must_use]
    pub fn rename_index(&self, name: Option<&str>) -> Self {
        self.set_names(name)
    }
7848
    /// Index names as a one-element list holding a copy of this index's
    /// optional name.
    #[must_use]
    pub fn names(&self) -> Vec<Option<String>> {
        vec![self.name.clone()]
    }
7853
    /// Return a clone of the range, name included.
    #[must_use]
    pub fn copy(&self) -> Self {
        self.clone()
    }
7858
    /// One-dimensional shape tuple `(len,)`.
    #[must_use]
    pub fn shape(&self) -> (usize,) {
        (self.len(),)
    }
7863
    /// Number of labels; same value as [`Self::len`].
    #[must_use]
    pub fn size(&self) -> usize {
        self.len()
    }
7868
    /// Whether the range has no labels; same value as [`Self::is_empty`].
    #[must_use]
    pub fn empty(&self) -> bool {
        self.is_empty()
    }
7873
    /// Dtype string of the labels. Always `"int64"`.
    #[must_use]
    pub fn dtype(&self) -> &'static str {
        "int64"
    }
7878
    /// Dtype strings as a one-element list containing [`Self::dtype`].
    #[must_use]
    pub fn dtypes(&self) -> Vec<&'static str> {
        vec![self.dtype()]
    }
7883
7884    #[must_use]
7885    pub fn memory_usage(&self, _deep: bool) -> usize {
7886        self.len() * std::mem::size_of::<i64>()
7887    }
7888
    /// Byte size of the labels; same value as `memory_usage(false)`.
    #[must_use]
    pub fn nbytes(&self) -> usize {
        self.memory_usage(false)
    }
7893
    /// Whether all labels are distinct. Always `true` for a range index.
    #[must_use]
    pub fn is_unique(&self) -> bool {
        true
    }
7898
    /// Whether any label repeats. Always `false` for a range index.
    #[must_use]
    pub fn has_duplicates(&self) -> bool {
        false
    }
7903
    /// Whether labels never decrease: true when the range has at most one
    /// label or the step is positive.
    #[must_use]
    pub fn is_monotonic_increasing(&self) -> bool {
        self.len() <= 1 || self.step > 0
    }
7908
    /// Alias for [`Self::is_monotonic_increasing`].
    #[must_use]
    pub fn is_monotonic(&self) -> bool {
        self.is_monotonic_increasing()
    }
7913
    /// Whether the labels never increase: `true` for a range of at most one
    /// label, or whenever `step` is negative.
    #[must_use]
    pub fn is_monotonic_decreasing(&self) -> bool {
        self.len() <= 1 || self.step < 0
    }
7918
    /// Number of distinct labels; reported as `len()`.
    #[must_use]
    pub fn nunique(&self) -> usize {
        self.len()
    }
7923
    /// Number of dimensions; always `1`.
    #[must_use]
    pub fn ndim(&self) -> usize {
        1
    }
7928
7929    pub fn item(&self) -> Result<i64, IndexError> {
7930        if self.len() == 1 {
7931            Ok(self.start)
7932        } else {
7933            Err(IndexError::InvalidArgument(format!(
7934                "item requires exactly one label, got {}",
7935                self.len()
7936            )))
7937        }
7938    }
7939
    /// Identity check: `true` only when `self` and `other` are the very same
    /// object in memory (pointer equality), not merely equal in value.
    #[must_use]
    pub fn is_(&self, other: &Self) -> bool {
        std::ptr::eq(self, other)
    }
7944
    /// Value equality: `true` when both ranges materialize to the same
    /// sequence of labels. The index name is not compared.
    #[must_use]
    pub fn equals(&self, other: &Self) -> bool {
        self.values() == other.values()
    }
7949
    /// Stricter than [`Self::equals`]: the labels must match and the names
    /// must be equal as well.
    #[must_use]
    pub fn identical(&self, other: &Self) -> bool {
        self.equals(other) && self.name == other.name
    }
7954
    /// Whether the index holds integer labels; always `true`.
    #[must_use]
    pub fn holds_integer(&self) -> bool {
        true
    }
7959
    /// Inferred label type as a string: `"empty"` for an empty range,
    /// `"integer"` otherwise.
    #[must_use]
    pub fn inferred_type(&self) -> &'static str {
        if self.is_empty() { "empty" } else { "integer" }
    }
7964
    /// Whether the labels are boolean; always `false`.
    #[must_use]
    pub fn is_boolean(&self) -> bool {
        false
    }
7969
    /// Whether the labels are categorical; always `false`.
    #[must_use]
    pub fn is_categorical(&self) -> bool {
        false
    }
7974
    /// Whether the labels are floating-point; always `false`.
    #[must_use]
    pub fn is_floating(&self) -> bool {
        false
    }
7979
    /// Whether the labels are integers; always `true`.
    #[must_use]
    pub fn is_integer(&self) -> bool {
        true
    }
7984
    /// Whether the labels are intervals; always `false`.
    #[must_use]
    pub fn is_interval(&self) -> bool {
        false
    }
7989
    /// Whether the labels are numeric; always `true`.
    #[must_use]
    pub fn is_numeric(&self) -> bool {
        true
    }
7994
    /// Whether the labels are object-typed; always `false`.
    #[must_use]
    pub fn is_object(&self) -> bool {
        false
    }
7999
    /// Materialize this range as an [`Index`] built with
    /// `Index::from_range(start, stop, step)`, carrying over the name
    /// (or the absence of one).
    #[must_use]
    pub fn to_index(&self) -> Index {
        Index::from_range(self.start, self.stop, self.step).set_names(self.name.as_deref())
    }
8004
8005    #[must_use]
8006    pub fn values(&self) -> Vec<i64> {
8007        self.to_index()
8008            .labels()
8009            .iter()
8010            .filter_map(|label| match label {
8011                IndexLabel::Int64(value) => Some(*value),
8012                IndexLabel::Utf8(_)
8013                | IndexLabel::Timedelta64(_)
8014                | IndexLabel::Datetime64(_)
8015                | IndexLabel::Null(_) => None,
8016            })
8017            .collect()
8018    }
8019
    /// Positional first differences for RangeIndex values.
    ///
    /// The range is materialized once and handed to `positional_diff`
    /// together with its length and `periods`. Each pairwise difference is
    /// `values[current] - values[previous]`, computed with `checked_sub`, so
    /// a subtraction that overflows `i64` yields `None` instead of panicking.
    // NOTE(review): which positions are paired, and how leading/trailing
    // slots are filled, is decided by `positional_diff` — confirm there.
    #[must_use]
    pub fn diff(&self, periods: i64) -> Vec<Option<i64>> {
        let values = self.values();
        positional_diff(values.len(), periods, |current, previous| {
            values[current].checked_sub(values[previous])
        })
    }
8028
    /// The labels as a `Vec<i64>`; same as [`Self::values`].
    #[must_use]
    pub fn to_list(&self) -> Vec<i64> {
        self.values()
    }
8033
    /// The labels as a `Vec<i64>`; same as [`Self::values`]
    /// (pandas-style spelling of [`Self::to_list`]).
    #[must_use]
    pub fn tolist(&self) -> Vec<i64> {
        self.values()
    }
8038
    /// The labels as a `Vec<i64>`; same as [`Self::values`].
    #[must_use]
    pub fn to_numpy(&self) -> Vec<i64> {
        self.values()
    }
8043
    /// The labels as a `Vec<i64>`; same as [`Self::values`].
    #[must_use]
    pub fn array(&self) -> Vec<i64> {
        self.values()
    }
8048
8049    /// Position of the maximum value, matching `pd.RangeIndex.argmax()`.
8050    ///
8051    /// `step > 0` makes the last position the maximum; `step < 0` makes
8052    /// position 0 the maximum. Empty ranges raise the same
8053    /// `attempt to get argmax of an empty sequence` error pandas surfaces.
8054    pub fn argmax(&self) -> Result<usize, IndexError> {
8055        if self.is_empty() {
8056            return Err(IndexError::InvalidArgument(
8057                "attempt to get argmax of an empty sequence".to_owned(),
8058            ));
8059        }
8060        if self.step > 0 {
8061            Ok(self.len() - 1)
8062        } else {
8063            Ok(0)
8064        }
8065    }
8066
8067    /// Position of the minimum value, matching `pd.RangeIndex.argmin()`.
8068    pub fn argmin(&self) -> Result<usize, IndexError> {
8069        if self.is_empty() {
8070            return Err(IndexError::InvalidArgument(
8071                "attempt to get argmin of an empty sequence".to_owned(),
8072            ));
8073        }
8074        if self.step > 0 {
8075            Ok(0)
8076        } else {
8077            Ok(self.len() - 1)
8078        }
8079    }
8080
8081    /// Positions that would sort the index ascending, matching
8082    /// `pd.RangeIndex.argsort()`.
8083    #[must_use]
8084    pub fn argsort(&self) -> Vec<usize> {
8085        let len = self.len();
8086        if self.step >= 0 {
8087            (0..len).collect()
8088        } else {
8089            (0..len).rev().collect()
8090        }
8091    }
8092
    /// RangeIndex enforces uniqueness, so every position is reported as a
    /// non-duplicate, matching `pd.RangeIndex.duplicated(keep=...)`.
    ///
    /// The `_keep` policy is accepted for signature parity but has no effect:
    /// the result is always `len()` copies of `false`.
    #[must_use]
    pub fn duplicated(&self, _keep: DuplicateKeep) -> Vec<bool> {
        vec![false; self.len()]
    }
8099
    /// Drop duplicates, matching `pd.RangeIndex.drop_duplicates()`.
    ///
    /// Returns a clone because RangeIndex never has duplicates; name and
    /// range parameters are preserved.
    #[must_use]
    pub fn drop_duplicates(&self) -> Self {
        self.clone()
    }
8106
    /// Per-position missingness mask, matching `pd.RangeIndex.isna()`.
    ///
    /// Always all-false because RangeIndex is int64-typed; the mask has
    /// `len()` entries.
    #[must_use]
    pub fn isna(&self) -> Vec<bool> {
        vec![false; self.len()]
    }
8113
    /// Alias for [`Self::isna`], matching `pd.RangeIndex.isnull()`.
    #[must_use]
    pub fn isnull(&self) -> Vec<bool> {
        self.isna()
    }
8119
    /// Per-position non-missing mask, matching `pd.RangeIndex.notna()`.
    ///
    /// Always all-true, with `len()` entries.
    #[must_use]
    pub fn notna(&self) -> Vec<bool> {
        vec![true; self.len()]
    }
8125
    /// Alias for [`Self::notna`], matching `pd.RangeIndex.notnull()`.
    #[must_use]
    pub fn notnull(&self) -> Vec<bool> {
        self.notna()
    }
8131
    /// Whether any position is missing, matching `pd.RangeIndex.hasnans`.
    ///
    /// Always `false`.
    #[must_use]
    pub fn hasnans(&self) -> bool {
        false
    }
8137
    /// Drop missing positions, matching `pd.RangeIndex.dropna()`.
    ///
    /// Returns a clone because RangeIndex cannot hold missing values.
    #[must_use]
    pub fn dropna(&self) -> Self {
        self.clone()
    }
8144
    /// Fill missing positions, matching `pd.RangeIndex.fillna(value)`.
    ///
    /// Returns a clone — RangeIndex has no missing positions to fill, so the
    /// `_value` argument is accepted but never used.
    #[must_use]
    pub fn fillna(&self, _value: i64) -> Self {
        self.clone()
    }
8151
8152    /// Stringify each value, matching `pd.RangeIndex.format()`.
8153    #[must_use]
8154    pub fn format(&self) -> Vec<String> {
8155        self.values().into_iter().map(|v| v.to_string()).collect()
8156    }
8157
    /// Identity factorization, matching `pd.RangeIndex.factorize()`.
    ///
    /// Returns `(codes, uniques)`: codes are `[0..len)` because every value
    /// is unique; uniques is a clone of `self`.
    #[must_use]
    pub fn factorize(&self) -> (Vec<usize>, Self) {
        ((0..self.len()).collect(), self.clone())
    }
8165
8166    /// Pick values at the given positions, matching
8167    /// `pd.RangeIndex.take()`. Out-of-bounds positions raise
8168    /// [`IndexError::OutOfBounds`].
8169    pub fn take(&self, positions: &[usize]) -> Result<Index, IndexError> {
8170        let values = self.values();
8171        for &p in positions {
8172            if p >= values.len() {
8173                return Err(IndexError::OutOfBounds {
8174                    position: p,
8175                    length: values.len(),
8176                });
8177            }
8178        }
8179        let labels: Vec<IndexLabel> = positions
8180            .iter()
8181            .map(|&p| IndexLabel::Int64(values[p]))
8182            .collect();
8183        let mut idx = Index::new(labels);
8184        if let Some(name) = self.name() {
8185            idx = idx.set_name(name);
8186        }
8187        Ok(idx)
8188    }
8189
8190    /// Repeat each value `repeats` times, matching `pd.RangeIndex.repeat()`.
8191    /// Returns a flat [`Index`] because the result is generally not a
8192    /// contiguous range.
8193    #[must_use]
8194    pub fn repeat(&self, repeats: usize) -> Index {
8195        let mut labels = Vec::with_capacity(self.len() * repeats);
8196        for value in self.values() {
8197            for _ in 0..repeats {
8198                labels.push(IndexLabel::Int64(value));
8199            }
8200        }
8201        let mut idx = Index::new(labels);
8202        if let Some(name) = self.name() {
8203            idx = idx.set_name(name);
8204        }
8205        idx
8206    }
8207
8208    /// First and last value as (start, last), or None if empty. Used to
8209    /// power closed-form reductions that don't materialize the full vector.
8210    fn first_last(&self) -> Option<(i64, i64)> {
8211        let len = self.len();
8212        if len == 0 {
8213            return None;
8214        }
8215        let last = self.start + (len as i64 - 1) * self.step;
8216        Some((self.start, last))
8217    }
8218
8219    /// Sort values ascending, matching `pd.RangeIndex.sort_values()`.
8220    /// Ascending or zero step returns a clone; descending step rebuilds
8221    /// an ascending RangeIndex starting from min with positive step.
8222    /// Empty returns clone of self.
8223    #[must_use]
8224    pub fn sort_values(&self) -> Self {
8225        if self.is_empty() || self.step >= 0 {
8226            return self.clone();
8227        }
8228        let len = self.len();
8229        let last = self.start + (len as i64 - 1) * self.step;
8230        let new_step = -self.step;
8231        let new_stop = last + (len as i64) * new_step;
8232        Self {
8233            start: last,
8234            stop: new_stop,
8235            step: new_step,
8236            name: self.name.clone(),
8237        }
8238    }
8239
    /// Alias for [`Self::sort_values`], matching `pd.RangeIndex.sort()`.
    ///
    /// Returns a new, ascending range; the receiver is not modified.
    #[must_use]
    pub fn sort(&self) -> Self {
        self.sort_values()
    }
8245
8246    /// Smallest value in the range, matching `pd.RangeIndex.min()`. Closed
8247    /// form on (start, step, len). Empty returns None.
8248    #[must_use]
8249    pub fn min(&self) -> Option<i64> {
8250        let (first, last) = self.first_last()?;
8251        Some(first.min(last))
8252    }
8253
8254    /// Largest value in the range, matching `pd.RangeIndex.max()`.
8255    #[must_use]
8256    pub fn max(&self) -> Option<i64> {
8257        let (first, last) = self.first_last()?;
8258        Some(first.max(last))
8259    }
8260
8261    /// Median value, matching `pd.RangeIndex.median()`. Returns `None`
8262    /// for an empty range; for an even-length range, returns the average
8263    /// of the two middle values as f64.
8264    #[must_use]
8265    pub fn median(&self) -> Option<f64> {
8266        let len = self.len();
8267        if len == 0 {
8268            return None;
8269        }
8270        let values = self.values();
8271        let mid = len / 2;
8272        if len % 2 == 1 {
8273            Some(values[mid] as f64)
8274        } else {
8275            Some((values[mid - 1] as f64 + values[mid] as f64) / 2.0)
8276        }
8277    }
8278
8279    /// Sample variance (ddof=1), matching `pd.RangeIndex.var()`. Returns
8280    /// `None` for fewer than two values.
8281    #[must_use]
8282    pub fn var(&self) -> Option<f64> {
8283        let values: Vec<f64> = self.values().into_iter().map(|v| v as f64).collect();
8284        if values.len() < 2 {
8285            return None;
8286        }
8287        let mean = values.iter().sum::<f64>() / values.len() as f64;
8288        Some(values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / (values.len() as f64 - 1.0))
8289    }
8290
    /// Sample standard deviation (ddof=1), matching `pd.RangeIndex.std()`.
    ///
    /// Square root of [`Self::var`]; `None` whenever `var` is `None`.
    #[must_use]
    pub fn std(&self) -> Option<f64> {
        self.var().map(f64::sqrt)
    }
8296
8297    /// Product of all values, matching `pd.RangeIndex.prod()`. Empty
8298    /// returns 1; saturating to i64 on overflow.
8299    #[must_use]
8300    pub fn prod(&self) -> i64 {
8301        let mut total: i128 = 1;
8302        for v in self.values() {
8303            total = total.saturating_mul(i128::from(v));
8304        }
8305        i64::try_from(total).unwrap_or(if total > 0 { i64::MAX } else { i64::MIN })
8306    }
8307
8308    /// Sum of all values, matching `pd.RangeIndex.sum()`. Closed form via
8309    /// arithmetic-progression: `n * (first + last) / 2` when `n*(first+last)`
8310    /// is even; falls back to a precise i128 path otherwise.
8311    #[must_use]
8312    pub fn sum(&self) -> i64 {
8313        let len = self.len();
8314        if len == 0 {
8315            return 0;
8316        }
8317        let Some((first, last)) = self.first_last() else {
8318            return 0;
8319        };
8320        let n = i128::from(len as i64);
8321        let total = (i128::from(first) + i128::from(last)) * n / 2;
8322        i64::try_from(total).unwrap_or(i64::MAX)
8323    }
8324
8325    /// Mean of all values, matching `pd.RangeIndex.mean()`. Returns `None`
8326    /// for an empty range.
8327    #[must_use]
8328    pub fn mean(&self) -> Option<f64> {
8329        let len = self.len();
8330        if len == 0 {
8331            return None;
8332        }
8333        let (first, last) = self.first_last()?;
8334        Some((first as f64 + last as f64) / 2.0)
8335    }
8336
8337    /// Binary-search insertion position, matching
8338    /// `pd.RangeIndex.searchsorted(value, side)`. Restricted to
8339    /// ascending ranges (`step > 0`) because searchsorted assumes a
8340    /// monotonically-increasing input; negative-step ranges raise.
8341    pub fn searchsorted(&self, value: i64, side: &str) -> Result<usize, IndexError> {
8342        if side != "left" && side != "right" {
8343            return Err(IndexError::InvalidArgument(format!(
8344                "searchsorted: side must be 'left' or 'right', got {side:?}"
8345            )));
8346        }
8347        if self.step < 0 {
8348            return Err(IndexError::InvalidArgument(
8349                "searchsorted requires a monotonically-increasing RangeIndex".to_owned(),
8350            ));
8351        }
8352        let values = self.values();
8353        let mut lo = 0usize;
8354        let mut hi = values.len();
8355        while lo < hi {
8356            let mid = lo + (hi - lo) / 2;
8357            let cmp = values[mid].cmp(&value);
8358            use std::cmp::Ordering;
8359            let go_right = matches!(
8360                (cmp, side),
8361                (Ordering::Less, _) | (Ordering::Equal, "right")
8362            );
8363            if go_right {
8364                lo = mid + 1;
8365            } else {
8366                hi = mid;
8367            }
8368        }
8369        Ok(lo)
8370    }
8371
8372    /// Convert to a flat [`Index`] of i64 labels, matching
8373    /// `pd.RangeIndex.to_flat_index()`.
8374    #[must_use]
8375    pub fn to_flat_index(&self) -> Index {
8376        let labels: Vec<IndexLabel> = self.values().into_iter().map(IndexLabel::Int64).collect();
8377        let mut idx = Index::new(labels);
8378        if let Some(name) = self.name() {
8379            idx = idx.set_name(name);
8380        }
8381        idx
8382    }
8383
    /// String accessor for the flat integer labels.
    ///
    /// The range is materialized with [`Self::to_flat_index`] and handed to
    /// the accessor via `IndexStringAccessor::owned`.
    #[must_use]
    pub fn r#str(&self) -> IndexStringAccessor<'_> {
        IndexStringAccessor::owned(self.to_flat_index())
    }
8389
    /// One-column row materialization, matching `pd.RangeIndex.to_frame(index=False)`.
    ///
    /// Delegates to `Index::to_frame` on the flat materialization of this range.
    #[must_use]
    pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
        self.to_flat_index().to_frame()
    }
8395
    /// Series-shaped materialization using range labels as both index and values.
    ///
    /// Delegates to `Index::to_series` on the flat materialization of this range.
    #[must_use]
    pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
        self.to_flat_index().to_series()
    }
8401
    /// Whether any range label coerces to true.
    ///
    /// Delegates to `Index::any` on the flat materialization; the truthiness
    /// rule itself is defined there.
    #[must_use]
    pub fn any(&self) -> bool {
        self.to_flat_index().any()
    }
8407
    /// Whether all range labels coerce to true.
    ///
    /// Delegates to `Index::all` on the flat materialization; the truthiness
    /// rule itself is defined there.
    #[must_use]
    pub fn all(&self) -> bool {
        self.to_flat_index().all()
    }
8413
    /// Get labels for a level. RangeIndex is flat and only accepts level 0.
    ///
    /// Delegates to `Index::get_level_values` on the flat materialization.
    ///
    /// # Errors
    ///
    /// Propagates whatever [`IndexError`] the flat index reports for `level`.
    pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
        self.to_flat_index().get_level_values(level)
    }
8418
    /// Drop a level. RangeIndex is flat, so removing its only level is invalid.
    ///
    /// Delegates to `Index::droplevel` on the flat materialization.
    ///
    /// # Errors
    ///
    /// Propagates whatever [`IndexError`] the flat index reports for `level`.
    pub fn droplevel(&self, level: usize) -> Result<Index, IndexError> {
        self.to_flat_index().droplevel(level)
    }
8423
    /// Group equal range labels into position buckets.
    ///
    /// Delegates to `Index::groupby` on the flat materialization; the map is
    /// keyed by label and holds positions for each key.
    #[must_use]
    pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
        self.to_flat_index().groupby()
    }
8429
    /// Apply a function to each range label, returning a flat Index.
    ///
    /// `func` receives each label by reference and returns its replacement.
    /// Delegates to `Index::map` on the flat materialization.
    #[must_use]
    pub fn map<F>(&self, func: F) -> Index
    where
        F: Fn(&IndexLabel) -> IndexLabel,
    {
        self.to_flat_index().map(func)
    }
8438
    /// Cast range labels to a pandas dtype string, returning a flat Index.
    ///
    /// Delegates to `Index::astype` on the flat materialization.
    ///
    /// # Errors
    ///
    /// Propagates the [`IndexError`] reported by the flat index's cast.
    pub fn astype(&self, dtype: &str) -> Result<Index, IndexError> {
        self.to_flat_index().astype(dtype)
    }
8443
    /// Nearest preceding-or-equal range label lookup.
    ///
    /// Delegates to `Index::asof` on the flat materialization; `None` is
    /// whatever that lookup returns when there is no match.
    #[must_use]
    pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
        self.to_flat_index().asof(key)
    }
8449
    /// Locate nearest preceding-or-equal range positions for each target label.
    ///
    /// Delegates to `Index::asof_locs` on the flat materialization, passing
    /// `where_index` and the optional `mask` through unchanged.
    #[must_use]
    pub fn asof_locs(&self, where_index: &Index, mask: Option<&[bool]>) -> Vec<Option<usize>> {
        self.to_flat_index().asof_locs(where_index, mask)
    }
8455
    /// Drop range labels, returning a flat Index.
    ///
    /// Delegates to `Index::drop` on the flat materialization.
    #[must_use]
    pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Index {
        self.to_flat_index().drop(labels_to_drop)
    }
8461
    /// Join range labels with another flat Index.
    ///
    /// Delegates to `Index::join` on the flat materialization; `how` is
    /// passed through unchanged.
    ///
    /// # Errors
    ///
    /// Propagates the [`IndexError`] reported by the flat index's join.
    pub fn join(&self, other: &Index, how: &str) -> Result<Index, IndexError> {
        self.to_flat_index().join(other, how)
    }
8466
    /// Sort range labels and return the positional sorter.
    ///
    /// Delegates to `Index::sortlevel` on the flat materialization and
    /// returns its `(sorted index, positions)` pair.
    #[must_use]
    pub fn sortlevel(&self) -> (Index, Vec<usize>) {
        self.to_flat_index().sortlevel()
    }
8472
    /// Returns a clone, matching `pd.RangeIndex.view()`.
    ///
    /// The result is an independent copy; it does not share storage with `self`.
    #[must_use]
    pub fn view(&self) -> Self {
        self.clone()
    }
8478
    /// Identity transpose for a 1D index, matching
    /// `pd.RangeIndex.transpose()`.
    ///
    /// Returns a clone of `self`.
    #[must_use]
    pub fn transpose(&self) -> Self {
        self.clone()
    }
8485
    /// Alias for [`Self::transpose`], matching `pd.RangeIndex.T`.
    // The upper-case name mirrors the pandas attribute, hence the lint allowance.
    #[allow(non_snake_case)]
    #[must_use]
    pub fn T(&self) -> Self {
        self.transpose()
    }
8492
    /// Flatten the range to a `Vec<i64>`, matching `pd.RangeIndex.ravel()`.
    ///
    /// Same result as [`Self::values`].
    #[must_use]
    pub fn ravel(&self) -> Vec<i64> {
        self.values()
    }
8498
    /// Number of levels, matching `pd.RangeIndex.nlevels`. Always `1`
    /// because a range index is flat.
    #[must_use]
    pub fn nlevels(&self) -> usize {
        1
    }
8504
    /// Identity dtype-reinference, matching `pd.RangeIndex.infer_objects()`.
    ///
    /// Returns a clone of `self`.
    #[must_use]
    pub fn infer_objects(&self) -> Self {
        self.clone()
    }
8510
8511    /// Per-position membership mask, matching `pd.RangeIndex.isin(values)`.
8512    #[must_use]
8513    pub fn isin(&self, values: &[i64]) -> Vec<bool> {
8514        let needle: FxHashSet<i64> = values.iter().copied().collect();
8515        self.values().iter().map(|v| needle.contains(v)).collect()
8516    }
8517
8518    /// Half-open positional range for a value slice, matching
8519    /// `pd.RangeIndex.slice_indexer(start, end)`.
8520    pub fn slice_indexer(
8521        &self,
8522        start: i64,
8523        end: i64,
8524    ) -> Result<std::ops::Range<usize>, IndexError> {
8525        let (left, right) = self.slice_locs(start, end)?;
8526        Ok(left..right)
8527    }
8528
8529    /// Find positions of `[start, end]` for a value slice, matching
8530    /// `pd.RangeIndex.slice_locs(start, end)`. Requires the range to
8531    /// be ascending (`step > 0`).
8532    pub fn slice_locs(&self, start: i64, end: i64) -> Result<(usize, usize), IndexError> {
8533        if self.step < 0 {
8534            return Err(IndexError::InvalidArgument(
8535                "slice_locs requires a monotonic increasing RangeIndex".to_owned(),
8536            ));
8537        }
8538        let left = self.searchsorted(start, "left")?;
8539        let right = self.searchsorted(end, "right")?;
8540        Ok((left, right))
8541    }
8542
8543    /// First position of `value`, matching `pd.RangeIndex.get_loc(value)`.
8544    /// Closed-form on (start, step, len).
8545    pub fn get_loc(&self, value: i64) -> Result<usize, IndexError> {
8546        if self.step == 0 {
8547            return Err(IndexError::InvalidArgument(
8548                "get_loc: zero-step RangeIndex is invalid".to_owned(),
8549            ));
8550        }
8551        let offset = value - self.start;
8552        if offset.checked_rem_euclid(self.step) != Some(0) {
8553            return Err(IndexError::InvalidArgument(format!(
8554                "get_loc: {value} not in RangeIndex"
8555            )));
8556        }
8557        let pos = offset / self.step;
8558        if pos < 0 || (pos as usize) >= self.len() {
8559            return Err(IndexError::InvalidArgument(format!(
8560                "get_loc: {value} not in RangeIndex"
8561            )));
8562        }
8563        Ok(pos as usize)
8564    }
8565
    /// Set the index name, matching `pd.RangeIndex.rename(name)`.
    ///
    /// Alias for [`Self::set_name`]; returns a renamed copy and leaves `self`
    /// unchanged.
    #[must_use]
    pub fn rename(&self, name: &str) -> Self {
        self.set_name(name)
    }
8571
    /// Reindex against `target`, matching `pd.RangeIndex.reindex(target)`.
    ///
    /// Returns `(target.clone(), indexer)`, where `indexer[i]` is the position
    /// in `self` of the `i`-th label of `target`, or `-1` when that label is
    /// absent (see [`Self::get_indexer`]).
    #[must_use]
    pub fn reindex(&self, target: &Self) -> (Self, Vec<isize>) {
        let indexer = self.get_indexer(&target.values());
        (target.clone(), indexer)
    }
8579
8580    /// Locate every position matching each target, matching
8581    /// `pd.RangeIndex.get_indexer_non_unique(targets)`. RangeIndex is
8582    /// always unique so each target either matches one position or
8583    /// none.
8584    #[must_use]
8585    pub fn get_indexer_non_unique(&self, targets: &[i64]) -> (Vec<isize>, Vec<usize>) {
8586        let mut positions = Vec::<isize>::new();
8587        let mut missing = Vec::<usize>::new();
8588        for (idx, target) in targets.iter().enumerate() {
8589            match self.get_loc(*target) {
8590                Ok(p) => positions.push(p as isize),
8591                Err(_) => {
8592                    positions.push(-1);
8593                    missing.push(idx);
8594                }
8595            }
8596        }
8597        (positions, missing)
8598    }
8599
    /// Alias for [`Self::get_indexer`], matching
    /// `pd.RangeIndex.get_indexer_for(targets)`.
    #[must_use]
    pub fn get_indexer_for(&self, targets: &[i64]) -> Vec<isize> {
        self.get_indexer(targets)
    }
8606
8607    /// Locate each target value, matching
8608    /// `pd.RangeIndex.get_indexer(targets)`. Closed-form per target.
8609    #[must_use]
8610    pub fn get_indexer(&self, targets: &[i64]) -> Vec<isize> {
8611        targets
8612            .iter()
8613            .map(|&v| self.get_loc(v).map(|p| p as isize).unwrap_or(-1))
8614            .collect()
8615    }
8616
8617    /// Replace positions where `cond` is `false` with `other`, matching
8618    /// `pd.RangeIndex.where(cond, other)`. Returns flat Index because
8619    /// the result is generally not a contiguous range.
8620    pub fn r#where(&self, cond: &[bool], other: i64) -> Result<Index, IndexError> {
8621        let values = self.values();
8622        if cond.len() != values.len() {
8623            return Err(IndexError::LengthMismatch {
8624                expected: values.len(),
8625                actual: cond.len(),
8626                context: "where: cond length must match index length".to_owned(),
8627            });
8628        }
8629        let labels: Vec<IndexLabel> = values
8630            .into_iter()
8631            .zip(cond.iter())
8632            .map(|(v, &keep)| IndexLabel::Int64(if keep { v } else { other }))
8633            .collect();
8634        let mut out = Index::new(labels);
8635        if let Some(name) = self.name() {
8636            out = out.set_name(name);
8637        }
8638        Ok(out)
8639    }
8640
8641    /// Replace positions where `mask` is `true` with `value`, matching
8642    /// `pd.RangeIndex.putmask(mask, value)`.
8643    pub fn putmask(&self, mask: &[bool], value: i64) -> Result<Index, IndexError> {
8644        let values = self.values();
8645        if mask.len() != values.len() {
8646            return Err(IndexError::LengthMismatch {
8647                expected: values.len(),
8648                actual: mask.len(),
8649                context: "putmask: mask length must match index length".to_owned(),
8650            });
8651        }
8652        let labels: Vec<IndexLabel> = values
8653            .into_iter()
8654            .zip(mask.iter())
8655            .map(|(v, &replace)| IndexLabel::Int64(if replace { value } else { v }))
8656            .collect();
8657        let mut out = Index::new(labels);
8658        if let Some(name) = self.name() {
8659            out = out.set_name(name);
8660        }
8661        Ok(out)
8662    }
8663
8664    fn set_op_via_int<F>(&self, other: &Self, op: F) -> Index
8665    where
8666        F: FnOnce(Vec<i64>, Vec<i64>) -> Vec<i64>,
8667    {
8668        let values = op(self.values(), other.values());
8669        let labels: Vec<IndexLabel> = values.into_iter().map(IndexLabel::Int64).collect();
8670        let mut idx = Index::new(labels);
8671        if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
8672            idx = idx.set_name(name);
8673        }
8674        idx
8675    }
8676
8677    /// Values present in both ranges, matching
8678    /// `pd.RangeIndex.intersection(other)`. Returns flat Index because
8679    /// the result may not be a contiguous range.
8680    #[must_use]
8681    pub fn intersection(&self, other: &Self) -> Index {
8682        self.set_op_via_int(other, |left, right| {
8683            let right_set: FxHashSet<i64> = right.into_iter().collect();
8684            let mut seen = FxHashSet::<i64>::default();
8685            left.into_iter()
8686                .filter(|v| right_set.contains(v) && seen.insert(*v))
8687                .collect()
8688        })
8689    }
8690
8691    /// Self values then other values not seen, matching
8692    /// `pd.RangeIndex.union(other)`.
8693    #[must_use]
8694    pub fn union(&self, other: &Self) -> Index {
8695        self.set_op_via_int(other, |left, right| {
8696            let mut seen = FxHashSet::<i64>::default();
8697            left.into_iter()
8698                .chain(right)
8699                .filter(|v| seen.insert(*v))
8700                .collect()
8701        })
8702    }
8703
8704    /// Self values not in other, matching
8705    /// `pd.RangeIndex.difference(other)`.
8706    #[must_use]
8707    pub fn difference(&self, other: &Self) -> Index {
8708        // Per br-frankenpandas-6r1lq: difference preserves self.name (not
8709        // shared_name like union/intersection). Build inline rather than
8710        // routing through set_op_via_int's shared-name logic.
8711        let right_set: FxHashSet<i64> = other.values().into_iter().collect();
8712        let mut seen = FxHashSet::<i64>::default();
8713        let labels: Vec<IndexLabel> = self
8714            .values()
8715            .into_iter()
8716            .filter(|v| !right_set.contains(v) && seen.insert(*v))
8717            .map(IndexLabel::Int64)
8718            .collect();
8719        let mut idx = Index::new(labels);
8720        if let Some(name) = self.name() {
8721            idx = idx.set_name(name);
8722        }
8723        idx
8724    }
8725
8726    /// Values in either but not both, matching
8727    /// `pd.RangeIndex.symmetric_difference(other)`.
8728    #[must_use]
8729    pub fn symmetric_difference(&self, other: &Self) -> Index {
8730        self.set_op_via_int(other, |left, right| {
8731            let left_set: FxHashSet<i64> = left.iter().copied().collect();
8732            let right_set: FxHashSet<i64> = right.iter().copied().collect();
8733            let mut seen = FxHashSet::<i64>::default();
8734            let mut out = Vec::new();
8735            for v in left {
8736                if !right_set.contains(&v) && seen.insert(v) {
8737                    out.push(v);
8738                }
8739            }
8740            for v in right {
8741                if !left_set.contains(&v) && seen.insert(v) {
8742                    out.push(v);
8743                }
8744            }
8745            out
8746        })
8747    }
8748
8749    /// Insert `value` at position `loc`, matching
8750    /// `pd.RangeIndex.insert(loc, value)`. Returns a flat [`Index`]
8751    /// because the result is generally not a contiguous range.
8752    pub fn insert(&self, loc: usize, value: i64) -> Result<Index, IndexError> {
8753        let values = self.values();
8754        if loc > values.len() {
8755            return Err(IndexError::OutOfBounds {
8756                position: loc,
8757                length: values.len(),
8758            });
8759        }
8760        let mut labels: Vec<IndexLabel> = values.into_iter().map(IndexLabel::Int64).collect();
8761        labels.insert(loc, IndexLabel::Int64(value));
8762        let mut out = Index::new(labels);
8763        if let Some(name) = self.name() {
8764            out = out.set_name(name);
8765        }
8766        Ok(out)
8767    }
8768
8769    /// Concatenate with another RangeIndex, matching
8770    /// `pd.RangeIndex.append(other)`. Returns a flat [`Index`] because the
8771    /// resulting values are generally not a contiguous range; preserves
8772    /// the index name when both operands share it.
8773    #[must_use]
8774    pub fn append(&self, other: &Self) -> Index {
8775        let mut labels: Vec<IndexLabel> =
8776            self.values().into_iter().map(IndexLabel::Int64).collect();
8777        labels.extend(other.values().into_iter().map(IndexLabel::Int64));
8778        let mut out = Index::new(labels);
8779        if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
8780            out = out.set_name(name);
8781        }
8782        out
8783    }
8784
8785    /// Remove the value at the given position, matching
8786    /// `pd.RangeIndex.delete(loc)`. Returns a flat [`Index`] because the
8787    /// residual values may no longer form a contiguous range.
8788    pub fn delete(&self, loc: usize) -> Result<Index, IndexError> {
8789        let values = self.values();
8790        if loc >= values.len() {
8791            return Err(IndexError::OutOfBounds {
8792                position: loc,
8793                length: values.len(),
8794            });
8795        }
8796        let labels: Vec<IndexLabel> = values
8797            .into_iter()
8798            .enumerate()
8799            .filter(|(i, _)| *i != loc)
8800            .map(|(_, v)| IndexLabel::Int64(v))
8801            .collect();
8802        let mut out = Index::new(labels);
8803        if let Some(name) = self.name() {
8804            out = out.set_name(name);
8805        }
8806        Ok(out)
8807    }
8808}
8809
/// Public pandas-style categorical index wrapper.
///
/// Holds one string label per row together with the list of category
/// names those labels are drawn from, an `ordered` flag, and an optional
/// index name.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CategoricalIndex {
    /// Row labels in positional order; the index length is `labels.len()`.
    labels: Vec<String>,
    /// Category names; a label's code is the position of its first match
    /// in this list.
    categories: Vec<String>,
    /// Whether the categories are flagged as ordered.
    ordered: bool,
    /// Optional index name.
    name: Option<String>,
}
8818
8819impl CategoricalIndex {
8820    #[must_use]
8821    pub fn from_values(labels: Vec<String>, ordered: bool) -> Self {
8822        // First-seen dedup in O(n): a side hash set tracks membership while the
8823        // categories Vec preserves insertion order, replacing the O(n·k)
8824        // `categories.contains` linear rescan per label.
8825        let mut categories = Vec::<String>::new();
8826        let mut seen: FxHashSet<&str> = FxHashSet::default();
8827        for label in &labels {
8828            if seen.insert(label.as_str()) {
8829                categories.push(label.clone());
8830            }
8831        }
8832        Self {
8833            labels,
8834            categories,
8835            ordered,
8836            name: None,
8837        }
8838    }
8839
8840    pub fn with_categories(
8841        labels: Vec<String>,
8842        categories: Vec<String>,
8843        ordered: bool,
8844    ) -> Result<Self, IndexError> {
8845        // O(n+k) membership: hash the category set once, then validate each
8846        // label in original order (first offending label still reported).
8847        let category_set: FxHashSet<&str> = categories.iter().map(String::as_str).collect();
8848        for label in &labels {
8849            if !category_set.contains(label.as_str()) {
8850                return Err(IndexError::InvalidArgument(format!(
8851                    "CategoricalIndex label {label:?} is not present in categories"
8852                )));
8853            }
8854        }
8855        Ok(Self {
8856            labels,
8857            categories,
8858            ordered,
8859            name: None,
8860        })
8861    }
8862
    /// Row labels in positional order.
    #[must_use]
    pub fn labels(&self) -> &[String] {
        &self.labels
    }

    /// Category names in their stored order.
    #[must_use]
    pub fn categories(&self) -> &[String] {
        &self.categories
    }

    /// Whether the categories are flagged as ordered.
    #[must_use]
    pub fn ordered(&self) -> bool {
        self.ordered
    }

    /// Number of row labels.
    #[must_use]
    pub fn len(&self) -> usize {
        self.labels.len()
    }

    /// Whether the index has no row labels.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.labels.is_empty()
    }

    /// Index name, if one is set.
    #[must_use]
    pub fn name(&self) -> Option<&str> {
        self.name.as_deref()
    }
8892
8893    #[must_use]
8894    pub fn set_name(&self, name: &str) -> Self {
8895        let mut out = self.clone();
8896        out.name = Some(name.to_owned());
8897        out
8898    }
8899
8900    #[must_use]
8901    pub fn set_names(&self, name: Option<&str>) -> Self {
8902        let mut out = self.clone();
8903        out.name = name.map(str::to_owned);
8904        out
8905    }
8906
8907    #[must_use]
8908    pub fn rename_index(&self, name: Option<&str>) -> Self {
8909        self.set_names(name)
8910    }
8911
8912    #[must_use]
8913    pub fn names(&self) -> Vec<Option<String>> {
8914        vec![self.name.clone()]
8915    }
8916
8917    #[must_use]
8918    pub fn copy(&self) -> Self {
8919        self.clone()
8920    }
8921
    /// One-element shape tuple holding the number of row labels.
    #[must_use]
    pub fn shape(&self) -> (usize,) {
        (self.len(),)
    }

    /// Number of row labels; same value as [`Self::len`].
    #[must_use]
    pub fn size(&self) -> usize {
        self.len()
    }

    /// Whether the index has no row labels; same value as
    /// [`Self::is_empty`].
    #[must_use]
    pub fn empty(&self) -> bool {
        self.is_empty()
    }

    /// Dtype name of this index; always `"category"`.
    #[must_use]
    pub fn dtype(&self) -> &'static str {
        "category"
    }

    /// The dtype name wrapped in a one-element list.
    #[must_use]
    pub fn dtypes(&self) -> Vec<&'static str> {
        vec![self.dtype()]
    }
8946
8947    #[must_use]
8948    pub fn memory_usage(&self, deep: bool) -> usize {
8949        let fixed = (self.labels.len() + self.categories.len()) * std::mem::size_of::<String>();
8950        if deep {
8951            fixed
8952                + self.labels.iter().map(String::len).sum::<usize>()
8953                + self.categories.iter().map(String::len).sum::<usize>()
8954                + self.name.as_ref().map_or(0, String::len)
8955        } else {
8956            fixed
8957        }
8958    }
8959
    /// Shallow byte estimate; same value as `memory_usage(false)`.
    #[must_use]
    pub fn nbytes(&self) -> usize {
        self.memory_usage(false)
    }

    /// Missing-value mask: one `false` per row label, since labels are
    /// stored as plain `String`s with no missing marker.
    #[must_use]
    pub fn isna(&self) -> Vec<bool> {
        vec![false; self.len()]
    }

    /// Present-value mask: one `true` per row label.
    #[must_use]
    pub fn notna(&self) -> Vec<bool> {
        vec![true; self.len()]
    }

    /// Categorical labels cannot be differenced without converting to a
    /// numeric or datetime dtype, matching pandas' fail-closed behavior.
    ///
    /// # Errors
    ///
    /// Always returns [`IndexError::InvalidArgument`]; `_periods` is
    /// accepted but never read.
    pub fn diff(&self, _periods: i64) -> Result<Vec<Option<i64>>, IndexError> {
        Err(IndexError::InvalidArgument(
            "Categorical has no 'diff' method; convert to a suitable dtype before calling diff"
                .to_owned(),
        ))
    }
8983
8984    #[must_use]
8985    pub fn is_unique(&self) -> bool {
8986        let unique: FxHashSet<&String> = self.labels.iter().collect();
8987        unique.len() == self.labels.len()
8988    }
8989
8990    #[must_use]
8991    pub fn has_duplicates(&self) -> bool {
8992        !self.is_unique()
8993    }
8994
8995    #[must_use]
8996    pub fn is_monotonic_increasing(&self) -> bool {
8997        let codes = self.codes();
8998        codes.windows(2).all(|window| window[0] <= window[1])
8999    }
9000
9001    #[must_use]
9002    pub fn is_monotonic(&self) -> bool {
9003        self.is_monotonic_increasing()
9004    }
9005
9006    #[must_use]
9007    pub fn is_monotonic_decreasing(&self) -> bool {
9008        let codes = self.codes();
9009        codes.windows(2).all(|window| window[0] >= window[1])
9010    }
9011
9012    #[must_use]
9013    pub fn nunique(&self) -> usize {
9014        self.labels.iter().collect::<FxHashSet<_>>().len()
9015    }
9016
9017    #[must_use]
9018    pub fn ndim(&self) -> usize {
9019        1
9020    }
9021
9022    pub fn item(&self) -> Result<String, IndexError> {
9023        if self.labels.len() == 1 {
9024            Ok(self.labels[0].clone())
9025        } else {
9026            Err(IndexError::InvalidArgument(format!(
9027                "item requires exactly one label, got {}",
9028                self.labels.len()
9029            )))
9030        }
9031    }
9032
9033    #[must_use]
9034    pub fn is_(&self, other: &Self) -> bool {
9035        std::ptr::eq(self, other)
9036    }
9037
9038    #[must_use]
9039    pub fn equals(&self, other: &Self) -> bool {
9040        self.labels == other.labels
9041            && self.categories == other.categories
9042            && self.ordered == other.ordered
9043    }
9044
9045    #[must_use]
9046    pub fn identical(&self, other: &Self) -> bool {
9047        self.equals(other) && self.name == other.name
9048    }
9049
    /// Integer-holding probe; always `false` for a categorical index.
    #[must_use]
    pub fn holds_integer(&self) -> bool {
        false
    }

    /// Inferred type name; always `"categorical"`.
    #[must_use]
    pub fn inferred_type(&self) -> &'static str {
        "categorical"
    }

    /// Boolean-dtype probe; always `false` for a categorical index.
    #[must_use]
    pub fn is_boolean(&self) -> bool {
        false
    }

    /// Categorical-dtype probe; always `true`.
    #[must_use]
    pub fn is_categorical(&self) -> bool {
        true
    }

    /// Floating-dtype probe; always `false` for a categorical index.
    #[must_use]
    pub fn is_floating(&self) -> bool {
        false
    }

    /// Integer-dtype probe; always `false` for a categorical index.
    #[must_use]
    pub fn is_integer(&self) -> bool {
        false
    }

    /// Interval-dtype probe; always `false` for a categorical index.
    #[must_use]
    pub fn is_interval(&self) -> bool {
        false
    }

    /// Numeric-dtype probe; always `false` for a categorical index.
    #[must_use]
    pub fn is_numeric(&self) -> bool {
        false
    }

    /// Object-dtype probe; always `false` for a categorical index.
    #[must_use]
    pub fn is_object(&self) -> bool {
        false
    }
9094
9095    /// First-occurrence category index for each category name, matching the
9096    /// semantics of `categories.iter().position(...)` but built once in O(k).
9097    /// `or_insert` keeps the first index if `categories` somehow has dupes.
9098    fn category_index_map(&self) -> FxHashMap<&str, usize> {
9099        let mut map: FxHashMap<&str, usize> = FxHashMap::default();
9100        for (i, cat) in self.categories.iter().enumerate() {
9101            map.entry(cat.as_str()).or_insert(i);
9102        }
9103        map
9104    }
9105
9106    #[must_use]
9107    pub fn codes(&self) -> Vec<Option<usize>> {
9108        // O(n+k): hash category->index once instead of a linear
9109        // `categories.position` scan per label. First-occurrence index
9110        // preserved, so output is bit-identical.
9111        let map = self.category_index_map();
9112        self.labels
9113            .iter()
9114            .map(|label| map.get(label.as_str()).copied())
9115            .collect()
9116    }
9117
9118    #[must_use]
9119    pub fn values(&self) -> Vec<String> {
9120        self.labels.clone()
9121    }
9122
9123    #[must_use]
9124    pub fn to_list(&self) -> Vec<String> {
9125        self.labels.clone()
9126    }
9127
9128    #[must_use]
9129    pub fn tolist(&self) -> Vec<String> {
9130        self.to_list()
9131    }
9132
9133    #[must_use]
9134    pub fn to_numpy(&self) -> Vec<String> {
9135        self.labels.clone()
9136    }
9137
9138    #[must_use]
9139    pub fn array(&self) -> Vec<String> {
9140        self.labels.clone()
9141    }
9142
9143    #[must_use]
9144    pub fn to_index(&self) -> Index {
9145        Index::from_utf8(self.labels.clone()).set_names(self.name.as_deref())
9146    }
9147
9148    /// Stringify each label, matching `pd.CategoricalIndex.format()`.
9149    /// Labels are already strings so this clones them.
9150    #[must_use]
9151    pub fn format(&self) -> Vec<String> {
9152        self.labels.clone()
9153    }
9154
9155    /// Replace positions where `cond` is `false` with `other`, matching
9156    /// `pd.CategoricalIndex.where(cond, other)`. `other` must already be
9157    /// a member of the categories list.
9158    pub fn r#where(&self, cond: &[bool], other: &str) -> Result<Self, IndexError> {
9159        if cond.len() != self.labels.len() {
9160            return Err(IndexError::LengthMismatch {
9161                expected: self.labels.len(),
9162                actual: cond.len(),
9163                context: "where: cond length must match index length".to_owned(),
9164            });
9165        }
9166        if !self.categories.iter().any(|cat| cat == other) {
9167            return Err(IndexError::InvalidArgument(format!(
9168                "where: replacement {other:?} is not a category"
9169            )));
9170        }
9171        let labels: Vec<String> = self
9172            .labels
9173            .iter()
9174            .zip(cond.iter())
9175            .map(|(label, &keep)| {
9176                if keep {
9177                    label.clone()
9178                } else {
9179                    other.to_owned()
9180                }
9181            })
9182            .collect();
9183        Ok(Self {
9184            labels,
9185            categories: self.categories.clone(),
9186            ordered: self.ordered,
9187            name: self.name.clone(),
9188        })
9189    }
9190
9191    /// Replace positions where `mask` is `true` with `value`, matching
9192    /// `pd.CategoricalIndex.putmask(mask, value)`.
9193    pub fn putmask(&self, mask: &[bool], value: &str) -> Result<Self, IndexError> {
9194        if mask.len() != self.labels.len() {
9195            return Err(IndexError::LengthMismatch {
9196                expected: self.labels.len(),
9197                actual: mask.len(),
9198                context: "putmask: mask length must match index length".to_owned(),
9199            });
9200        }
9201        if !self.categories.iter().any(|cat| cat == value) {
9202            return Err(IndexError::InvalidArgument(format!(
9203                "putmask: replacement {value:?} is not a category"
9204            )));
9205        }
9206        let labels: Vec<String> = self
9207            .labels
9208            .iter()
9209            .zip(mask.iter())
9210            .map(|(label, &replace)| {
9211                if replace {
9212                    value.to_owned()
9213                } else {
9214                    label.clone()
9215                }
9216            })
9217            .collect();
9218        Ok(Self {
9219            labels,
9220            categories: self.categories.clone(),
9221            ordered: self.ordered,
9222            name: self.name.clone(),
9223        })
9224    }
9225
    /// Alias for [`Self::isna`], matching `pd.CategoricalIndex.isnull()`.
    #[must_use]
    pub fn isnull(&self) -> Vec<bool> {
        self.isna()
    }

    /// Alias for [`Self::notna`], matching `pd.CategoricalIndex.notnull()`.
    #[must_use]
    pub fn notnull(&self) -> Vec<bool> {
        self.notna()
    }

    /// Whether any label is missing, matching
    /// `pd.CategoricalIndex.hasnans`. Always `false` because the
    /// FrankenPandas storage carries only non-null Strings.
    #[must_use]
    pub fn hasnans(&self) -> bool {
        false
    }

    /// Drop missing positions, matching `pd.CategoricalIndex.dropna()`.
    /// Returns a clone because there are no missing labels to drop.
    #[must_use]
    pub fn dropna(&self) -> Self {
        self.clone()
    }

    /// Fill missing positions, matching `pd.CategoricalIndex.fillna(value)`.
    /// Returns a clone because there are no missing labels to fill;
    /// `value` is accepted for API parity but ignored (it is not checked
    /// against the categories either).
    #[must_use]
    pub fn fillna(&self, _value: &str) -> Self {
        self.clone()
    }
9260
9261    /// Mark the categorical as ordered, matching
9262    /// `pd.CategoricalIndex.as_ordered()`.
9263    #[must_use]
9264    pub fn as_ordered(&self) -> Self {
9265        let mut out = self.clone();
9266        out.ordered = true;
9267        out
9268    }
9269
9270    /// Mark the categorical as unordered, matching
9271    /// `pd.CategoricalIndex.as_unordered()`.
9272    #[must_use]
9273    pub fn as_unordered(&self) -> Self {
9274        let mut out = self.clone();
9275        out.ordered = false;
9276        out
9277    }
9278
9279    /// Extend the categories list with new entries, matching
9280    /// `pd.CategoricalIndex.add_categories(new)`. Rejects when any new
9281    /// category is already present.
9282    pub fn add_categories(&self, new: Vec<String>) -> Result<Self, IndexError> {
9283        // O(k_existing + k_new): hash the existing categories once instead of a
9284        // linear `categories.contains` scan per new entry. First clashing entry
9285        // (in `new` order) is still the one reported.
9286        let existing: FxHashSet<&str> = self.categories.iter().map(String::as_str).collect();
9287        for cat in &new {
9288            if existing.contains(cat.as_str()) {
9289                return Err(IndexError::InvalidArgument(format!(
9290                    "add_categories: {cat:?} is already a category"
9291                )));
9292            }
9293        }
9294        let mut categories = self.categories.clone();
9295        categories.extend(new);
9296        Ok(Self {
9297            labels: self.labels.clone(),
9298            categories,
9299            ordered: self.ordered,
9300            name: self.name.clone(),
9301        })
9302    }
9303
9304    /// Drop categories from the list, matching
9305    /// `pd.CategoricalIndex.remove_categories(removals)`. Rejects when any
9306    /// removed category is still in use by a label (FrankenPandas does not
9307    /// yet carry NaN-labeled categoricals).
9308    pub fn remove_categories(&self, removals: &[String]) -> Result<Self, IndexError> {
9309        // Hash both the category set and the (large) label set once so the
9310        // per-removal validation is O(1) instead of two linear `contains`
9311        // scans — the `self.labels.contains` rescan was O(removals · n_labels).
9312        // Per-removal check order (not-a-category before in-use) is preserved,
9313        // so the first offending removal and its message are unchanged.
9314        let category_set: FxHashSet<&str> = self.categories.iter().map(String::as_str).collect();
9315        let label_set: FxHashSet<&str> = self.labels.iter().map(String::as_str).collect();
9316        for cat in removals {
9317            if !category_set.contains(cat.as_str()) {
9318                return Err(IndexError::InvalidArgument(format!(
9319                    "remove_categories: {cat:?} is not a category"
9320                )));
9321            }
9322            if label_set.contains(cat.as_str()) {
9323                return Err(IndexError::InvalidArgument(format!(
9324                    "remove_categories: {cat:?} is still in use by labels"
9325                )));
9326            }
9327        }
9328        let removals_set: FxHashSet<&String> = removals.iter().collect();
9329        let categories: Vec<String> = self
9330            .categories
9331            .iter()
9332            .filter(|cat| !removals_set.contains(cat))
9333            .cloned()
9334            .collect();
9335        Ok(Self {
9336            labels: self.labels.clone(),
9337            categories,
9338            ordered: self.ordered,
9339            name: self.name.clone(),
9340        })
9341    }
9342
9343    /// Narrow categories to the set of labels actually present, matching
9344    /// `pd.CategoricalIndex.remove_unused_categories()`.
9345    #[must_use]
9346    pub fn remove_unused_categories(&self) -> Self {
9347        let used: FxHashSet<&String> = self.labels.iter().collect();
9348        let categories: Vec<String> = self
9349            .categories
9350            .iter()
9351            .filter(|cat| used.contains(cat))
9352            .cloned()
9353            .collect();
9354        Self {
9355            labels: self.labels.clone(),
9356            categories,
9357            ordered: self.ordered,
9358            name: self.name.clone(),
9359        }
9360    }
9361
9362    /// Replace the categories list, matching
9363    /// `pd.CategoricalIndex.set_categories(new_categories)`. Rejects when
9364    /// any current label is missing from the new categories list.
9365    pub fn set_categories(&self, new_categories: Vec<String>) -> Result<Self, IndexError> {
9366        // O(n+k): hash the new category set once rather than scanning the new
9367        // categories Vec for every label. First label missing from the new
9368        // set (in label order) is still the one reported.
9369        let new_set: FxHashSet<&str> = new_categories.iter().map(String::as_str).collect();
9370        for label in &self.labels {
9371            if !new_set.contains(label.as_str()) {
9372                return Err(IndexError::InvalidArgument(format!(
9373                    "set_categories: label {label:?} is not in the new categories"
9374                )));
9375            }
9376        }
9377        Ok(Self {
9378            labels: self.labels.clone(),
9379            categories: new_categories,
9380            ordered: self.ordered,
9381            name: self.name.clone(),
9382        })
9383    }
9384
9385    /// Rename categories pos-by-pos, matching
9386    /// `pd.CategoricalIndex.rename_categories(new)`. Rejects when the
9387    /// new list has a different length.
9388    pub fn rename_categories(&self, new: Vec<String>) -> Result<Self, IndexError> {
9389        if new.len() != self.categories.len() {
9390            return Err(IndexError::InvalidArgument(format!(
9391                "rename_categories: expected {} new names, got {}",
9392                self.categories.len(),
9393                new.len()
9394            )));
9395        }
9396        let mapping: std::collections::HashMap<&String, &String> =
9397            self.categories.iter().zip(new.iter()).collect();
9398        let labels: Vec<String> = self
9399            .labels
9400            .iter()
9401            .map(|label| (*mapping.get(label).expect("label is a category")).clone())
9402            .collect();
9403        Ok(Self {
9404            labels,
9405            categories: new,
9406            ordered: self.ordered,
9407            name: self.name.clone(),
9408        })
9409    }
9410
9411    /// Reorder the categories list, matching
9412    /// `pd.CategoricalIndex.reorder_categories(new, ordered)`. Rejects
9413    /// when the new list is not a permutation of the existing categories.
9414    pub fn reorder_categories(&self, new: Vec<String>, ordered: bool) -> Result<Self, IndexError> {
9415        if new.len() != self.categories.len() {
9416            return Err(IndexError::InvalidArgument(format!(
9417                "reorder_categories: expected {} categories, got {}",
9418                self.categories.len(),
9419                new.len()
9420            )));
9421        }
9422        let existing: FxHashSet<&String> = self.categories.iter().collect();
9423        for cat in &new {
9424            if !existing.contains(cat) {
9425                return Err(IndexError::InvalidArgument(format!(
9426                    "reorder_categories: {cat:?} is not an existing category"
9427                )));
9428            }
9429        }
9430        let new_set: FxHashSet<&String> = new.iter().collect();
9431        if new_set.len() != new.len() {
9432            return Err(IndexError::InvalidArgument(
9433                "reorder_categories: new categories contain duplicates".to_owned(),
9434            ));
9435        }
9436        Ok(Self {
9437            labels: self.labels.clone(),
9438            categories: new,
9439            ordered,
9440            name: self.name.clone(),
9441        })
9442    }
9443
    /// Convert to a flat [`Index`] of utf8 labels, matching
    /// `pd.CategoricalIndex.to_flat_index()`. Same as [`Self::to_index`].
    #[must_use]
    pub fn to_flat_index(&self) -> Index {
        self.to_index()
    }

    /// String accessor over the row labels. The accessor owns a flat
    /// [`Index`] built from this index by [`Self::to_flat_index`].
    #[must_use]
    pub fn r#str(&self) -> IndexStringAccessor<'_> {
        IndexStringAccessor::owned(self.to_flat_index())
    }

    /// One-column row materialization, matching `pd.CategoricalIndex.to_frame(index=False)`.
    /// Delegates to the flat [`Index`] built from the row labels.
    #[must_use]
    pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
        self.to_flat_index().to_frame()
    }

    /// Series-shaped materialization using the row labels as both index and values.
    /// Delegates to the flat [`Index`] built from the row labels.
    #[must_use]
    pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
        self.to_flat_index().to_series()
    }

    /// Whether any row label coerces to true, as decided by the flat
    /// [`Index`] built from the row labels.
    #[must_use]
    pub fn any(&self) -> bool {
        self.to_flat_index().any()
    }

    /// Whether all row labels coerce to true, as decided by the flat
    /// [`Index`] built from the row labels.
    #[must_use]
    pub fn all(&self) -> bool {
        self.to_flat_index().all()
    }

    /// Get labels for a level. CategoricalIndex is flat and only accepts level 0.
    /// The level check itself is performed by the flat [`Index`].
    pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
        self.to_flat_index().get_level_values(level)
    }

    /// Drop a level. CategoricalIndex is flat, so removing its only level is invalid.
    /// The call is forwarded to the flat [`Index`], which decides the outcome.
    pub fn droplevel(&self, level: usize) -> Result<Index, IndexError> {
        self.to_flat_index().droplevel(level)
    }

    /// Group equal row labels into position buckets, via the flat
    /// [`Index`] built from the row labels.
    #[must_use]
    pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
        self.to_flat_index().groupby()
    }

    /// Apply a function to each row label, returning a flat Index.
    /// `func` receives the labels as [`IndexLabel`] values of the flat index.
    #[must_use]
    pub fn map<F>(&self, func: F) -> Index
    where
        F: Fn(&IndexLabel) -> IndexLabel,
    {
        self.to_flat_index().map(func)
    }

    /// Cast the row labels to a pandas dtype string, returning a flat Index.
    /// Errors are whatever the flat [`Index`] cast reports.
    pub fn astype(&self, dtype: &str) -> Result<Index, IndexError> {
        self.to_flat_index().astype(dtype)
    }

    /// Nearest preceding-or-equal label lookup, forwarded to the flat
    /// [`Index`] built from the row labels.
    #[must_use]
    pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
        self.to_flat_index().asof(key)
    }

    /// Locate nearest preceding-or-equal positions for each target label,
    /// forwarded to the flat [`Index`] built from the row labels.
    #[must_use]
    pub fn asof_locs(&self, where_index: &Index, mask: Option<&[bool]>) -> Vec<Option<usize>> {
        self.to_flat_index().asof_locs(where_index, mask)
    }

    /// Drop row labels, returning a flat Index.
    #[must_use]
    pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Index {
        self.to_flat_index().drop(labels_to_drop)
    }

    /// Join the row labels with another flat Index. `how` is passed
    /// through unchanged to the flat [`Index`] join.
    pub fn join(&self, other: &Index, how: &str) -> Result<Index, IndexError> {
        self.to_flat_index().join(other, how)
    }

    /// Sort the row labels and return the positional sorter, as computed
    /// by the flat [`Index`] built from the row labels.
    #[must_use]
    pub fn sortlevel(&self) -> (Index, Vec<usize>) {
        self.to_flat_index().sortlevel()
    }
9539
    /// Set the index name, matching `pd.CategoricalIndex.rename(name)`.
    /// Same as [`Self::set_name`].
    #[must_use]
    pub fn rename(&self, name: &str) -> Self {
        self.set_name(name)
    }

    /// Returns a clone, matching `pd.CategoricalIndex.view()`.
    #[must_use]
    pub fn view(&self) -> Self {
        self.clone()
    }

    /// Identity transpose for a 1D index, matching
    /// `pd.CategoricalIndex.transpose()`. Returns a clone.
    #[must_use]
    pub fn transpose(&self) -> Self {
        self.clone()
    }

    /// Alias for [`Self::transpose`], matching `pd.CategoricalIndex.T`.
    /// The upper-case name is deliberate, hence the lint exemption.
    #[allow(non_snake_case)]
    #[must_use]
    pub fn T(&self) -> Self {
        self.transpose()
    }

    /// Flatten labels to a `Vec<String>`, matching
    /// `pd.CategoricalIndex.ravel()`.
    #[must_use]
    pub fn ravel(&self) -> Vec<String> {
        self.labels.clone()
    }

    /// Number of levels, matching `pd.CategoricalIndex.nlevels`. Always `1`.
    #[must_use]
    pub fn nlevels(&self) -> usize {
        1
    }

    /// Identity dtype-reinference, matching
    /// `pd.CategoricalIndex.infer_objects()`. Returns a clone.
    #[must_use]
    pub fn infer_objects(&self) -> Self {
        self.clone()
    }
9585
9586    /// Binary-search insertion position, matching
9587    /// `pd.CategoricalIndex.searchsorted(value, side)`. Forwarded through
9588    /// the underlying utf8 Index.
9589    pub fn searchsorted(&self, value: &str, side: &str) -> Result<usize, IndexError> {
9590        self.to_index()
9591            .searchsorted(&IndexLabel::Utf8(value.to_owned()), side)
9592    }
9593
9594    /// Find positions of `[start, end]` for a label slice, matching
9595    /// `pd.CategoricalIndex.slice_locs(start, end)`. Requires labels to
9596    /// be sorted lexicographically (so the searchsorted result lines up
9597    /// with the slice boundary).
9598    pub fn slice_locs(&self, start: &str, end: &str) -> Result<(usize, usize), IndexError> {
9599        let labels_sorted = self.labels.windows(2).all(|w| w[0] <= w[1]);
9600        if !labels_sorted {
9601            return Err(IndexError::InvalidArgument(
9602                "slice_locs requires a CategoricalIndex with labels sorted lexicographically"
9603                    .to_owned(),
9604            ));
9605        }
9606        let left = self.searchsorted(start, "left")?;
9607        let right = self.searchsorted(end, "right")?;
9608        Ok((left, right))
9609    }
9610
9611    /// Half-open positional range for a label slice, matching
9612    /// `pd.CategoricalIndex.slice_indexer(start, end)`.
9613    pub fn slice_indexer(
9614        &self,
9615        start: &str,
9616        end: &str,
9617    ) -> Result<std::ops::Range<usize>, IndexError> {
9618        let (l, r) = self.slice_locs(start, end)?;
9619        Ok(l..r)
9620    }
9621
    /// Shared driver for the label set operations.
    ///
    /// `op` receives borrowed views of `self`'s labels and `other`'s labels
    /// (in that order) and returns the result labels. The returned index
    /// uses those labels, `self`'s categories followed by any result label
    /// that is not already a category (in first-seen order), `self`'s
    /// `ordered` flag, and the name only when both operands have equal
    /// names (otherwise no name).
    fn set_op_via_string<F>(&self, other: &Self, op: F) -> Self
    where
        F: FnOnce(Vec<&String>, Vec<&String>) -> Vec<String>,
    {
        let labels = op(self.labels.iter().collect(), other.labels.iter().collect());
        // Dedup the union of categories with a seen-set instead of an O(k)
        // `Vec::contains` per label (O(n·k) for high-cardinality categoricals).
        // `seen` borrows self.categories + labels (both stable) — never the
        // growing `categories` Vec — so a label is pushed iff it is neither an
        // existing category nor already pushed this pass: identical first-seen
        // order and dedup to the linear scan.
        let mut categories: Vec<String> = self.categories.clone();
        let mut seen: FxHashSet<&String> = self.categories.iter().collect();
        for label in &labels {
            if seen.insert(label) {
                categories.push(label.clone());
            }
        }
        // `seen` is not used past this point, so `labels` can be moved here.
        Self {
            labels,
            categories,
            ordered: self.ordered,
            name: if self.name == other.name {
                self.name.clone()
            } else {
                None
            },
        }
    }
9651
    /// Labels in both indexes (first-seen order from self), matching
    /// `pd.CategoricalIndex.intersection(other)`.
    ///
    /// Each label appears at most once in the result. Categories, `ordered`
    /// flag, and name are assembled by `set_op_via_string`.
    #[must_use]
    pub fn intersection(&self, other: &Self) -> Self {
        self.set_op_via_string(other, |left, right| {
            let right_set: FxHashSet<&&String> = right.iter().collect();
            // `seen` drops repeats so only a label's first occurrence survives.
            let mut seen = FxHashSet::<&String>::default();
            left.into_iter()
                .filter(|label| right_set.contains(label) && seen.insert(label))
                .cloned()
                .collect()
        })
    }

    /// Self labels then other labels not seen, matching
    /// `pd.CategoricalIndex.union(other)`.
    ///
    /// Each label appears at most once in the result. Categories, `ordered`
    /// flag, and name are assembled by `set_op_via_string`.
    #[must_use]
    pub fn union(&self, other: &Self) -> Self {
        self.set_op_via_string(other, |left, right| {
            // `seen` drops repeats across both sides, keeping first-seen order.
            let mut seen = FxHashSet::<&String>::default();
            left.into_iter()
                .chain(right)
                .filter(|label| seen.insert(label))
                .cloned()
                .collect()
        })
    }

    /// Labels in either but not both, matching
    /// `pd.CategoricalIndex.symmetric_difference(other)`.
    ///
    /// Labels found only in `self` come first (in first-seen order),
    /// followed by labels found only in `other`; each appears at most once.
    /// Categories, `ordered` flag, and name are assembled by
    /// `set_op_via_string`.
    #[must_use]
    pub fn symmetric_difference(&self, other: &Self) -> Self {
        self.set_op_via_string(other, |left, right| {
            let left_set: FxHashSet<&&String> = left.iter().collect();
            let right_set: FxHashSet<&&String> = right.iter().collect();
            let mut seen = FxHashSet::<&String>::default();
            let mut out = Vec::<String>::new();
            // First pass: labels exclusive to the left side.
            for label in &left {
                if !right_set.contains(label) && seen.insert(*label) {
                    out.push((*label).clone());
                }
            }
            // Second pass: labels exclusive to the right side.
            for label in &right {
                if !left_set.contains(label) && seen.insert(*label) {
                    out.push((*label).clone());
                }
            }
            out
        })
    }

    /// Self labels not in other, matching
    /// `pd.CategoricalIndex.difference(other)`.
    ///
    /// Each label appears at most once, in first-seen order from `self`.
    /// Unlike the other set operations, the result always keeps `self`'s
    /// name, whether or not `other` shares it.
    #[must_use]
    pub fn difference(&self, other: &Self) -> Self {
        // Per br-frankenpandas-6r1lq: difference preserves self.name (not
        // shared_name like set_op_via_string applies for union/intersection).
        let mut out = self.set_op_via_string(other, |left, right| {
            let right_set: FxHashSet<&&String> = right.iter().collect();
            let mut seen = FxHashSet::<&String>::default();
            left.into_iter()
                .filter(|label| !right_set.contains(label) && seen.insert(label))
                .cloned()
                .collect()
        });
        // Overwrite the shared-name result with self's own name.
        out.name = self.name.clone();
        out
    }
9720
9721    /// Sort labels ascending, matching `pd.CategoricalIndex.sort_values()`.
9722    /// `ordered=true` sorts by category position; `ordered=false` sorts
9723    /// lexicographically. Categories list and ordered flag are preserved.
9724    #[must_use]
9725    pub fn sort_values(&self) -> Self {
9726        let positions = self.argsort();
9727        let labels: Vec<String> = positions.iter().map(|&p| self.labels[p].clone()).collect();
9728        Self {
9729            labels,
9730            categories: self.categories.clone(),
9731            ordered: self.ordered,
9732            name: self.name.clone(),
9733        }
9734    }
9735
9736    /// Alias for `sort_values`, matching `pd.CategoricalIndex.sort()`.
9737    #[must_use]
9738    pub fn sort(&self) -> Self {
9739        self.sort_values()
9740    }
9741
9742    /// Positions that would sort labels ascending, matching
9743    /// `pd.CategoricalIndex.argsort()`.
9744    ///
9745    /// pandas sorts a Categorical by its **category codes** — the position of
9746    /// each label within `categories` — for both ordered and unordered
9747    /// categoricals (`Categorical._values_for_argsort` returns `self.codes`),
9748    /// NOT lexicographically by the label text. So categories `[b, a, c]` sort
9749    /// before-`a` because `b` has code 0. The sort is stable, so equal-code
9750    /// ties keep their original order. CategoricalIndex labels are non-null, so
9751    /// every label resolves to a code. When the category order happens to be
9752    /// lexicographic this is identical to the old text sort; only
9753    /// non-lexicographic category orders are corrected.
9754    #[must_use]
9755    pub fn argsort(&self) -> Vec<usize> {
9756        let map = self.category_index_map();
9757        let mut positions: Vec<usize> = (0..self.labels.len()).collect();
9758        positions.sort_by_key(|&i| {
9759            map.get(self.labels[i].as_str())
9760                .copied()
9761                .unwrap_or(usize::MAX)
9762        });
9763        positions
9764    }
9765
9766    /// Concatenate with another CategoricalIndex, matching
9767    /// `pd.CategoricalIndex.append(other)`. Categories merge
9768    /// (other-only categories are appended) and the index name is
9769    /// preserved when both operands share it.
9770    #[must_use]
9771    pub fn append(&self, other: &Self) -> Self {
9772        let mut labels = self.labels.clone();
9773        labels.extend_from_slice(&other.labels);
9774        // Union categories with a seen-set, not O(k) `Vec::contains` per entry
9775        // (see set_op_via_string). `seen` borrows self/other categories, never
9776        // the growing `categories` Vec; identical first-seen order + dedup.
9777        let mut categories = self.categories.clone();
9778        let mut seen: FxHashSet<&String> = self.categories.iter().collect();
9779        for cat in &other.categories {
9780            if seen.insert(cat) {
9781                categories.push(cat.clone());
9782            }
9783        }
9784        let name = if self.name == other.name {
9785            self.name.clone()
9786        } else {
9787            None
9788        };
9789        Self {
9790            labels,
9791            categories,
9792            ordered: self.ordered && other.ordered,
9793            name,
9794        }
9795    }
9796
9797    /// Remove the label at the given position, matching
9798    /// `pd.CategoricalIndex.delete(loc)`. OOB raises.
9799    pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
9800        if loc >= self.labels.len() {
9801            return Err(IndexError::OutOfBounds {
9802                position: loc,
9803                length: self.labels.len(),
9804            });
9805        }
9806        let mut labels = self.labels.clone();
9807        labels.remove(loc);
9808        Ok(Self {
9809            labels,
9810            categories: self.categories.clone(),
9811            ordered: self.ordered,
9812            name: self.name.clone(),
9813        })
9814    }
9815
9816    /// Insert `value` at position `loc`, matching
9817    /// `pd.CategoricalIndex.insert(loc, value)`. The value must be a
9818    /// member of the categories list; OOB and not-a-category raise.
9819    pub fn insert(&self, loc: usize, value: &str) -> Result<Self, IndexError> {
9820        if loc > self.labels.len() {
9821            return Err(IndexError::OutOfBounds {
9822                position: loc,
9823                length: self.labels.len(),
9824            });
9825        }
9826        if !self.categories.iter().any(|cat| cat == value) {
9827            return Err(IndexError::InvalidArgument(format!(
9828                "insert: {value:?} is not a category"
9829            )));
9830        }
9831        let mut labels = self.labels.clone();
9832        labels.insert(loc, value.to_owned());
9833        Ok(Self {
9834            labels,
9835            categories: self.categories.clone(),
9836            ordered: self.ordered,
9837            name: self.name.clone(),
9838        })
9839    }
9840
9841    /// Repeat each label `repeats` times, matching
9842    /// `pd.CategoricalIndex.repeat(repeats)`.
9843    #[must_use]
9844    pub fn repeat(&self, repeats: usize) -> Self {
9845        let mut labels = Vec::with_capacity(self.labels.len() * repeats);
9846        for label in &self.labels {
9847            for _ in 0..repeats {
9848                labels.push(label.clone());
9849            }
9850        }
9851        Self {
9852            labels,
9853            categories: self.categories.clone(),
9854            ordered: self.ordered,
9855            name: self.name.clone(),
9856        }
9857    }
9858
9859    /// Pick labels at the given positions, matching
9860    /// `pd.CategoricalIndex.take(positions)`. Out-of-bounds positions
9861    /// raise [`IndexError::OutOfBounds`].
9862    pub fn take(&self, positions: &[usize]) -> Result<Self, IndexError> {
9863        for &p in positions {
9864            if p >= self.labels.len() {
9865                return Err(IndexError::OutOfBounds {
9866                    position: p,
9867                    length: self.labels.len(),
9868                });
9869            }
9870        }
9871        let labels: Vec<String> = positions.iter().map(|&p| self.labels[p].clone()).collect();
9872        Ok(Self {
9873            labels,
9874            categories: self.categories.clone(),
9875            ordered: self.ordered,
9876            name: self.name.clone(),
9877        })
9878    }
9879
9880    /// Per-position membership mask, matching
9881    /// `pd.CategoricalIndex.isin(values)`.
9882    #[must_use]
9883    pub fn isin(&self, values: &[String]) -> Vec<bool> {
9884        let needle: FxHashSet<&String> = values.iter().collect();
9885        self.labels.iter().map(|l| needle.contains(l)).collect()
9886    }
9887
9888    /// Locate every position matching each target, matching
9889    /// `pd.CategoricalIndex.get_indexer_non_unique(targets)`.
9890    #[must_use]
9891    pub fn get_indexer_non_unique(&self, targets: &[String]) -> (Vec<isize>, Vec<usize>) {
9892        let mut by_value = FxHashMap::<&String, Vec<usize>>::default();
9893        for (i, label) in self.labels.iter().enumerate() {
9894            by_value.entry(label).or_default().push(i);
9895        }
9896        let mut positions = Vec::<isize>::new();
9897        let mut missing = Vec::<usize>::new();
9898        for (idx, target) in targets.iter().enumerate() {
9899            if let Some(matches) = by_value.get(target) {
9900                positions.extend(
9901                    matches
9902                        .iter()
9903                        .map(|p| isize::try_from(*p).unwrap_or(isize::MAX)),
9904                );
9905            } else {
9906                positions.push(-1);
9907                missing.push(idx);
9908            }
9909        }
9910        (positions, missing)
9911    }
9912
9913    /// Locate each label in `targets`, matching
9914    /// `pd.CategoricalIndex.get_indexer(targets)`.
9915    #[must_use]
9916    pub fn get_indexer(&self, targets: &[String]) -> Vec<isize> {
9917        let mut positions = FxHashMap::<&String, isize>::default();
9918        for (i, label) in self.labels.iter().enumerate() {
9919            positions
9920                .entry(label)
9921                .or_insert_with(|| isize::try_from(i).unwrap_or(isize::MAX));
9922        }
9923        targets
9924            .iter()
9925            .map(|t| positions.get(t).copied().unwrap_or(-1))
9926            .collect()
9927    }
9928
9929    /// Alias for [`get_indexer`], matching
9930    /// `pd.CategoricalIndex.get_indexer_for(targets)`.
9931    #[must_use]
9932    pub fn get_indexer_for(&self, targets: &[String]) -> Vec<isize> {
9933        self.get_indexer(targets)
9934    }
9935
9936    /// First position of `value`, matching
9937    /// `pd.CategoricalIndex.get_loc(value)`.
9938    pub fn get_loc(&self, value: &str) -> Result<usize, IndexError> {
9939        self.labels.iter().position(|l| l == value).ok_or_else(|| {
9940            IndexError::InvalidArgument(format!("get_loc: {value:?} not in CategoricalIndex"))
9941        })
9942    }
9943
9944    /// Position of the maximum label, matching
9945    /// `pd.CategoricalIndex.argmax()`. ordered=true uses category
9946    /// position; unordered uses lexicographic ordering. Empty raises
9947    /// pandas-style "attempt to get argmax of an empty sequence".
9948    pub fn argmax(&self) -> Result<usize, IndexError> {
9949        if self.labels.is_empty() {
9950            return Err(IndexError::InvalidArgument(
9951                "attempt to get argmax of an empty sequence".to_owned(),
9952            ));
9953        }
9954        let mut best = 0;
9955        if self.ordered {
9956            let map = self.category_index_map();
9957            let position = |label: &String| map.get(label.as_str()).copied().unwrap_or(0);
9958            for i in 1..self.labels.len() {
9959                if position(&self.labels[i]) > position(&self.labels[best]) {
9960                    best = i;
9961                }
9962            }
9963        } else {
9964            for i in 1..self.labels.len() {
9965                if self.labels[i] > self.labels[best] {
9966                    best = i;
9967                }
9968            }
9969        }
9970        Ok(best)
9971    }
9972
9973    /// Position of the minimum label, matching
9974    /// `pd.CategoricalIndex.argmin()`. Same ordering rules as argmax.
9975    pub fn argmin(&self) -> Result<usize, IndexError> {
9976        if self.labels.is_empty() {
9977            return Err(IndexError::InvalidArgument(
9978                "attempt to get argmin of an empty sequence".to_owned(),
9979            ));
9980        }
9981        let mut best = 0;
9982        if self.ordered {
9983            let map = self.category_index_map();
9984            let position = |label: &String| map.get(label.as_str()).copied().unwrap_or(usize::MAX);
9985            for i in 1..self.labels.len() {
9986                if position(&self.labels[i]) < position(&self.labels[best]) {
9987                    best = i;
9988                }
9989            }
9990        } else {
9991            for i in 1..self.labels.len() {
9992                if self.labels[i] < self.labels[best] {
9993                    best = i;
9994                }
9995            }
9996        }
9997        Ok(best)
9998    }
9999
10000    /// Smallest label in category order when ordered, lexicographic when
10001    /// unordered, matching `pd.CategoricalIndex.min()`. Empty returns
10002    /// `None`.
10003    #[must_use]
10004    pub fn min(&self) -> Option<&str> {
10005        if self.labels.is_empty() {
10006            return None;
10007        }
10008        if self.ordered {
10009            // Compare by category position (hashed once, O(n+k)).
10010            let map = self.category_index_map();
10011            let position = |label: &String| map.get(label.as_str()).copied().unwrap_or(usize::MAX);
10012            self.labels
10013                .iter()
10014                .min_by_key(|label| position(label))
10015                .map(String::as_str)
10016        } else {
10017            self.labels.iter().min().map(String::as_str)
10018        }
10019    }
10020
10021    /// Largest label, matching `pd.CategoricalIndex.max()`.
10022    #[must_use]
10023    pub fn max(&self) -> Option<&str> {
10024        if self.labels.is_empty() {
10025            return None;
10026        }
10027        if self.ordered {
10028            let map = self.category_index_map();
10029            let position = |label: &String| map.get(label.as_str()).copied().unwrap_or(0);
10030            self.labels
10031                .iter()
10032                .max_by_key(|label| position(label))
10033                .map(String::as_str)
10034        } else {
10035            self.labels.iter().max().map(String::as_str)
10036        }
10037    }
10038
10039    /// First-seen unique labels, matching `pd.CategoricalIndex.unique()`.
10040    /// Categories are preserved (not narrowed to seen labels) and the
10041    /// ordered flag rolls through. The result keeps the index name.
10042    #[must_use]
10043    pub fn unique(&self) -> Self {
10044        let mut seen = FxHashSet::<&String>::default();
10045        let mut uniques = Vec::<String>::new();
10046        for label in &self.labels {
10047            if seen.insert(label) {
10048                uniques.push(label.clone());
10049            }
10050        }
10051        Self {
10052            labels: uniques,
10053            categories: self.categories.clone(),
10054            ordered: self.ordered,
10055            name: self.name.clone(),
10056        }
10057    }
10058
10059    /// Per-position duplicate mask, matching
10060    /// `pd.CategoricalIndex.duplicated(keep)`.
10061    #[must_use]
10062    pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
10063        self.to_index().duplicated(keep)
10064    }
10065
10066    /// Drop duplicate labels (keep first), matching
10067    /// `pd.CategoricalIndex.drop_duplicates()`. Categories and ordered
10068    /// flag are preserved.
10069    #[must_use]
10070    pub fn drop_duplicates(&self) -> Self {
10071        self.unique()
10072    }
10073
10074    /// Value counts, matching `pd.CategoricalIndex.value_counts()`.
10075    /// CategoricalIndex labels are non-null so the total equals `len()`.
10076    #[must_use]
10077    pub fn value_counts(&self) -> Vec<(String, usize)> {
10078        let mut order = Vec::<&String>::new();
10079        let mut counts = FxHashMap::<&String, usize>::default();
10080        for label in &self.labels {
10081            let entry = counts.entry(label).or_insert_with(|| {
10082                order.push(label);
10083                0
10084            });
10085            *entry += 1;
10086        }
10087        let mut pairs: Vec<(String, usize)> =
10088            order.iter().map(|s| ((*s).clone(), counts[*s])).collect();
10089        // Pandas sorts descending by count for value_counts.
10090        pairs.sort_by_key(|entry| std::cmp::Reverse(entry.1));
10091        pairs
10092    }
10093
10094    /// Factorize, matching `pd.CategoricalIndex.factorize()`. Returns
10095    /// `(codes, uniques)` where `uniques` is a CategoricalIndex with
10096    /// the same categories list.
10097    #[must_use]
10098    pub fn factorize(&self) -> (Vec<isize>, Self) {
10099        let mut positions = FxHashMap::<&String, isize>::default();
10100        let mut uniques = Vec::<String>::new();
10101        let mut codes = Vec::with_capacity(self.labels.len());
10102        for label in &self.labels {
10103            if let Some(code) = positions.get(label) {
10104                codes.push(*code);
10105            } else {
10106                let code = isize::try_from(uniques.len()).unwrap_or(isize::MAX);
10107                positions.insert(label, code);
10108                uniques.push(label.clone());
10109                codes.push(code);
10110            }
10111        }
10112        let unique_index = Self {
10113            labels: uniques,
10114            categories: self.categories.clone(),
10115            ordered: self.ordered,
10116            name: self.name.clone(),
10117        };
10118        (codes, unique_index)
10119    }
10120}
10121
/// Result of aligning two indexes: the output index plus, for every output
/// row, where that row comes from on each side.
///
/// The three members are parallel: `validate_alignment_plan` rejects plans in
/// which their lengths differ.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct AlignmentPlan {
    /// Output index. Despite the name it is not always a union: depending on
    /// the join mode it may be an intersection or one side's labels.
    pub union_index: Index,
    /// For each output row, the source position in the left index, or `None`
    /// when the row's label has no match on the left.
    pub left_positions: Vec<Option<usize>>,
    /// For each output row, the source position in the right index, or `None`
    /// when the row's label has no match on the right.
    pub right_positions: Vec<Option<usize>>,
}
10128
/// Errors reported by index operations in this crate.
///
/// Marked `#[non_exhaustive]`, so downstream `match`es need a wildcard arm.
#[derive(Debug, Error, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum IndexError {
    /// The position vectors of an alignment plan and its output index do not
    /// all have the same length (see `validate_alignment_plan`).
    #[error("alignment vectors must have equal lengths")]
    InvalidAlignmentVectors,
    /// A positional argument does not fit a container of `length` elements.
    #[error("position {position} out of bounds for length {length}")]
    OutOfBounds { position: usize, length: usize },
    /// Two lengths that must agree do not; `context` says which check failed.
    #[error("length mismatch: expected {expected}, got {actual} ({context})")]
    LengthMismatch {
        expected: usize,
        actual: usize,
        context: String,
    },
    /// An argument is invalid for the operation; the payload is the
    /// human-readable explanation.
    #[error("invalid argument: {0}")]
    InvalidArgument(String),
}
10145
/// Alignment mode for index-level join semantics.
///
/// Selects which labels end up in the output index of `align`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AlignMode {
    /// Only labels present in both indexes.
    Inner,
    /// All left labels; right fills with None for missing.
    Left,
    /// All right labels; left fills with None for missing.
    Right,
    /// All labels from both indexes (union). Default for arithmetic.
    Outer,
}
10158
10159fn index_position_groups(index: &Index) -> FxHashMap<IndexLabel, Vec<usize>> {
10160    let mut groups: FxHashMap<IndexLabel, Vec<usize>> = FxHashMap::default();
10161    for (pos, label) in index.labels().iter().enumerate() {
10162        groups.entry(label.clone()).or_default().push(pos);
10163    }
10164    groups
10165}
10166
10167fn align_non_unique(left: &Index, right: &Index, mode: AlignMode) -> AlignmentPlan {
10168    let left_groups = index_position_groups(left);
10169    let right_groups = index_position_groups(right);
10170
10171    let mut out_labels = Vec::new();
10172    let mut left_positions = Vec::new();
10173    let mut right_positions = Vec::new();
10174
10175    match mode {
10176        AlignMode::Inner => {
10177            for (left_pos, label) in left.labels().iter().enumerate() {
10178                if let Some(right_hits) = right_groups.get(label) {
10179                    for &right_pos in right_hits {
10180                        out_labels.push(label.clone());
10181                        left_positions.push(Some(left_pos));
10182                        right_positions.push(Some(right_pos));
10183                    }
10184                }
10185            }
10186        }
10187        AlignMode::Left => {
10188            for (left_pos, label) in left.labels().iter().enumerate() {
10189                match right_groups.get(label) {
10190                    Some(right_hits) if !right_hits.is_empty() => {
10191                        for &right_pos in right_hits {
10192                            out_labels.push(label.clone());
10193                            left_positions.push(Some(left_pos));
10194                            right_positions.push(Some(right_pos));
10195                        }
10196                    }
10197                    _ => {
10198                        out_labels.push(label.clone());
10199                        left_positions.push(Some(left_pos));
10200                        right_positions.push(None);
10201                    }
10202                }
10203            }
10204        }
10205        AlignMode::Right => {
10206            for (right_pos, label) in right.labels().iter().enumerate() {
10207                match left_groups.get(label) {
10208                    Some(left_hits) if !left_hits.is_empty() => {
10209                        for &left_pos in left_hits {
10210                            out_labels.push(label.clone());
10211                            left_positions.push(Some(left_pos));
10212                            right_positions.push(Some(right_pos));
10213                        }
10214                    }
10215                    _ => {
10216                        out_labels.push(label.clone());
10217                        left_positions.push(None);
10218                        right_positions.push(Some(right_pos));
10219                    }
10220                }
10221            }
10222        }
10223        AlignMode::Outer => {
10224            for (left_pos, label) in left.labels().iter().enumerate() {
10225                match right_groups.get(label) {
10226                    Some(right_hits) if !right_hits.is_empty() => {
10227                        for &right_pos in right_hits {
10228                            out_labels.push(label.clone());
10229                            left_positions.push(Some(left_pos));
10230                            right_positions.push(Some(right_pos));
10231                        }
10232                    }
10233                    _ => {
10234                        out_labels.push(label.clone());
10235                        left_positions.push(Some(left_pos));
10236                        right_positions.push(None);
10237                    }
10238                }
10239            }
10240
10241            for (right_pos, label) in right.labels().iter().enumerate() {
10242                if !left_groups.contains_key(label) {
10243                    out_labels.push(label.clone());
10244                    left_positions.push(None);
10245                    right_positions.push(Some(right_pos));
10246                }
10247            }
10248        }
10249    }
10250
10251    let mut union_index = Index::new(out_labels);
10252    match mode {
10253        AlignMode::Left => {
10254            union_index.name = left.name.clone();
10255        }
10256        AlignMode::Right => {
10257            union_index.name = right.name.clone();
10258        }
10259        AlignMode::Inner | AlignMode::Outer => {}
10260    }
10261
10262    AlignmentPlan {
10263        union_index,
10264        left_positions,
10265        right_positions,
10266    }
10267}
10268
10269/// Align two indexes using the specified join mode.
10270///
10271/// Returns an `AlignmentPlan` whose `union_index` contains the output index
10272/// (which may be an intersection, left-only, right-only, or union depending on mode).
10273pub fn align(left: &Index, right: &Index, mode: AlignMode) -> AlignmentPlan {
10274    if left.has_duplicates() || right.has_duplicates() {
10275        return align_non_unique(left, right, mode);
10276    }
10277
10278    match mode {
10279        AlignMode::Inner => align_inner(left, right),
10280        AlignMode::Left => align_left(left, right),
10281        AlignMode::Right => {
10282            let plan = align_left(right, left);
10283            AlignmentPlan {
10284                union_index: plan.union_index,
10285                left_positions: plan.right_positions,
10286                right_positions: plan.left_positions,
10287            }
10288        }
10289        AlignMode::Outer => align_union(left, right),
10290    }
10291}
10292
10293/// Inner alignment: only labels present in both indexes.
10294///
10295/// For non-unique labels, emits cartesian matches preserving left order.
10296pub fn align_inner(left: &Index, right: &Index) -> AlignmentPlan {
10297    if left.has_duplicates() || right.has_duplicates() {
10298        return align_non_unique(left, right, AlignMode::Inner);
10299    }
10300
10301    let right_map = right.position_map_first_ref();
10302
10303    let mut output_labels = Vec::new();
10304    let mut left_positions = Vec::new();
10305    let mut right_positions = Vec::new();
10306
10307    for (left_pos, label) in left.labels.iter().enumerate() {
10308        if let Some(&right_pos) = right_map.get(label) {
10309            output_labels.push(label.clone());
10310            left_positions.push(Some(left_pos));
10311            right_positions.push(Some(right_pos));
10312        }
10313    }
10314
10315    // Per br-frankenpandas-m2i5n: pandas inner alignment preserves the
10316    // shared index name (preserved when both operands agree, None when
10317    // they differ). Mirrors align_non_unique handling.
10318    let shared_name = if left.name() == right.name() {
10319        left.name().map(str::to_owned)
10320    } else {
10321        None
10322    };
10323    let mut union_index = Index::new(output_labels);
10324    union_index.name = shared_name;
10325    AlignmentPlan {
10326        union_index,
10327        left_positions,
10328        right_positions,
10329    }
10330}
10331
10332/// Left alignment: all left labels preserved, right fills with None for missing.
10333pub fn align_left(left: &Index, right: &Index) -> AlignmentPlan {
10334    if left.has_duplicates() || right.has_duplicates() {
10335        return align_non_unique(left, right, AlignMode::Left);
10336    }
10337
10338    let right_map = right.position_map_first_ref();
10339
10340    let mut left_positions = Vec::with_capacity(left.len());
10341    let mut right_positions = Vec::with_capacity(left.len());
10342
10343    for (left_pos, label) in left.labels.iter().enumerate() {
10344        left_positions.push(Some(left_pos));
10345        right_positions.push(right_map.get(label).copied());
10346    }
10347
10348    AlignmentPlan {
10349        union_index: left.clone(),
10350        left_positions,
10351        right_positions,
10352    }
10353}
10354
10355pub fn align_union(left: &Index, right: &Index) -> AlignmentPlan {
10356    if left.has_duplicates() || right.has_duplicates() {
10357        return align_non_unique(left, right, AlignMode::Outer);
10358    }
10359
10360    let left_positions_map = left.position_map_first_ref();
10361    let right_positions_map = right.position_map_first_ref();
10362
10363    let mut union_labels = Vec::with_capacity(left.labels.len() + right.labels.len());
10364    union_labels.extend(left.labels.iter().cloned());
10365    for label in &right.labels {
10366        if !left_positions_map.contains_key(&label) {
10367            union_labels.push(label.clone());
10368        }
10369    }
10370
10371    let left_positions = union_labels
10372        .iter()
10373        .map(|label| left_positions_map.get(&label).copied())
10374        .collect();
10375
10376    let right_positions = union_labels
10377        .iter()
10378        .map(|label| right_positions_map.get(&label).copied())
10379        .collect();
10380
10381    // Per br-frankenpandas-r4k11: pandas outer alignment preserves the
10382    // shared index name. Mirrors align_inner / align_non_unique handling.
10383    let shared_name = if left.name() == right.name() {
10384        left.name().map(str::to_owned)
10385    } else {
10386        None
10387    };
10388    let mut union_index = Index::new(union_labels);
10389    union_index.name = shared_name;
10390    AlignmentPlan {
10391        union_index,
10392        left_positions,
10393        right_positions,
10394    }
10395}
10396
10397pub fn validate_alignment_plan(plan: &AlignmentPlan) -> Result<(), IndexError> {
10398    if plan.left_positions.len() != plan.right_positions.len()
10399        || plan.left_positions.len() != plan.union_index.len()
10400    {
10401        return Err(IndexError::InvalidAlignmentVectors);
10402    }
10403
10404    Ok(())
10405}
10406
10407// ── AG-11: Leapfrog Triejoin for Multi-Way Index Alignment ─────────────
10408
/// Result of multi-way alignment: a union index plus per-input position vectors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiAlignmentPlan {
    /// Output index shared by all inputs.
    pub union_index: Index,
    /// One position vector per input index.
    // NOTE(review): presumably `positions[k][row]` is the source position of
    // output row `row` in input `k`, with `None` when that input lacks the
    // label (mirroring `AlignmentPlan`) — confirm against the producer.
    pub positions: Vec<Vec<Option<usize>>>,
}
10415
10416/// K-way merge union of multiple sorted iterators.
10417///
10418/// Produces a sorted, deduplicated index containing all labels from all inputs.
10419/// Each input is sorted internally before merging. Uses a min-heap for O(N log K)
10420/// performance where N = total labels and K = number of indexes.
10421pub fn leapfrog_union(indexes: &[&Index]) -> Index {
10422    if indexes.is_empty() {
10423        return Index::new(Vec::new());
10424    }
10425    if indexes.len() == 1 {
10426        return indexes[0].unique().sort_values();
10427    }
10428
10429    // Sort and dedup each input
10430    let sorted: Vec<Vec<&IndexLabel>> = indexes
10431        .iter()
10432        .map(|idx| {
10433            let mut labels: Vec<&IndexLabel> = idx.labels().iter().collect();
10434            labels.sort();
10435            labels.dedup();
10436            labels
10437        })
10438        .collect();
10439
10440    // Initialize min-heap: (label, iter_index, position_in_iter)
10441    let mut heap = std::collections::BinaryHeap::new();
10442    for (i, iter) in sorted.iter().enumerate() {
10443        if !iter.is_empty() {
10444            heap.push(std::cmp::Reverse((iter[0].clone(), i, 0_usize)));
10445        }
10446    }
10447
10448    let total: usize = sorted.iter().map(|s| s.len()).sum();
10449    let mut result = Vec::with_capacity(total);
10450
10451    while let Some(std::cmp::Reverse((label, iter_idx, pos))) = heap.pop() {
10452        // Deduplicate: only push if different from last
10453        if result.last() != Some(&label) {
10454            result.push(label);
10455        }
10456
10457        let next_pos = pos + 1;
10458        if next_pos < sorted[iter_idx].len() {
10459            heap.push(std::cmp::Reverse((
10460                sorted[iter_idx][next_pos].clone(),
10461                iter_idx,
10462                next_pos,
10463            )));
10464        }
10465    }
10466
10467    Index::new(result)
10468}
10469
10470/// Leapfrog intersection: labels present in ALL input indexes.
10471///
10472/// Classic leapfrog algorithm on sorted iterators. For each position,
10473/// advance the smallest iterator to seek the maximum. When all iterators
10474/// agree, emit the label.
10475pub fn leapfrog_intersection(indexes: &[&Index]) -> Index {
10476    if indexes.is_empty() {
10477        return Index::new(Vec::new());
10478    }
10479    if indexes.len() == 1 {
10480        return indexes[0].unique().sort_values();
10481    }
10482
10483    // Sort and dedup each input
10484    let sorted: Vec<Vec<&IndexLabel>> = indexes
10485        .iter()
10486        .map(|idx| {
10487            let mut labels: Vec<&IndexLabel> = idx.labels().iter().collect();
10488            labels.sort();
10489            labels.dedup();
10490            labels
10491        })
10492        .collect();
10493
10494    // Cursors into each sorted iterator
10495    let k = sorted.len();
10496    let mut cursors: Vec<usize> = vec![0; k];
10497    let mut result = Vec::new();
10498
10499    'outer: loop {
10500        // Check if any iterator is exhausted
10501        for i in 0..k {
10502            if cursors[i] >= sorted[i].len() {
10503                break 'outer;
10504            }
10505        }
10506
10507        // Find the max label across all cursors
10508        let mut max_label = sorted[0][cursors[0]];
10509        for i in 1..k {
10510            if sorted[i][cursors[i]] > max_label {
10511                max_label = sorted[i][cursors[i]];
10512            }
10513        }
10514
10515        // Advance all cursors to at least max_label
10516        let mut all_equal = true;
10517        for i in 0..k {
10518            // Binary search for max_label in sorted[i] starting from cursors[i]
10519            let remaining = &sorted[i][cursors[i]..];
10520            match remaining.binary_search(&max_label) {
10521                Ok(offset) => {
10522                    cursors[i] += offset;
10523                }
10524                Err(offset) => {
10525                    cursors[i] += offset;
10526                    all_equal = false;
10527                }
10528            }
10529            if cursors[i] >= sorted[i].len() {
10530                break 'outer;
10531            }
10532        }
10533
10534        if all_equal {
10535            // All iterators point to the same label
10536            result.push(max_label.clone());
10537            for cursor in &mut cursors {
10538                *cursor += 1;
10539            }
10540        }
10541        // If not all equal, the loop continues with updated cursors
10542    }
10543
10544    Index::new(result)
10545}
10546
10547/// Multi-way alignment: union all indexes, then compute position vectors.
10548///
10549/// This is the AGM-bound-optimal replacement for iterative pairwise `align_union`.
10550/// For N indexes, produces a single sorted union index and N position vectors
10551/// mapping each union label to its original position in each input.
10552pub fn multi_way_align(indexes: &[&Index]) -> MultiAlignmentPlan {
10553    if indexes.is_empty() {
10554        return MultiAlignmentPlan {
10555            union_index: Index::new(Vec::new()),
10556            positions: Vec::new(),
10557        };
10558    }
10559
10560    // Preserve pandas-style union ordering: start with the first index's labels,
10561    // then append unseen labels from subsequent indexes in encounter order.
10562    // This matches iterative align_union(sort=False) semantics while avoiding
10563    // the O(N*K) pairwise alignment cascade.
10564    // Borrow labels into the membership set (no clone per label) and clone only
10565    // the first-seen ones into the output. The prior version cloned EVERY label
10566    // into an owned HashSet<IndexLabel> (even duplicates) — clone-bound. Borrowed
10567    // keys + FxHashSet leave only the unique-label output clones. The borrow is
10568    // valid: every &IndexLabel comes from `indexes`, which outlives this scan.
10569    let mut seen: FxHashSet<&IndexLabel> = FxHashSet::with_capacity_and_hasher(
10570        indexes.iter().map(|idx| idx.labels().len()).sum(),
10571        Default::default(),
10572    );
10573    let mut union_labels: Vec<IndexLabel> = Vec::new();
10574    for idx in indexes {
10575        for label in idx.labels() {
10576            if seen.insert(label) {
10577                union_labels.push(label.clone());
10578            }
10579        }
10580    }
10581    // Per br-frankenpandas-nrhjq: pandas multi-index union sets name to
10582    // the shared name across all inputs (= None if any differ).
10583    let first_name = indexes
10584        .first()
10585        .and_then(|idx| idx.name())
10586        .map(str::to_owned);
10587    let shared_name = if indexes
10588        .iter()
10589        .all(|idx| idx.name() == first_name.as_deref())
10590    {
10591        first_name
10592    } else {
10593        None
10594    };
10595    let mut union = Index::new(union_labels);
10596    union.name = shared_name;
10597
10598    // Build position maps for each input
10599    let maps: Vec<FxHashMap<&IndexLabel, usize>> = indexes
10600        .iter()
10601        .map(|idx| idx.position_map_first_ref())
10602        .collect();
10603
10604    let positions: Vec<Vec<Option<usize>>> = maps
10605        .iter()
10606        .map(|map| {
10607            union
10608                .labels
10609                .iter()
10610                .map(|label| map.get(label).copied())
10611                .collect()
10612        })
10613        .collect();
10614
10615    MultiAlignmentPlan {
10616        union_index: union,
10617        positions,
10618    }
10619}
10620
10621// ── TimedeltaIndex helpers ──────────────────────────────────────────────
10622
/// Error for timedelta_range parameter combinations.
///
/// Returned by `timedelta_range` when its arguments cannot describe a range.
#[derive(Debug, Clone, Error)]
pub enum TimedeltaRangeError {
    /// Fewer than two of `start`, `end`, `periods` were supplied.
    #[error("must specify at least two of start, end, periods")]
    InsufficientParams,
    /// All three of `start`, `end`, `periods` were supplied.
    #[error("must specify no more than two of start, end, periods")]
    TooManyParams,
    /// `freq` was zero or negative.
    #[error("freq must be positive")]
    NonPositiveFreq,
    /// The requested range cannot be computed (for example `end < start`).
    #[error("cannot compute range: end < start with positive freq")]
    InvalidRange,
}
10635
10636/// Create a TimedeltaIndex with evenly spaced values.
10637///
10638/// Analogous to `pd.timedelta_range()`. Must specify exactly two of:
10639/// start, end, periods. Frequency defaults to 1 day (86_400_000_000_000 ns).
10640///
10641/// # Examples
10642/// ```
10643/// use fp_index::timedelta_range;
10644/// use fp_types::Timedelta;
10645///
10646/// let idx = timedelta_range(
10647///     Some(Timedelta::NANOS_PER_DAY),
10648///     None,
10649///     Some(3),
10650///     Timedelta::NANOS_PER_DAY,
10651///     None,
10652/// ).unwrap();
10653/// assert_eq!(idx.len(), 3);
10654/// ```
10655pub fn timedelta_range(
10656    start: Option<i64>,
10657    end: Option<i64>,
10658    periods: Option<usize>,
10659    freq: i64,
10660    name: Option<&str>,
10661) -> Result<Index, TimedeltaRangeError> {
10662    if freq <= 0 {
10663        return Err(TimedeltaRangeError::NonPositiveFreq);
10664    }
10665
10666    let (start_ns, count) = match (start, end, periods) {
10667        (Some(s), Some(e), None) => {
10668            if e < s {
10669                return Err(TimedeltaRangeError::InvalidRange);
10670            }
10671            let n = ((e - s) / freq + 1) as usize;
10672            (s, n)
10673        }
10674        (Some(s), None, Some(p)) => (s, p),
10675        (None, Some(e), Some(p)) => {
10676            let s = e - (p.saturating_sub(1) as i64) * freq;
10677            (s, p)
10678        }
10679        (Some(_), Some(_), Some(_)) => return Err(TimedeltaRangeError::TooManyParams),
10680        _ => return Err(TimedeltaRangeError::InsufficientParams),
10681    };
10682
10683    let nanos: Vec<i64> = (0..count).map(|i| start_ns + (i as i64) * freq).collect();
10684    let mut idx = Index::from_timedelta64(nanos);
10685    if let Some(n) = name {
10686        idx = idx.set_name(n);
10687    }
10688    Ok(idx)
10689}
10690
10691// ── DatetimeIndex helpers ───────────────────────────────────────────────
10692
/// Error for date_range parameter combinations.
///
/// Also used by the datetime parsing, date-offset and frequency-inference
/// helpers in this section.
#[derive(Debug, Clone, Error)]
pub enum DateRangeError {
    /// Fewer than two of `start`, `end`, `periods` were supplied.
    #[error("must specify at least two of start, end, periods")]
    InsufficientParams,
    /// Frequency inference was asked about fewer than three timestamps.
    #[error("need at least 3 dates to infer frequency")]
    InsufficientDates,
    /// All three of `start`, `end`, `periods` were supplied.
    #[error("must specify no more than two of start, end, periods")]
    TooManyParams,
    /// `freq` was zero or negative.
    #[error("freq must be positive")]
    NonPositiveFreq,
    /// The requested value cannot be computed: `end < start`, or a date or
    /// nanosecond computation left the representable range.
    #[error("cannot compute range: end < start with positive freq")]
    InvalidRange,
    /// The input could not be interpreted as a datetime; the payload carries
    /// the offending text or a description of the problem.
    #[error("invalid datetime string: {0}")]
    ParseError(String),
}
10709
10710/// Parse an ISO 8601 datetime string to nanoseconds since epoch.
10711fn parse_datetime_to_nanos(s: &str) -> Result<i64, DateRangeError> {
10712    use chrono::NaiveDateTime;
10713
10714    let trimmed = s.trim();
10715
10716    // Try full datetime format
10717    if let Ok(dt) = NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%d %H:%M:%S") {
10718        return datetime_to_nanos(dt);
10719    }
10720    if let Ok(dt) = NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%dT%H:%M:%S") {
10721        return datetime_to_nanos(dt);
10722    }
10723
10724    // Try date-only format (midnight)
10725    if let Ok(date) = chrono::NaiveDate::parse_from_str(trimmed, "%Y-%m-%d") {
10726        let dt = date
10727            .and_hms_opt(0, 0, 0)
10728            .ok_or(DateRangeError::InvalidRange)?;
10729        return datetime_to_nanos(dt);
10730    }
10731
10732    Err(DateRangeError::ParseError(trimmed.to_owned()))
10733}
10734
10735fn datetime_to_nanos(dt: chrono::NaiveDateTime) -> Result<i64, DateRangeError> {
10736    dt.and_utc()
10737        .timestamp_nanos_opt()
10738        .ok_or(DateRangeError::InvalidRange)
10739}
10740
10741fn datetime_nanos_to_date(nanos: i64) -> Result<chrono::NaiveDate, DateRangeError> {
10742    let (date, _) = split_datetime_nanos(nanos)?;
10743    Ok(date)
10744}
10745
10746fn split_datetime_nanos(nanos: i64) -> Result<(chrono::NaiveDate, i64), DateRangeError> {
10747    let days = nanos.div_euclid(Timedelta::NANOS_PER_DAY);
10748    let time_nanos = nanos.rem_euclid(Timedelta::NANOS_PER_DAY);
10749    let epoch = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).ok_or(DateRangeError::InvalidRange)?;
10750    let date = epoch
10751        .checked_add_signed(chrono::Duration::days(days))
10752        .ok_or(DateRangeError::InvalidRange)?;
10753    Ok((date, time_nanos))
10754}
10755
10756fn date_to_midnight_nanos(date: chrono::NaiveDate) -> Result<i64, DateRangeError> {
10757    let dt = date
10758        .and_hms_opt(0, 0, 0)
10759        .ok_or(DateRangeError::InvalidRange)?;
10760    dt.and_utc()
10761        .timestamp_nanos_opt()
10762        .ok_or(DateRangeError::InvalidRange)
10763}
10764
10765fn date_and_time_to_nanos(date: chrono::NaiveDate, time_nanos: i64) -> Result<i64, DateRangeError> {
10766    date_to_midnight_nanos(date)?
10767        .checked_add(time_nanos)
10768        .ok_or(DateRangeError::InvalidRange)
10769}
10770
10771fn checked_day_step(
10772    date: chrono::NaiveDate,
10773    days: i64,
10774) -> Result<chrono::NaiveDate, DateRangeError> {
10775    date.checked_add_signed(chrono::Duration::days(days))
10776        .ok_or(DateRangeError::InvalidRange)
10777}
10778
10779fn is_business_day(date: chrono::NaiveDate) -> bool {
10780    use chrono::{Datelike, Weekday};
10781
10782    !matches!(date.weekday(), Weekday::Sat | Weekday::Sun)
10783}
10784
10785fn next_business_day(mut date: chrono::NaiveDate) -> Result<chrono::NaiveDate, DateRangeError> {
10786    while !is_business_day(date) {
10787        date = checked_day_step(date, 1)?;
10788    }
10789    Ok(date)
10790}
10791
10792fn previous_business_day(mut date: chrono::NaiveDate) -> Result<chrono::NaiveDate, DateRangeError> {
10793    while !is_business_day(date) {
10794        date = checked_day_step(date, -1)?;
10795    }
10796    Ok(date)
10797}
10798
10799fn collect_business_days_from_start(
10800    start: chrono::NaiveDate,
10801    periods: usize,
10802) -> Result<Vec<i64>, DateRangeError> {
10803    let mut values = Vec::with_capacity(periods);
10804    let mut date = next_business_day(start)?;
10805    while values.len() < periods {
10806        values.push(date_to_midnight_nanos(date)?);
10807        date = next_business_day(checked_day_step(date, 1)?)?;
10808    }
10809    Ok(values)
10810}
10811
10812fn collect_business_days_through_end(
10813    end: chrono::NaiveDate,
10814    periods: usize,
10815) -> Result<Vec<i64>, DateRangeError> {
10816    let mut values = Vec::with_capacity(periods);
10817    let mut date = previous_business_day(end)?;
10818    while values.len() < periods {
10819        values.push(date_to_midnight_nanos(date)?);
10820        date = previous_business_day(checked_day_step(date, -1)?)?;
10821    }
10822    values.reverse();
10823    Ok(values)
10824}
10825
10826fn collect_business_days_between(
10827    start: chrono::NaiveDate,
10828    end: chrono::NaiveDate,
10829) -> Result<Vec<i64>, DateRangeError> {
10830    if end < start {
10831        return Err(DateRangeError::InvalidRange);
10832    }
10833
10834    let mut values = Vec::new();
10835    let mut date = next_business_day(start)?;
10836    while date <= end {
10837        values.push(date_to_midnight_nanos(date)?);
10838        date = next_business_day(checked_day_step(date, 1)?)?;
10839    }
10840    Ok(values)
10841}
10842
/// A small subset of pandas `pandas.tseries.offsets` date offsets.
///
/// Each variant carries a signed count `n`; see `apply_date_offset_to_nanos`
/// for how an offset is applied to a timestamp.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateOffset {
    /// Shift by `n` calendar days (`n` × 24 hours of nanoseconds).
    Day(i32),
    /// Shift by `n` Monday–Friday business days, keeping the time of day.
    /// `n == 0` rolls a weekend date forward to the next business day.
    BusinessDay(i32),
    /// Move to a month end, keeping the time of day. `n == 0` rolls to the
    /// end of the current month; a positive `n` applied to a date that is
    /// not a month end counts the current month's end as the first step.
    MonthEnd(i32),
}
10850
10851/// Apply a date offset to a timestamp string and return nanoseconds since epoch.
10852///
10853/// This mirrors `pd.Timestamp(timestamp) + pd.offsets.<Offset>(n)` for the
10854/// supported offsets.
10855pub fn apply_date_offset(timestamp: &str, offset: DateOffset) -> Result<i64, DateRangeError> {
10856    let nanos = parse_datetime_to_nanos(timestamp)?;
10857    apply_date_offset_to_nanos(nanos, offset)
10858}
10859
10860/// Apply a date offset to a nanosecond timestamp.
10861pub fn apply_date_offset_to_nanos(nanos: i64, offset: DateOffset) -> Result<i64, DateRangeError> {
10862    match offset {
10863        DateOffset::Day(days) => nanos
10864            .checked_add(
10865                i64::from(days)
10866                    .checked_mul(Timedelta::NANOS_PER_DAY)
10867                    .ok_or(DateRangeError::InvalidRange)?,
10868            )
10869            .ok_or(DateRangeError::InvalidRange),
10870        DateOffset::BusinessDay(days) => {
10871            let (date, time_nanos) = split_datetime_nanos(nanos)?;
10872            let shifted = apply_business_day_offset(date, days)?;
10873            date_and_time_to_nanos(shifted, time_nanos)
10874        }
10875        DateOffset::MonthEnd(months) => {
10876            let (date, time_nanos) = split_datetime_nanos(nanos)?;
10877            let shifted = apply_month_end_offset(date, months)?;
10878            date_and_time_to_nanos(shifted, time_nanos)
10879        }
10880    }
10881}
10882
10883fn apply_business_day_offset(
10884    date: chrono::NaiveDate,
10885    days: i32,
10886) -> Result<chrono::NaiveDate, DateRangeError> {
10887    if days == 0 {
10888        return next_business_day(date);
10889    }
10890
10891    let mut shifted = date;
10892    if days > 0 {
10893        for _ in 0..days.unsigned_abs() {
10894            shifted = next_business_day(checked_day_step(shifted, 1)?)?;
10895        }
10896    } else {
10897        for _ in 0..days.unsigned_abs() {
10898            shifted = previous_business_day(checked_day_step(shifted, -1)?)?;
10899        }
10900    }
10901    Ok(shifted)
10902}
10903
10904fn last_day_of_month(year: i32, month: u32) -> Result<chrono::NaiveDate, DateRangeError> {
10905    let (next_year, next_month) = if month == 12 {
10906        (year.checked_add(1).ok_or(DateRangeError::InvalidRange)?, 1)
10907    } else {
10908        (year, month + 1)
10909    };
10910    let first_next_month = chrono::NaiveDate::from_ymd_opt(next_year, next_month, 1)
10911        .ok_or(DateRangeError::InvalidRange)?;
10912    checked_day_step(first_next_month, -1)
10913}
10914
10915fn add_months_to_month_end(
10916    date: chrono::NaiveDate,
10917    months: i32,
10918) -> Result<chrono::NaiveDate, DateRangeError> {
10919    use chrono::Datelike;
10920
10921    let month_index = i64::from(date.year())
10922        .checked_mul(12)
10923        .and_then(|value| value.checked_add(i64::from(date.month()) - 1))
10924        .and_then(|value| value.checked_add(i64::from(months)))
10925        .ok_or(DateRangeError::InvalidRange)?;
10926    let year =
10927        i32::try_from(month_index.div_euclid(12)).map_err(|_| DateRangeError::InvalidRange)?;
10928    let month =
10929        u32::try_from(month_index.rem_euclid(12) + 1).map_err(|_| DateRangeError::InvalidRange)?;
10930    last_day_of_month(year, month)
10931}
10932
10933fn month_ordinal(date: chrono::NaiveDate) -> i64 {
10934    use chrono::Datelike;
10935
10936    i64::from(date.year()) * 12 + i64::from(date.month()) - 1
10937}
10938
10939fn apply_month_end_offset(
10940    date: chrono::NaiveDate,
10941    months: i32,
10942) -> Result<chrono::NaiveDate, DateRangeError> {
10943    use chrono::Datelike;
10944
10945    let current_month_end = last_day_of_month(date.year(), date.month())?;
10946    if months == 0 {
10947        return if date == current_month_end {
10948            Ok(date)
10949        } else {
10950            Ok(current_month_end)
10951        };
10952    }
10953
10954    let month_steps = if months > 0 && date != current_month_end {
10955        months - 1
10956    } else {
10957        months
10958    };
10959    add_months_to_month_end(current_month_end, month_steps)
10960}
10961
10962fn fixed_frequency_name(diff: i64) -> Option<String> {
10963    if diff <= 0 {
10964        return None;
10965    }
10966
10967    let units = [
10968        (Timedelta::NANOS_PER_DAY, "D"),
10969        (Timedelta::NANOS_PER_HOUR, "h"),
10970        (Timedelta::NANOS_PER_MIN, "min"),
10971        (Timedelta::NANOS_PER_SEC, "s"),
10972        (Timedelta::NANOS_PER_MILLI, "ms"),
10973        (Timedelta::NANOS_PER_MICRO, "us"),
10974        (1, "ns"),
10975    ];
10976    for (unit_nanos, suffix) in units {
10977        if diff % unit_nanos == 0 {
10978            let count = diff / unit_nanos;
10979            return if count == 1 {
10980                Some(suffix.to_owned())
10981            } else {
10982                Some(format!("{count}{suffix}"))
10983            };
10984        }
10985    }
10986    None
10987}
10988
10989fn infer_business_day_freq(dates: &[(chrono::NaiveDate, i64)]) -> Option<String> {
10990    if dates.iter().any(|(date, _)| !is_business_day(*date)) {
10991        return None;
10992    }
10993    let first_time = dates[0].1;
10994    if dates.iter().any(|(_, time)| *time != first_time) {
10995        return None;
10996    }
10997    for window in dates.windows(2) {
10998        let expected = next_business_day(checked_day_step(window[0].0, 1).ok()?).ok()?;
10999        if window[1].0 != expected {
11000            return None;
11001        }
11002    }
11003    Some("B".to_owned())
11004}
11005
11006fn infer_month_end_freq(dates: &[(chrono::NaiveDate, i64)]) -> Option<String> {
11007    use chrono::Datelike;
11008
11009    let first_time = dates[0].1;
11010    if dates.iter().any(|(_, time)| *time != first_time) {
11011        return None;
11012    }
11013    for (date, _) in dates {
11014        if *date != last_day_of_month(date.year(), date.month()).ok()? {
11015            return None;
11016        }
11017    }
11018
11019    let step = month_ordinal(dates[1].0) - month_ordinal(dates[0].0);
11020    if step <= 0 {
11021        return None;
11022    }
11023    if dates
11024        .windows(2)
11025        .all(|window| month_ordinal(window[1].0) - month_ordinal(window[0].0) == step)
11026    {
11027        if step == 1 {
11028            Some("ME".to_owned())
11029        } else {
11030            Some(format!("{step}ME"))
11031        }
11032    } else {
11033        None
11034    }
11035}
11036
11037/// Infer a pandas-style frequency string from a DatetimeIndex.
11038///
11039/// Returns `Ok(None)` for irregular or duplicate timestamp sequences. Returns
11040/// an error for the pandas-compatible "fewer than 3 dates" case.
11041pub fn infer_freq(index: &Index) -> Result<Option<String>, DateRangeError> {
11042    let mut values = Vec::with_capacity(index.len());
11043    for label in index.labels() {
11044        match label {
11045            IndexLabel::Datetime64(value) if *value != i64::MIN => values.push(*value),
11046            IndexLabel::Datetime64(_) => return Ok(None),
11047            _ => {
11048                return Err(DateRangeError::ParseError(
11049                    "expected datetime64 index".to_owned(),
11050                ));
11051            }
11052        }
11053    }
11054    infer_freq_from_nanos(&values)
11055}
11056
11057/// Infer a pandas-style frequency string from timestamp strings.
11058pub fn infer_freq_from_timestamps(timestamps: &[&str]) -> Result<Option<String>, DateRangeError> {
11059    let values: Vec<i64> = timestamps
11060        .iter()
11061        .map(|timestamp| parse_datetime_to_nanos(timestamp))
11062        .collect::<Result<_, _>>()?;
11063    infer_freq_from_nanos(&values)
11064}
11065
11066/// Infer a pandas-style frequency string from nanosecond timestamps.
11067pub fn infer_freq_from_nanos(values: &[i64]) -> Result<Option<String>, DateRangeError> {
11068    if values.len() < 3 {
11069        return Err(DateRangeError::InsufficientDates);
11070    }
11071    if values.windows(2).any(|window| window[1] <= window[0]) {
11072        return Ok(None);
11073    }
11074
11075    let first_diff = values[1] - values[0];
11076    if values
11077        .windows(2)
11078        .all(|window| window[1] - window[0] == first_diff)
11079    {
11080        return Ok(fixed_frequency_name(first_diff));
11081    }
11082
11083    let dates: Vec<(chrono::NaiveDate, i64)> = values
11084        .iter()
11085        .map(|value| split_datetime_nanos(*value))
11086        .collect::<Result<_, _>>()?;
11087    if let Some(freq) = infer_business_day_freq(&dates) {
11088        return Ok(Some(freq));
11089    }
11090    if let Some(freq) = infer_month_end_freq(&dates) {
11091        return Ok(Some(freq));
11092    }
11093
11094    Ok(None)
11095}
11096
11097/// Create a DatetimeIndex with evenly spaced values.
11098///
11099/// Analogous to `pd.date_range()`. Must specify exactly two of:
11100/// start, end, periods. Frequency defaults to 1 day.
11101///
11102/// # Examples
11103/// ```
11104/// use fp_index::date_range;
11105/// use fp_types::Timedelta;
11106///
11107/// let idx = date_range(
11108///     Some("2024-01-01"),
11109///     None,
11110///     Some(3),
11111///     Timedelta::NANOS_PER_DAY,
11112///     None,
11113/// ).unwrap();
11114/// assert_eq!(idx.len(), 3);
11115/// ```
11116pub fn date_range(
11117    start: Option<&str>,
11118    end: Option<&str>,
11119    periods: Option<usize>,
11120    freq: i64,
11121    name: Option<&str>,
11122) -> Result<Index, DateRangeError> {
11123    if freq <= 0 {
11124        return Err(DateRangeError::NonPositiveFreq);
11125    }
11126
11127    let start_ns = start.map(parse_datetime_to_nanos).transpose()?;
11128    let end_ns = end.map(parse_datetime_to_nanos).transpose()?;
11129
11130    let (start_val, count) = match (start_ns, end_ns, periods) {
11131        (Some(s), Some(e), None) => {
11132            if e < s {
11133                return Err(DateRangeError::InvalidRange);
11134            }
11135            let span = e.checked_sub(s).ok_or(DateRangeError::InvalidRange)?;
11136            let n = (span / freq + 1) as usize;
11137            (s, n)
11138        }
11139        (Some(s), None, Some(p)) => (s, p),
11140        (None, Some(e), Some(p)) => {
11141            let offset = checked_date_range_offset(p.saturating_sub(1), freq)?;
11142            let s = e.checked_sub(offset).ok_or(DateRangeError::InvalidRange)?;
11143            (s, p)
11144        }
11145        (Some(_), Some(_), Some(_)) => return Err(DateRangeError::TooManyParams),
11146        _ => return Err(DateRangeError::InsufficientParams),
11147    };
11148
11149    let last_offset = checked_date_range_offset(count.saturating_sub(1), freq)?;
11150    start_val
11151        .checked_add(last_offset)
11152        .ok_or(DateRangeError::InvalidRange)?;
11153
11154    let nanos: Vec<i64> = (0..count)
11155        .map(|i| {
11156            let offset = checked_date_range_offset(i, freq)?;
11157            start_val
11158                .checked_add(offset)
11159                .ok_or(DateRangeError::InvalidRange)
11160        })
11161        .collect::<Result<_, _>>()?;
11162    let mut idx = Index::from_datetime64(nanos);
11163    if let Some(n) = name {
11164        idx = idx.set_name(n);
11165    }
11166    Ok(idx)
11167}
11168
11169fn checked_date_range_offset(steps: usize, freq: i64) -> Result<i64, DateRangeError> {
11170    let steps = i64::try_from(steps).map_err(|_| DateRangeError::InvalidRange)?;
11171    steps.checked_mul(freq).ok_or(DateRangeError::InvalidRange)
11172}
11173
11174/// Create a DatetimeIndex with default weekday-only business-day values.
11175///
11176/// Analogous to `pd.bdate_range(..., freq="B")` for the default Monday-Friday
11177/// calendar. Exactly two of start, end, and periods must be specified.
11178pub fn bdate_range(
11179    start: Option<&str>,
11180    end: Option<&str>,
11181    periods: Option<usize>,
11182    name: Option<&str>,
11183) -> Result<Index, DateRangeError> {
11184    let start_date = start
11185        .map(parse_datetime_to_nanos)
11186        .transpose()?
11187        .map(datetime_nanos_to_date)
11188        .transpose()?;
11189    let end_date = end
11190        .map(parse_datetime_to_nanos)
11191        .transpose()?
11192        .map(datetime_nanos_to_date)
11193        .transpose()?;
11194
11195    let nanos = match (start_date, end_date, periods) {
11196        (Some(start), Some(end), None) => collect_business_days_between(start, end)?,
11197        (Some(start), None, Some(periods)) => collect_business_days_from_start(start, periods)?,
11198        (None, Some(end), Some(periods)) => collect_business_days_through_end(end, periods)?,
11199        (Some(_), Some(_), Some(_)) => return Err(DateRangeError::TooManyParams),
11200        _ => return Err(DateRangeError::InsufficientParams),
11201    };
11202
11203    let mut idx = Index::from_datetime64(nanos);
11204    if let Some(n) = name {
11205        idx = idx.set_name(n);
11206    }
11207    Ok(idx)
11208}
11209
11210// ── MultiIndex ──────────────────────────────────────────────────────────
11211
/// A hierarchical (multi-level) index for DataFrames and Series.
///
/// Stores multiple levels of labels as separate vectors (columnar layout),
/// analogous to pandas `pd.MultiIndex`. Each row position has one label
/// per level. The combination of labels across all levels forms the
/// composite key for that row.
///
/// The row count is taken from the first level, so all levels are expected
/// to have the same length.
///
/// This type exists alongside `Index` and can be converted to/from it.
/// Full DataFrame integration is a future step.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MultiIndex {
    /// One `Vec<IndexLabel>` per level, all the same length (= nrows).
    /// `levels[l][r]` is the label of row `r` at level `l`.
    levels: Vec<Vec<IndexLabel>>,
    /// Optional name for each level (presumably one entry per level —
    /// confirm against the constructors).
    names: Vec<Option<String>>,
}
11228
11229impl MultiIndex {
11230    /// Number of levels in this MultiIndex.
11231    #[must_use]
11232    pub fn nlevels(&self) -> usize {
11233        self.levels.len()
11234    }
11235
11236    /// Number of rows (entries) in this MultiIndex.
11237    #[must_use]
11238    pub fn len(&self) -> usize {
11239        self.levels.first().map_or(0, Vec::len)
11240    }
11241
11242    /// Whether this MultiIndex has zero entries.
11243    #[must_use]
11244    pub fn is_empty(&self) -> bool {
11245        self.len() == 0
11246    }
11247
11248    /// Compare two rows lexicographically across all levels.
11249    ///
11250    /// Private helper for sortedness predicates. Returns `Ordering::Equal`
11251    /// only when every level value matches exactly.
11252    fn row_cmp(&self, a: usize, b: usize) -> std::cmp::Ordering {
11253        for level in 0..self.nlevels() {
11254            let ord = self.levels[level][a].cmp(&self.levels[level][b]);
11255            if ord != std::cmp::Ordering::Equal {
11256                return ord;
11257            }
11258        }
11259        std::cmp::Ordering::Equal
11260    }
11261
11262    /// Whether this MultiIndex is sorted in lexicographic (row-major) order.
11263    ///
11264    /// Matches `pd.MultiIndex.is_monotonic_increasing`. Row `i` must be less
11265    /// than or equal to row `i+1` under level-by-level comparison. Empty or
11266    /// single-row indexes return `true` (trivially sorted).
11267    ///
11268    /// Per br-frankenpandas-w4uu: pandas `df.loc['A':'B']` on a MultiIndex
11269    /// raises `KeyError: MultiIndex slicing requires the index to be
11270    /// lexsorted` when this predicate is false. fp-frame's range-slice
11271    /// callers should gate on this before delegating to `slice_locs`.
11272    #[must_use]
11273    pub fn is_monotonic_increasing(&self) -> bool {
11274        if self.len() <= 1 {
11275            return true;
11276        }
11277        (0..self.len() - 1).all(|i| self.row_cmp(i, i + 1) != std::cmp::Ordering::Greater)
11278    }
11279
11280    /// Whether this MultiIndex is sorted in strictly descending order.
11281    ///
11282    /// Matches `pd.MultiIndex.is_monotonic_decreasing`. Row `i` must be
11283    /// greater than or equal to row `i+1`. Empty / single-row: `true`.
11284    #[must_use]
11285    pub fn is_monotonic_decreasing(&self) -> bool {
11286        if self.len() <= 1 {
11287            return true;
11288        }
11289        (0..self.len() - 1).all(|i| self.row_cmp(i, i + 1) != std::cmp::Ordering::Less)
11290    }
11291
11292    /// Alias for `is_monotonic_increasing` matching `pd.MultiIndex.is_lexsorted`
11293    /// semantics (pandas deprecated the `is_lexsorted` name in 1.x; we keep
11294    /// it as a convenience alias for migrated code).
11295    #[must_use]
11296    pub fn is_lexsorted(&self) -> bool {
11297        self.is_monotonic_increasing()
11298    }
11299
11300    /// Level names.
11301    #[must_use]
11302    pub fn names(&self) -> &[Option<String>] {
11303        &self.names
11304    }
11305
11306    /// Scalar index name, matching `pd.MultiIndex.name`.
11307    #[must_use]
11308    pub fn name(&self) -> Option<&str> {
11309        None
11310    }
11311
11312    /// Number of entries, matching `pd.MultiIndex.size`.
11313    #[must_use]
11314    pub fn size(&self) -> usize {
11315        self.len()
11316    }
11317
11318    /// Shape of this one-dimensional index, matching `pd.MultiIndex.shape`.
11319    #[must_use]
11320    pub fn shape(&self) -> (usize,) {
11321        (self.len(),)
11322    }
11323
11324    /// Number of dimensions, matching `pd.MultiIndex.ndim`.
11325    #[must_use]
11326    pub fn ndim(&self) -> usize {
11327        1
11328    }
11329
11330    /// Alias for `is_empty`, matching the pandas `.empty` property.
11331    #[must_use]
11332    pub fn empty(&self) -> bool {
11333        self.is_empty()
11334    }
11335
11336    fn shift_unsupported_error() -> IndexError {
11337        IndexError::InvalidArgument(
11338            "This method is only implemented for DatetimeIndex, PeriodIndex and TimedeltaIndex; Got type MultiIndex"
11339                .to_owned(),
11340        )
11341    }
11342
11343    /// Unsupported temporal shift, matching `pd.MultiIndex.shift(...)`.
11344    pub fn shift(&self, _periods: i64, _freq: Option<&str>) -> Result<Self, IndexError> {
11345        Err(Self::shift_unsupported_error())
11346    }
11347
11348    fn astype_categorical_error() -> IndexError {
11349        IndexError::InvalidArgument(
11350            "> 1 ndim Categorical are not supported at this time".to_owned(),
11351        )
11352    }
11353
11354    fn astype_unsupported_dtype_error(dtype: &str) -> IndexError {
11355        IndexError::InvalidArgument(format!(
11356            "Setting a MultiIndex dtype to anything other than object is not supported; got {dtype}"
11357        ))
11358    }
11359
11360    /// Cast labels to a different dtype, matching `pd.MultiIndex.astype(...)`.
11361    ///
11362    /// Pandas only supports the object dtype on MultiIndex; categorical raises
11363    /// `NotImplementedError` and any other dtype raises `TypeError`. Object
11364    /// returns a clone of the index.
11365    pub fn astype(&self, dtype: &str) -> Result<Self, IndexError> {
11366        match dtype {
11367            "object" | "O" => Ok(self.clone()),
11368            "category" => Err(Self::astype_categorical_error()),
11369            other => Err(Self::astype_unsupported_dtype_error(other)),
11370        }
11371    }
11372
11373    fn diff_unsupported_error() -> IndexError {
11374        IndexError::InvalidArgument(
11375            "cannot perform __sub__ with this index type: MultiIndex".to_owned(),
11376        )
11377    }
11378
11379    /// Unsupported positional differencing, matching `pd.MultiIndex.diff(...)`.
11380    ///
11381    /// Pandas defines `Index.diff` as `self - self.shift(periods)` and raises
11382    /// `TypeError` because tuple-valued levels do not support subtraction.
11383    pub fn diff(&self, _periods: i64) -> Result<Self, IndexError> {
11384        Err(Self::diff_unsupported_error())
11385    }
11386
11387    fn round_unsupported_error() -> IndexError {
11388        IndexError::InvalidArgument(
11389            "loop of ufunc does not support argument 0 of type tuple which has no callable rint method"
11390                .to_owned(),
11391        )
11392    }
11393
11394    /// Unsupported numeric rounding, matching `pd.MultiIndex.round(...)`.
11395    ///
11396    /// Pandas applies `np.around` to the underlying values; tuple-valued
11397    /// MultiIndex labels do not support `rint`, so this surface always rejects.
11398    pub fn round(&self, _decimals: i32) -> Result<Self, IndexError> {
11399        Err(Self::round_unsupported_error())
11400    }
11401
11402    fn string_accessor_error() -> IndexError {
11403        IndexError::InvalidArgument(
11404            "Can only use .str accessor with Index, not MultiIndex".to_owned(),
11405        )
11406    }
11407
11408    /// Unsupported string accessor, matching `pd.MultiIndex.str`.
11409    pub fn r#str(&self) -> Result<(), IndexError> {
11410        Err(Self::string_accessor_error())
11411    }
11412
11413    fn asof_comparison_type_name(&self) -> &'static str {
11414        match self.levels.first().and_then(|level| level.first()) {
11415            Some(IndexLabel::Int64(_)) => "int",
11416            Some(IndexLabel::Utf8(_)) => "str",
11417            Some(IndexLabel::Timedelta64(_)) => "Timedelta",
11418            Some(IndexLabel::Datetime64(_)) => "Timestamp",
11419            Some(IndexLabel::Null(fp_types::NullKind::Null)) => "NoneType",
11420            Some(IndexLabel::Null(fp_types::NullKind::NaN)) => "float",
11421            Some(IndexLabel::Null(fp_types::NullKind::NaT)) => "NaTType",
11422            None => "object",
11423        }
11424    }
11425
11426    fn asof_unsupported_error(&self) -> IndexError {
11427        IndexError::InvalidArgument(format!(
11428            "'<' not supported between instances of 'tuple' and '{}'",
11429            self.asof_comparison_type_name()
11430        ))
11431    }
11432
11433    /// Unsupported nearest-key lookup, matching `pd.MultiIndex.asof(...)`.
11434    pub fn asof(&self, _key: &[IndexLabel]) -> Result<Option<Vec<IndexLabel>>, IndexError> {
11435        if self.is_empty() {
11436            return Ok(None);
11437        }
11438        Err(self.asof_unsupported_error())
11439    }
11440
11441    fn asof_locs_no_mask_error() -> IndexError {
11442        IndexError::InvalidArgument("object too deep for desired array".to_owned())
11443    }
11444
11445    fn asof_locs_empty_mask_error() -> IndexError {
11446        IndexError::InvalidArgument("attempt to get argmax of an empty sequence".to_owned())
11447    }
11448
11449    fn asof_locs_empty_take_error() -> IndexError {
11450        IndexError::InvalidArgument("cannot do a non-empty take from an empty axes.".to_owned())
11451    }
11452
11453    fn asof_locs_mask_length_error(expected: usize, actual: usize) -> IndexError {
11454        IndexError::InvalidArgument(format!(
11455            "boolean index did not match indexed array along axis 0; size of axis is {expected} but size of corresponding boolean axis is {actual}"
11456        ))
11457    }
11458
11459    fn asof_locs_broadcast_error(where_len: usize) -> IndexError {
11460        IndexError::InvalidArgument(format!(
11461            "operands could not be broadcast together with shapes ({where_len},) (2,)"
11462        ))
11463    }
11464
11465    /// Unsupported nearest-position lookup, matching `pd.MultiIndex.asof_locs(...)`.
11466    pub fn asof_locs(
11467        &self,
11468        where_index: &Self,
11469        mask: Option<&[bool]>,
11470    ) -> Result<Vec<Option<usize>>, IndexError> {
11471        let Some(mask) = mask else {
11472            return Err(Self::asof_locs_no_mask_error());
11473        };
11474        if mask.len() != self.len() {
11475            return Err(Self::asof_locs_mask_length_error(self.len(), mask.len()));
11476        }
11477        if mask.is_empty() && self.is_empty() && where_index.is_empty() {
11478            return Err(Self::asof_locs_empty_mask_error());
11479        }
11480        if mask.iter().all(|include| !*include) && !where_index.is_empty() {
11481            return Err(Self::asof_locs_empty_take_error());
11482        }
11483        Err(Self::asof_locs_broadcast_error(where_index.len()))
11484    }
11485
11486    /// Set the names for all levels.
11487    #[must_use]
11488    pub fn set_names(mut self, names: Vec<Option<String>>) -> Self {
11489        // Pad or truncate to match nlevels.
11490        self.names = names;
11491        self.names.resize(self.nlevels(), None);
11492        self
11493    }
11494
11495    /// Rename all MultiIndex levels, matching `pd.MultiIndex.rename(names)`.
11496    ///
11497    /// Unlike [`Self::set_names`], pandas rename requires one name per level
11498    /// and returns a renamed clone without mutating the source index.
11499    pub fn rename(&self, names: Vec<Option<String>>) -> Result<Self, IndexError> {
11500        if names.len() != self.nlevels() {
11501            return Err(IndexError::LengthMismatch {
11502                expected: self.nlevels(),
11503                actual: names.len(),
11504                context: "MultiIndex.rename names length".to_owned(),
11505            });
11506        }
11507        Ok(Self {
11508            levels: self.levels.clone(),
11509            names,
11510        })
11511    }
11512
11513    /// Rename one MultiIndex level, matching `pd.MultiIndex.rename(name, level=...)`.
11514    pub fn rename_level(&self, name: Option<String>, level: usize) -> Result<Self, IndexError> {
11515        if level >= self.nlevels() {
11516            return Err(IndexError::OutOfBounds {
11517                position: level,
11518                length: self.nlevels(),
11519            });
11520        }
11521        let mut names = self.names.clone();
11522        names[level] = name;
11523        Ok(Self {
11524            levels: self.levels.clone(),
11525            names,
11526        })
11527    }
11528
11529    fn shared_names(&self, other: &Self) -> Vec<Option<String>> {
11530        self.names
11531            .iter()
11532            .zip(&other.names)
11533            .map(
11534                |(left, right)| {
11535                    if left == right { left.clone() } else { None }
11536                },
11537            )
11538            .collect()
11539    }
11540
11541    fn ensure_same_nlevels(&self, other: &Self) -> Result<(), IndexError> {
11542        if self.nlevels() != other.nlevels() {
11543            return Err(IndexError::LengthMismatch {
11544                expected: self.nlevels(),
11545                actual: other.nlevels(),
11546                context: "MultiIndex level count mismatch".to_owned(),
11547            });
11548        }
11549        Ok(())
11550    }
11551
11552    fn tuple_at(&self, row: usize) -> Vec<IndexLabel> {
11553        self.levels.iter().map(|level| level[row].clone()).collect()
11554    }
11555
11556    fn take_existing_positions(&self, positions: &[usize]) -> Self {
11557        let levels = self
11558            .levels
11559            .iter()
11560            .map(|level| {
11561                positions
11562                    .iter()
11563                    .map(|&position| level[position].clone())
11564                    .collect()
11565            })
11566            .collect();
11567        Self {
11568            levels,
11569            names: self.names.clone(),
11570        }
11571    }
11572
11573    fn missing_label_for_level(&self, level_idx: usize) -> IndexLabel {
11574        self.levels[level_idx]
11575            .iter()
11576            .find(|label| label.is_missing())
11577            .cloned()
11578            .unwrap_or(IndexLabel::Datetime64(i64::MIN))
11579    }
11580
11581    fn from_tuples_with_names(
11582        tuples: Vec<Vec<IndexLabel>>,
11583        names: Vec<Option<String>>,
11584    ) -> Result<Self, IndexError> {
11585        Ok(Self::from_tuples(tuples)?.set_names(names))
11586    }
11587
11588    /// Unique labels for each level, preserving first-seen order.
11589    ///
11590    /// Matches `pd.MultiIndex.levels`. Missing labels are excluded from the
11591    /// level catalog and receive `-1` in `codes()`.
11592    #[must_use]
11593    pub fn levels(&self) -> Vec<Index> {
11594        self.levels
11595            .iter()
11596            .enumerate()
11597            .map(|(level_idx, level)| {
11598                let mut seen = FxHashMap::<&IndexLabel, ()>::default();
11599                let labels = level
11600                    .iter()
11601                    .filter(|label| !label.is_missing() && seen.insert(label, ()).is_none())
11602                    .cloned()
11603                    .collect();
11604                let mut index = Index::new(labels);
11605                if let Some(name) = self.names.get(level_idx).and_then(|name| name.as_ref()) {
11606                    index = index.set_name(name);
11607                }
11608                index
11609            })
11610            .collect()
11611    }
11612
11613    /// Integer level codes for each row, matching `pd.MultiIndex.codes`.
11614    ///
11615    /// Missing labels receive code `-1`; all other labels are encoded by their
11616    /// first-seen position in the corresponding `levels()` entry.
11617    #[must_use]
11618    pub fn codes(&self) -> Vec<Vec<isize>> {
11619        self.levels
11620            .iter()
11621            .map(|level| {
11622                let mut positions = FxHashMap::<IndexLabel, isize>::default();
11623                let mut next_code = 0_isize;
11624                level
11625                    .iter()
11626                    .map(|label| {
11627                        if label.is_missing() {
11628                            -1
11629                        } else if let Some(code) = positions.get(label) {
11630                            *code
11631                        } else {
11632                            let code = next_code;
11633                            positions.insert(label.clone(), code);
11634                            next_code += 1;
11635                            code
11636                        }
11637                    })
11638                    .collect()
11639            })
11640            .collect()
11641    }
11642
11643    /// Cardinality of each level, matching `pd.MultiIndex.levshape`.
11644    #[must_use]
11645    pub fn levshape(&self) -> Vec<usize> {
11646        self.levels().iter().map(Index::len).collect()
11647    }
11648
11649    /// Materialize every composite key as an owned tuple.
11650    ///
11651    /// Matches `pd.MultiIndex.to_list()` / `tolist()`.
11652    #[must_use]
11653    pub fn to_list(&self) -> Vec<Vec<IndexLabel>> {
11654        (0..self.len()).map(|row| self.tuple_at(row)).collect()
11655    }
11656
11657    /// Alias for `to_list`, matching `pd.MultiIndex.tolist()`.
11658    #[must_use]
11659    pub fn tolist(&self) -> Vec<Vec<IndexLabel>> {
11660        self.to_list()
11661    }
11662
11663    /// Object-array-shaped materialization, matching `pd.MultiIndex.to_numpy`.
11664    #[must_use]
11665    pub fn to_numpy(&self) -> Vec<Vec<IndexLabel>> {
11666        self.to_list()
11667    }
11668
11669    /// Alias for `to_numpy`, matching `pd.MultiIndex.values`.
11670    #[must_use]
11671    pub fn values(&self) -> Vec<Vec<IndexLabel>> {
11672        self.to_numpy()
11673    }
11674
11675    /// Alias for `to_numpy`, matching `pd.MultiIndex.array`.
11676    #[must_use]
11677    pub fn array(&self) -> Vec<Vec<IndexLabel>> {
11678        self.to_numpy()
11679    }
11680
11681    /// Alias for `to_numpy`, matching `pd.MultiIndex.ravel()`.
11682    #[must_use]
11683    pub fn ravel(&self) -> Vec<Vec<IndexLabel>> {
11684        self.to_numpy()
11685    }
11686
11687    /// Return a shallow clone view, matching `pd.MultiIndex.view`.
11688    #[must_use]
11689    pub fn view(&self) -> Self {
11690        self.clone()
11691    }
11692
11693    /// MultiIndex transpose is identity, matching `pd.MultiIndex.transpose`.
11694    #[must_use]
11695    pub fn transpose(&self) -> Self {
11696        self.clone()
11697    }
11698
11699    /// Alias for `transpose`, matching `pd.MultiIndex.T`.
11700    #[allow(non_snake_case)]
11701    #[must_use]
11702    pub fn T(&self) -> Self {
11703        self.transpose()
11704    }
11705
11706    /// Row materialization for `pd.MultiIndex.to_frame(index=False)`.
11707    ///
11708    /// `fp-frame` owns the richer DataFrame return type; the index crate keeps
11709    /// the level-by-level row payload that callers can lift into a frame.
11710    #[must_use]
11711    pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
11712        self.to_list()
11713    }
11714
11715    /// Series-shaped materialization using tuple keys as both index and value.
11716    ///
11717    /// This mirrors `pd.MultiIndex.to_series()` at the payload level while
11718    /// avoiding a dependency from `fp-index` back into `fp-frame`.
11719    #[must_use]
11720    pub fn to_series(&self) -> Vec<(Vec<IndexLabel>, Vec<IndexLabel>)> {
11721        self.to_list()
11722            .into_iter()
11723            .map(|tuple| (tuple.clone(), tuple))
11724            .collect()
11725    }
11726
11727    /// Stringify each tuple in row order, matching `pd.MultiIndex.format()`.
11728    #[must_use]
11729    pub fn format(&self) -> Vec<String> {
11730        self.to_list()
11731            .into_iter()
11732            .map(|tuple| {
11733                let parts: Vec<String> = tuple.into_iter().map(|label| label.to_string()).collect();
11734                format!("({})", parts.join(", "))
11735            })
11736            .collect()
11737    }
11738
11739    /// Approximate memory footprint of all level labels and codes.
11740    ///
11741    /// `deep=false` counts fixed-width labels and `String` headers; `deep=true`
11742    /// additionally counts string bytes, mirroring `Index::memory_usage`.
11743    #[must_use]
11744    pub fn memory_usage(&self, deep: bool) -> usize {
11745        self.levels
11746            .iter()
11747            .flatten()
11748            .map(|label| match label {
11749                IndexLabel::Int64(_)
11750                | IndexLabel::Timedelta64(_)
11751                | IndexLabel::Datetime64(_)
11752                | IndexLabel::Null(_) => 8,
11753                IndexLabel::Utf8(value) => {
11754                    if deep {
11755                        std::mem::size_of::<String>() + value.len()
11756                    } else {
11757                        std::mem::size_of::<String>()
11758                    }
11759                }
11760            })
11761            .sum::<usize>()
11762            + self.nlevels() * self.len() * std::mem::size_of::<isize>()
11763    }
11764
11765    /// Shallow memory footprint, matching `pd.MultiIndex.nbytes`.
11766    #[must_use]
11767    pub fn nbytes(&self) -> usize {
11768        self.memory_usage(false)
11769    }
11770
11771    /// Pandas dtype string for MultiIndex itself.
11772    #[must_use]
11773    pub fn dtype(&self) -> &'static str {
11774        "object"
11775    }
11776
11777    /// Dtype string for each level, matching `pd.MultiIndex.dtypes`.
11778    #[must_use]
11779    pub fn dtypes(&self) -> Vec<&'static str> {
11780        self.levels
11781            .iter()
11782            .map(|level| Index::new(level.clone()).dtype())
11783            .collect()
11784    }
11785
11786    /// Pandas-style inferred type for MultiIndex values.
11787    #[must_use]
11788    pub fn inferred_type(&self) -> &'static str {
11789        "mixed"
11790    }
11791
11792    /// Infer object labels without changing this typed representation.
11793    #[must_use]
11794    pub fn infer_objects(&self) -> Self {
11795        self.clone()
11796    }
11797
11798    /// Whether this MultiIndex can hold integer labels as scalar keys.
11799    #[must_use]
11800    pub fn holds_integer(&self) -> bool {
11801        false
11802    }
11803
11804    /// Return the sole tuple, matching `pd.MultiIndex.item()`.
11805    pub fn item(&self) -> Result<Vec<IndexLabel>, IndexError> {
11806        if self.len() == 1 {
11807            Ok(self.tuple_at(0))
11808        } else {
11809            Err(IndexError::InvalidArgument(format!(
11810                "item requires exactly one tuple, got {}",
11811                self.len()
11812            )))
11813        }
11814    }
11815
11816    /// Return a shallow copy, matching `pd.MultiIndex.copy()`.
11817    #[must_use]
11818    pub fn copy(&self) -> Self {
11819        self.clone()
11820    }
11821
11822    fn multi_index_isna_error() -> IndexError {
11823        IndexError::InvalidArgument("isna is not defined for MultiIndex".to_owned())
11824    }
11825
11826    /// Unsupported missing-label check, matching `pd.MultiIndex.hasnans`.
11827    pub fn hasnans(&self) -> Result<bool, IndexError> {
11828        Err(Self::multi_index_isna_error())
11829    }
11830
11831    /// Unsupported missing-label mask, matching `pd.MultiIndex.isna()`.
11832    pub fn isna(&self) -> Result<Vec<bool>, IndexError> {
11833        Err(Self::multi_index_isna_error())
11834    }
11835
11836    /// Alias for `isna`, matching `pd.MultiIndex.isnull`.
11837    pub fn isnull(&self) -> Result<Vec<bool>, IndexError> {
11838        Err(Self::multi_index_isna_error())
11839    }
11840
11841    /// Unsupported inverse missing-label mask, matching `pd.MultiIndex.notna()`.
11842    pub fn notna(&self) -> Result<Vec<bool>, IndexError> {
11843        Err(Self::multi_index_isna_error())
11844    }
11845
11846    /// Alias for `notna`, matching `pd.MultiIndex.notnull`.
11847    pub fn notnull(&self) -> Result<Vec<bool>, IndexError> {
11848        Err(Self::multi_index_isna_error())
11849    }
11850
11851    /// Replace missing labels in every level with one scalar label.
11852    #[must_use]
11853    pub fn fillna(&self, value: &IndexLabel) -> Self {
11854        let levels = self
11855            .levels
11856            .iter()
11857            .map(|level| {
11858                level
11859                    .iter()
11860                    .map(|label| {
11861                        if label.is_missing() {
11862                            value.clone()
11863                        } else {
11864                            label.clone()
11865                        }
11866                    })
11867                    .collect()
11868            })
11869            .collect();
11870        Self {
11871            levels,
11872            names: self.names.clone(),
11873        }
11874    }
11875
11876    /// Replace missing labels with one replacement per level.
11877    pub fn fillna_tuple(&self, values: &[IndexLabel]) -> Result<Self, IndexError> {
11878        if values.len() != self.nlevels() {
11879            return Err(IndexError::LengthMismatch {
11880                expected: self.nlevels(),
11881                actual: values.len(),
11882                context: "fillna_tuple replacement arity mismatch".to_owned(),
11883            });
11884        }
11885        let levels = self
11886            .levels
11887            .iter()
11888            .enumerate()
11889            .map(|(level_idx, level)| {
11890                level
11891                    .iter()
11892                    .map(|label| {
11893                        if label.is_missing() {
11894                            values[level_idx].clone()
11895                        } else {
11896                            label.clone()
11897                        }
11898                    })
11899                    .collect()
11900            })
11901            .collect();
11902        Ok(Self {
11903            levels,
11904            names: self.names.clone(),
11905        })
11906    }
11907
11908    /// Replace tuples where `cond` is true with `value`.
11909    pub fn putmask(&self, cond: &[bool], value: Vec<IndexLabel>) -> Result<Self, IndexError> {
11910        if cond.len() != self.len() {
11911            return Err(IndexError::LengthMismatch {
11912                expected: self.len(),
11913                actual: cond.len(),
11914                context: "putmask condition length mismatch".to_owned(),
11915            });
11916        }
11917        if value.len() != self.nlevels() {
11918            return Err(IndexError::LengthMismatch {
11919                expected: self.nlevels(),
11920                actual: value.len(),
11921                context: "putmask tuple arity mismatch".to_owned(),
11922            });
11923        }
11924        let tuples = (0..self.len())
11925            .map(|row| {
11926                if cond[row] {
11927                    value.clone()
11928                } else {
11929                    self.tuple_at(row)
11930                }
11931            })
11932            .collect();
11933        Self::from_tuples_with_names(tuples, self.names.clone())
11934    }
11935
11936    /// Keep original tuples where `cond` is true, otherwise use `other`.
11937    pub fn r#where(&self, cond: &[bool], other: Vec<IndexLabel>) -> Result<Self, IndexError> {
11938        if cond.len() != self.len() {
11939            return Err(IndexError::LengthMismatch {
11940                expected: self.len(),
11941                actual: cond.len(),
11942                context: "where condition length mismatch".to_owned(),
11943            });
11944        }
11945        if other.len() != self.nlevels() {
11946            return Err(IndexError::LengthMismatch {
11947                expected: self.nlevels(),
11948                actual: other.len(),
11949                context: "where tuple arity mismatch".to_owned(),
11950            });
11951        }
11952        let tuples = (0..self.len())
11953            .map(|row| {
11954                if cond[row] {
11955                    self.tuple_at(row)
11956                } else {
11957                    other.clone()
11958                }
11959            })
11960            .collect();
11961        Self::from_tuples_with_names(tuples, self.names.clone())
11962    }
11963
11964    /// Map each composite tuple through a caller-supplied function.
11965    pub fn map<T, F>(&self, mut mapper: F) -> Vec<T>
11966    where
11967        F: FnMut(&[IndexLabel]) -> T,
11968    {
11969        (0..self.len())
11970            .map(|row| {
11971                let tuple = self.tuple_at(row);
11972                mapper(&tuple)
11973            })
11974            .collect()
11975    }
11976
11977    /// Rebuild row labels using replacement level catalogs and current codes.
11978    pub fn set_levels(&self, new_levels: Vec<Vec<IndexLabel>>) -> Result<Self, IndexError> {
11979        if new_levels.len() != self.nlevels() {
11980            return Err(IndexError::LengthMismatch {
11981                expected: self.nlevels(),
11982                actual: new_levels.len(),
11983                context: "set_levels level count mismatch".to_owned(),
11984            });
11985        }
11986        let codes = self.codes();
11987        let mut levels = Vec::with_capacity(self.nlevels());
11988        for (level_idx, level_codes) in codes.into_iter().enumerate() {
11989            let mut level = Vec::with_capacity(self.len());
11990            for code in level_codes {
11991                if code == -1 {
11992                    level.push(self.missing_label_for_level(level_idx));
11993                    continue;
11994                }
11995                if code < -1 {
11996                    return Err(IndexError::InvalidArgument(format!(
11997                        "negative code {code} at level {level_idx}"
11998                    )));
11999                }
12000                let position = usize::try_from(code).map_err(|_| {
12001                    IndexError::InvalidArgument(format!("invalid code {code} at level {level_idx}"))
12002                })?;
12003                let label = new_levels[level_idx]
12004                    .get(position)
12005                    .ok_or(IndexError::OutOfBounds {
12006                        position,
12007                        length: new_levels[level_idx].len(),
12008                    })?;
12009                level.push(label.clone());
12010            }
12011            levels.push(level);
12012        }
12013        Ok(Self {
12014            levels,
12015            names: self.names.clone(),
12016        })
12017    }
12018
12019    /// Rebuild row labels using replacement codes and current level catalogs.
12020    pub fn set_codes(&self, codes: Vec<Vec<isize>>) -> Result<Self, IndexError> {
12021        if codes.len() != self.nlevels() {
12022            return Err(IndexError::LengthMismatch {
12023                expected: self.nlevels(),
12024                actual: codes.len(),
12025                context: "set_codes level count mismatch".to_owned(),
12026            });
12027        }
12028        let catalogs = self.levels();
12029        let mut levels = Vec::with_capacity(self.nlevels());
12030        for (level_idx, level_codes) in codes.into_iter().enumerate() {
12031            if level_codes.len() != self.len() {
12032                return Err(IndexError::LengthMismatch {
12033                    expected: self.len(),
12034                    actual: level_codes.len(),
12035                    context: format!("set_codes level {level_idx} length mismatch"),
12036                });
12037            }
12038            let labels = catalogs[level_idx].labels();
12039            let mut level = Vec::with_capacity(self.len());
12040            for code in level_codes {
12041                if code == -1 {
12042                    level.push(self.missing_label_for_level(level_idx));
12043                    continue;
12044                }
12045                if code < -1 {
12046                    return Err(IndexError::InvalidArgument(format!(
12047                        "negative code {code} at level {level_idx}"
12048                    )));
12049                }
12050                let position = usize::try_from(code).map_err(|_| {
12051                    IndexError::InvalidArgument(format!("invalid code {code} at level {level_idx}"))
12052                })?;
12053                let label = labels.get(position).ok_or(IndexError::OutOfBounds {
12054                    position,
12055                    length: labels.len(),
12056                })?;
12057                level.push(label.clone());
12058            }
12059            levels.push(level);
12060        }
12061        Ok(Self {
12062            levels,
12063            names: self.names.clone(),
12064        })
12065    }
12066
12067    /// Drop unused level labels. This representation stores row labels directly,
12068    /// so there is no separate unused catalog to prune.
12069    #[must_use]
12070    pub fn remove_unused_levels(&self) -> Self {
12071        self.clone()
12072    }
12073
12074    /// Identity check, matching `pd.MultiIndex.is_`.
12075    #[must_use]
12076    pub fn is_(&self, other: &Self) -> bool {
12077        std::ptr::eq(self, other)
12078    }
12079
12080    /// Whether this MultiIndex is boolean-typed.
12081    #[must_use]
12082    pub fn is_boolean(&self) -> bool {
12083        false
12084    }
12085
12086    /// Whether this MultiIndex is categorical-typed.
12087    #[must_use]
12088    pub fn is_categorical(&self) -> bool {
12089        false
12090    }
12091
12092    /// Whether this MultiIndex is floating-typed.
12093    #[must_use]
12094    pub fn is_floating(&self) -> bool {
12095        false
12096    }
12097
12098    /// Whether this MultiIndex is integer-typed.
12099    #[must_use]
12100    pub fn is_integer(&self) -> bool {
12101        false
12102    }
12103
12104    /// Whether this MultiIndex is interval-typed.
12105    #[must_use]
12106    pub fn is_interval(&self) -> bool {
12107        false
12108    }
12109
12110    /// Whether this MultiIndex is numeric-typed.
12111    #[must_use]
12112    pub fn is_numeric(&self) -> bool {
12113        false
12114    }
12115
12116    /// Whether this MultiIndex is object-backed.
12117    #[must_use]
12118    pub fn is_object(&self) -> bool {
12119        true
12120    }
12121
12122    /// Compare row tuples only, matching `pd.MultiIndex.equals`.
12123    #[must_use]
12124    pub fn equals(&self, other: &Self) -> bool {
12125        self.levels == other.levels
12126    }
12127
12128    /// Compare row tuples and level names, matching `pd.MultiIndex.identical`.
12129    #[must_use]
12130    pub fn identical(&self, other: &Self) -> bool {
12131        self.equals(other) && self.names == other.names
12132    }
12133
12134    /// Compare unique level catalogs, matching `pd.MultiIndex.equal_levels`.
12135    #[must_use]
12136    pub fn equal_levels(&self, other: &Self) -> bool {
12137        self.levels() == other.levels()
12138    }
12139
12140    /// Get the labels for a specific level.
12141    ///
12142    /// Matches `pd.MultiIndex.get_level_values(level)`.
12143    pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
12144        if level >= self.levels.len() {
12145            return Err(IndexError::OutOfBounds {
12146                position: level,
12147                length: self.levels.len(),
12148            });
12149        }
12150        let mut idx = Index::new(self.levels[level].clone());
12151        if let Some(name) = self.names.get(level).and_then(|n| n.as_ref()) {
12152            idx = idx.set_name(name);
12153        }
12154        Ok(idx)
12155    }
12156
12157    /// Get the tuple of labels at a specific position.
12158    pub fn get_tuple(&self, position: usize) -> Option<Vec<&IndexLabel>> {
12159        if position >= self.len() {
12160            return None;
12161        }
12162        Some(self.levels.iter().map(|level| &level[position]).collect())
12163    }
12164
12165    /// Select rows by positional index.
12166    pub fn take(&self, positions: &[usize]) -> Result<Self, IndexError> {
12167        for &position in positions {
12168            if position >= self.len() {
12169                return Err(IndexError::OutOfBounds {
12170                    position,
12171                    length: self.len(),
12172                });
12173            }
12174        }
12175
12176        let mut levels = Vec::with_capacity(self.nlevels());
12177        for level in &self.levels {
12178            let selected = positions
12179                .iter()
12180                .map(|&position| level[position].clone())
12181                .collect();
12182            levels.push(selected);
12183        }
12184
12185        Ok(Self {
12186            levels,
12187            names: self.names.clone(),
12188        })
12189    }
12190
12191    /// Delete the tuple at a positional location.
12192    ///
12193    /// Matches `pd.MultiIndex.delete(loc)`.
12194    pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
12195        if loc >= self.len() {
12196            return Err(IndexError::OutOfBounds {
12197                position: loc,
12198                length: self.len(),
12199            });
12200        }
12201        let positions: Vec<usize> = (0..self.len()).filter(|&row| row != loc).collect();
12202        Ok(self.take_existing_positions(&positions))
12203    }
12204
12205    /// Insert a tuple at a positional location.
12206    ///
12207    /// Matches `pd.MultiIndex.insert(loc, item)`. Inserting into an empty
12208    /// zero-level MultiIndex adopts the tuple arity as the new level count.
12209    pub fn insert(&self, loc: usize, item: Vec<IndexLabel>) -> Result<Self, IndexError> {
12210        if loc > self.len() {
12211            return Err(IndexError::OutOfBounds {
12212                position: loc,
12213                length: self.len(),
12214            });
12215        }
12216        if self.nlevels() == 0 {
12217            if loc != 0 {
12218                return Err(IndexError::OutOfBounds {
12219                    position: loc,
12220                    length: 0,
12221                });
12222            }
12223            return Self::from_tuples(vec![item]);
12224        }
12225        if item.len() != self.nlevels() {
12226            return Err(IndexError::LengthMismatch {
12227                expected: self.nlevels(),
12228                actual: item.len(),
12229                context: "insert tuple arity mismatch".to_owned(),
12230            });
12231        }
12232
12233        let mut levels = self.levels.clone();
12234        for (level_idx, label) in item.into_iter().enumerate() {
12235            levels[level_idx].insert(loc, label);
12236        }
12237        Ok(Self {
12238            levels,
12239            names: self.names.clone(),
12240        })
12241    }
12242
12243    /// Drop every occurrence of the provided tuples.
12244    ///
12245    /// Matches `pd.MultiIndex.drop(labels)` with the default fail-closed
12246    /// behavior for missing labels.
12247    pub fn drop(&self, labels_to_drop: &[Vec<IndexLabel>]) -> Result<Self, IndexError> {
12248        for label in labels_to_drop {
12249            self.validate_key_arity(label, false)?;
12250        }
12251        // Reuse one key buffer across rows instead of materializing every row
12252        // tuple via to_list() (which allocates a Vec per row). `found` stores
12253        // references into `labels_to_drop` (stable across iterations) — obtained
12254        // from drop_set.get() — so the reused buffer never escapes. FxHashSet
12255        // replaces the std SipHash set. Bit-identical: same retained positions
12256        // and same missing-key detection (value-based membership).
12257        let drop_set: FxHashSet<&Vec<IndexLabel>> = labels_to_drop.iter().collect();
12258        let mut found: FxHashSet<&Vec<IndexLabel>> = FxHashSet::default();
12259        let mut positions = Vec::new();
12260        let mut key: Vec<IndexLabel> = Vec::with_capacity(self.nlevels());
12261        for row in 0..self.len() {
12262            key.clear();
12263            key.extend(self.levels.iter().map(|level| level[row].clone()));
12264            if let Some(matched) = drop_set.get(&key) {
12265                found.insert(*matched);
12266            } else {
12267                positions.push(row);
12268            }
12269        }
12270        if let Some(missing) = labels_to_drop.iter().find(|label| !found.contains(label)) {
12271            return Err(IndexError::InvalidArgument(format!(
12272                "tuple key not found: {:?}",
12273                missing
12274            )));
12275        }
12276        Ok(self.take_existing_positions(&positions))
12277    }
12278
12279    fn validate_key_arity(
12280        &self,
12281        key: &[IndexLabel],
12282        allow_partial: bool,
12283    ) -> Result<(), IndexError> {
12284        let nlevels = self.nlevels();
12285        if key.is_empty() {
12286            return Err(IndexError::InvalidArgument(
12287                "MultiIndex key must contain at least one level".to_owned(),
12288            ));
12289        }
12290        if (!allow_partial && key.len() != nlevels) || (allow_partial && key.len() > nlevels) {
12291            return Err(IndexError::InvalidArgument(format!(
12292                "wrong tuple arity: expected {}{}, got {}",
12293                if allow_partial { "1.." } else { "" },
12294                nlevels,
12295                key.len()
12296            )));
12297        }
12298        Ok(())
12299    }
12300
12301    fn row_matches_prefix(&self, row: usize, key: &[IndexLabel]) -> bool {
12302        key.iter()
12303            .enumerate()
12304            .all(|(level, expected)| &self.levels[level][row] == expected)
12305    }
12306
12307    fn row_prefix_cmp(&self, row: usize, key: &[IndexLabel]) -> std::cmp::Ordering {
12308        for (level, expected) in key.iter().enumerate() {
12309            let ord = self.levels[level][row].cmp(expected);
12310            if ord != std::cmp::Ordering::Equal {
12311                return ord;
12312            }
12313        }
12314        std::cmp::Ordering::Equal
12315    }
12316
12317    /// Return matching row positions for an exact tuple key.
12318    pub fn get_loc_tuple(&self, key: &[IndexLabel]) -> Result<Vec<usize>, IndexError> {
12319        self.validate_key_arity(key, false)?;
12320        let positions: Vec<usize> = (0..self.len())
12321            .filter(|&row| self.row_matches_prefix(row, key))
12322            .collect();
12323        if positions.is_empty() {
12324            return Err(IndexError::InvalidArgument(format!(
12325                "tuple key not found: {:?}",
12326                key
12327            )));
12328        }
12329        Ok(positions)
12330    }
12331
12332    /// Return row positions for an exact tuple, partial-prefix tuple, or a single level key.
12333    ///
12334    /// `level=None` treats `key` as an exact tuple when it has full arity, or a
12335    /// prefix tuple when shorter than `nlevels()`. `level=Some(n)` treats
12336    /// `key` as a single label lookup on that level.
12337    pub fn get_loc(
12338        &self,
12339        key: &[IndexLabel],
12340        level: Option<usize>,
12341    ) -> Result<Vec<usize>, IndexError> {
12342        if let Some(level) = level {
12343            if level >= self.nlevels() {
12344                return Err(IndexError::OutOfBounds {
12345                    position: level,
12346                    length: self.nlevels(),
12347                });
12348            }
12349            if key.len() != 1 {
12350                return Err(IndexError::InvalidArgument(format!(
12351                    "level lookup expects exactly one label, got {}",
12352                    key.len()
12353                )));
12354            }
12355            let positions: Vec<usize> = self.levels[level]
12356                .iter()
12357                .enumerate()
12358                .filter_map(|(row, label)| if label == &key[0] { Some(row) } else { None })
12359                .collect();
12360            if positions.is_empty() {
12361                return Err(IndexError::InvalidArgument(format!(
12362                    "level key not found at level {level}: {:?}",
12363                    key[0]
12364                )));
12365            }
12366            return Ok(positions);
12367        }
12368
12369        self.validate_key_arity(key, true)?;
12370        let positions: Vec<usize> = (0..self.len())
12371            .filter(|&row| self.row_matches_prefix(row, key))
12372            .collect();
12373        if positions.is_empty() {
12374            return Err(IndexError::InvalidArgument(format!(
12375                "tuple key not found: {:?}",
12376                key
12377            )));
12378        }
12379        Ok(positions)
12380    }
12381
12382    /// Return row positions for a list-like exact or partial-prefix selector.
12383    ///
12384    /// Matches the list-label subset of `pd.MultiIndex.get_locs(seq)`.
12385    pub fn get_locs(&self, key: &[IndexLabel]) -> Result<Vec<usize>, IndexError> {
12386        if key.is_empty() {
12387            return Ok(Vec::new());
12388        }
12389        self.get_loc(key, None)
12390    }
12391
12392    /// pandas-style partial tuple lookup returning matching positions and the remaining index.
12393    pub fn get_loc_level(
12394        &self,
12395        key: &[IndexLabel],
12396    ) -> Result<(Vec<usize>, Option<MultiIndexOrIndex>), IndexError> {
12397        let positions = self.get_loc(key, None)?;
12398        if key.len() == self.nlevels() {
12399            return Ok((positions, None));
12400        }
12401
12402        let mut remaining = MultiIndexOrIndex::Multi(self.take(&positions)?);
12403        for _ in 0..key.len() {
12404            remaining = match remaining {
12405                MultiIndexOrIndex::Multi(mi) => mi.droplevel(0)?,
12406                MultiIndexOrIndex::Index(_) => {
12407                    return Err(IndexError::InvalidArgument(
12408                        "cannot drop more levels than remain".to_owned(),
12409                    ));
12410                }
12411            };
12412        }
12413
12414        Ok((positions, Some(remaining)))
12415    }
12416
12417    /// Return `(start, stop)` bounds for a lexicographic tuple slice.
12418    ///
12419    /// The returned `stop` is exclusive, matching pandas `slice_locs`.
12420    pub fn slice_locs(
12421        &self,
12422        start: Option<&[IndexLabel]>,
12423        end: Option<&[IndexLabel]>,
12424    ) -> Result<(usize, usize), IndexError> {
12425        if let Some(start) = start {
12426            self.validate_key_arity(start, true)?;
12427        }
12428        if let Some(end) = end {
12429            self.validate_key_arity(end, true)?;
12430        }
12431
12432        let start_pos = match start {
12433            Some(start_key) => (0..self.len())
12434                .find(|&row| self.row_prefix_cmp(row, start_key) != std::cmp::Ordering::Less)
12435                .unwrap_or(self.len()),
12436            None => 0,
12437        };
12438        let end_pos = match end {
12439            Some(end_key) => (0..self.len())
12440                .rfind(|&row| self.row_prefix_cmp(row, end_key) != std::cmp::Ordering::Greater)
12441                .map_or(0, |row| row + 1),
12442            None => self.len(),
12443        };
12444
12445        if end_pos < start_pos {
12446            return Ok((start_pos, start_pos));
12447        }
12448        Ok((start_pos, end_pos))
12449    }
12450
12451    /// Bound for a tuple slice, matching `pd.MultiIndex.get_slice_bound`.
12452    pub fn get_slice_bound(&self, label: &[IndexLabel], side: &str) -> Result<usize, IndexError> {
12453        match side {
12454            "left" => Ok(self.slice_locs(Some(label), Some(label))?.0),
12455            "right" => Ok(self.slice_locs(Some(label), Some(label))?.1),
12456            other => Err(IndexError::InvalidArgument(format!(
12457                "get_slice_bound: side must be 'left' or 'right', got {other:?}"
12458            ))),
12459        }
12460    }
12461
12462    /// Alias for `slice_locs`, matching `pd.MultiIndex.slice_indexer`.
12463    pub fn slice_indexer(
12464        &self,
12465        start: Option<&[IndexLabel]>,
12466        end: Option<&[IndexLabel]>,
12467    ) -> Result<(usize, usize), IndexError> {
12468        self.slice_locs(start, end)
12469    }
12470
12471    /// Return a label-bounded range, matching `pd.MultiIndex.truncate`.
12472    ///
12473    /// Bounds are interpreted as partial or full tuple prefixes and are
12474    /// inclusive on both sides. Open-ended bounds retain the corresponding
12475    /// leading or trailing rows.
12476    pub fn truncate(
12477        &self,
12478        before: Option<&[IndexLabel]>,
12479        after: Option<&[IndexLabel]>,
12480    ) -> Result<Self, IndexError> {
12481        let (start, stop) = self.slice_locs(before, after)?;
12482        let positions: Vec<usize> = (start..stop).collect();
12483        Ok(self.take_existing_positions(&positions))
12484    }
12485
12486    /// Insertion positions for target tuples, matching `pd.MultiIndex.searchsorted`.
12487    ///
12488    /// `side` is `"left"` for the first valid insertion position or `"right"`
12489    /// for the position after an equal run. Like pandas, callers are expected
12490    /// to use this on lexicographically sorted indexes.
12491    pub fn searchsorted(&self, target: &Self, side: &str) -> Result<Vec<usize>, IndexError> {
12492        if side != "left" && side != "right" {
12493            return Err(IndexError::InvalidArgument(format!(
12494                "searchsorted: side must be 'left' or 'right', got {side:?}"
12495            )));
12496        }
12497
12498        Ok((0..target.len())
12499            .map(|target_row| {
12500                let needle = target.tuple_at(target_row);
12501                let mut lo = 0_usize;
12502                let mut hi = self.len();
12503                while lo < hi {
12504                    let mid = lo + (hi - lo) / 2;
12505                    let cmp = self.tuple_at(mid).cmp(&needle);
12506                    use std::cmp::Ordering;
12507                    let go_right = matches!(
12508                        (cmp, side),
12509                        (Ordering::Less, _) | (Ordering::Equal, "right")
12510                    );
12511                    if go_right {
12512                        lo = mid + 1;
12513                    } else {
12514                        hi = mid;
12515                    }
12516                }
12517                lo
12518            })
12519            .collect())
12520    }
12521
12522    /// Compute a non-unique indexer against another MultiIndex.
12523    ///
12524    /// Matches `pd.MultiIndex.get_indexer_non_unique(target)` by expanding
12525    /// every matching source position for each target tuple in target order.
12526    /// Missing target tuples contribute a single `-1` entry and their target
12527    /// position is recorded in the returned `missing` vector.
12528    #[must_use]
12529    /// Dictionary-encode every level of `self` and `target` into integer codes
12530    /// (consistent across both) and pack each row's tuple into one mixed-radix
12531    /// `u64` key (br-frankenpandas-mipack). Lets get_indexer hash an integer per
12532    /// row instead of allocating a `Vec<IndexLabel>` (and cloning Utf8 Strings)
12533    /// per row. Returns `None` when there are no levels, the level counts
12534    /// differ, or the combined code space overflows `u64` (caller keeps the
12535    /// `Vec<IndexLabel>`-key path). Bijective on tuple identity, so the source
12536    /// map and target lookups match exactly the same rows.
12537    /// Pack each row's tuple into one mixed-radix `u64` whose ascending order
12538    /// equals the lexicographic `row_cmp` order (br-frankenpandas-misort): per
12539    /// level, distinct values are ranked by `IndexLabel::Ord` and the rank codes
12540    /// are packed most-significant-first. So sorting these `u64` keys reproduces
12541    /// the level-by-level tuple sort while comparing integers instead of
12542    /// (Utf8) tuples. Returns `None` when there are no levels or the combined
12543    /// code space overflows `u64` (caller keeps the tuple-comparison sort).
12544    fn sorted_packed_keys(&self) -> Option<Vec<u64>> {
12545        let nlev = self.nlevels();
12546        if nlev == 0 {
12547            return None;
12548        }
12549        let n = self.len();
12550        let mut keys = vec![0u64; n];
12551        let mut combined: u128 = 1;
12552        for level in 0..nlev {
12553            let col = &self.levels[level];
12554            // Dedup to DISTINCT values first (O(n) hash), then sort only those
12555            // (O(d log d)); sorting all n refs would cost as much as the tuple
12556            // sort we are replacing.
12557            let mut sorted: Vec<&IndexLabel> = col.iter().collect::<FxHashSet<_>>().into_iter().collect();
12558            sorted.sort_unstable();
12559            let radix = sorted.len() as u64;
12560            let mut rank: FxHashMap<&IndexLabel, u64> =
12561                FxHashMap::with_capacity_and_hasher(sorted.len(), Default::default());
12562            for (r, value) in sorted.iter().enumerate() {
12563                rank.insert(*value, r as u64);
12564            }
12565            for (dst, value) in keys.iter_mut().zip(col.iter()) {
12566                *dst = dst.checked_mul(radix)?.checked_add(rank[value])?;
12567            }
12568            combined = combined.checked_mul(radix as u128)?;
12569            if combined > u64::MAX as u128 {
12570                return None;
12571            }
12572        }
12573        Some(keys)
12574    }
12575
12576    /// Pack each row's tuple into one mixed-radix `u64` using FIRST-SEEN per-level
12577    /// codes (br-frankenpandas-midedup). Unlike [`Self::sorted_packed_keys`] this
12578    /// skips the per-level distinct sort — dedup only needs the keys to be a
12579    /// bijection on tuple identity, not lexicographically ordered. Lets
12580    /// duplicated/unique/drop_duplicates hash one integer per row instead of
12581    /// allocating (and Utf8-cloning) a `Vec<IndexLabel>` per row. `None` when
12582    /// there are no levels or the combined code space overflows `u64`.
12583    fn identity_packed_keys(&self) -> Option<Vec<u64>> {
12584        let nlev = self.nlevels();
12585        if nlev == 0 {
12586            return None;
12587        }
12588        let n = self.len();
12589        let mut keys = vec![0u64; n];
12590        let mut combined: u128 = 1;
12591        for level in 0..nlev {
12592            let col = &self.levels[level];
12593            let mut code: FxHashMap<&IndexLabel, u64> =
12594                FxHashMap::with_capacity_and_hasher(col.len(), Default::default());
12595            let mut next = 0u64;
12596            let codes: Vec<u64> = col
12597                .iter()
12598                .map(|value| {
12599                    *code.entry(value).or_insert_with(|| {
12600                        let c = next;
12601                        next += 1;
12602                        c
12603                    })
12604                })
12605                .collect();
12606            let radix = next;
12607            for (dst, &c) in keys.iter_mut().zip(&codes) {
12608                *dst = dst.checked_mul(radix)?.checked_add(c)?;
12609            }
12610            combined = combined.checked_mul(radix as u128)?;
12611            if combined > u64::MAX as u128 {
12612                return None;
12613            }
12614        }
12615        Some(keys)
12616    }
12617
12618    fn factorize_packed_keys(&self, target: &Self) -> Option<(Vec<u64>, Vec<u64>)> {
12619        let nlev = self.nlevels();
12620        if nlev == 0 || nlev != target.nlevels() {
12621            return None;
12622        }
12623        let n = self.len();
12624        let m = target.len();
12625        let mut src = vec![0u64; n];
12626        let mut tgt = vec![0u64; m];
12627        let mut combined: u128 = 1;
12628        for level in 0..nlev {
12629            let mut codes: FxHashMap<&IndexLabel, u64> = FxHashMap::default();
12630            let mut next = 0u64;
12631            // Source first so its codes are dense and lookups stay consistent;
12632            // target-only values get fresh codes that no source key can match.
12633            let s_level = &self.levels[level];
12634            let t_level = &target.levels[level];
12635            let s_codes: Vec<u64> = (0..n)
12636                .map(|row| {
12637                    *codes.entry(&s_level[row]).or_insert_with(|| {
12638                        let c = next;
12639                        next += 1;
12640                        c
12641                    })
12642                })
12643                .collect();
12644            let t_codes: Vec<u64> = (0..m)
12645                .map(|row| {
12646                    *codes.entry(&t_level[row]).or_insert_with(|| {
12647                        let c = next;
12648                        next += 1;
12649                        c
12650                    })
12651                })
12652                .collect();
12653            // Mixed-radix: shift existing partial keys up by this level's radix
12654            // (= its distinct-value count) and add the new codes.
12655            let radix = next;
12656            for (dst, &c) in src.iter_mut().zip(&s_codes) {
12657                *dst = dst.checked_mul(radix)?.checked_add(c)?;
12658            }
12659            for (dst, &c) in tgt.iter_mut().zip(&t_codes) {
12660                *dst = dst.checked_mul(radix)?.checked_add(c)?;
12661            }
12662            combined = combined.checked_mul(radix as u128)?;
12663            if combined > u64::MAX as u128 {
12664                return None;
12665            }
12666        }
12667        Some((src, tgt))
12668    }
12669
12670    pub fn get_indexer_non_unique(&self, target: &Self) -> (Vec<isize>, Vec<usize>) {
12671        if self.nlevels() != target.nlevels() {
12672            return (vec![-1; target.len()], (0..target.len()).collect());
12673        }
12674
12675        if let Some((src_keys, tgt_keys)) = self.factorize_packed_keys(target) {
12676            let mut positions =
12677                FxHashMap::<u64, Vec<usize>>::with_capacity_and_hasher(self.len(), Default::default());
12678            for (row, &key) in src_keys.iter().enumerate() {
12679                positions.entry(key).or_default().push(row);
12680            }
12681            let mut indexer = Vec::new();
12682            let mut missing = Vec::new();
12683            for (target_row, &key) in tgt_keys.iter().enumerate() {
12684                if let Some(matches) = positions.get(&key) {
12685                    indexer.extend(matches.iter().map(|&pos| pos as isize));
12686                } else {
12687                    indexer.push(-1);
12688                    missing.push(target_row);
12689                }
12690            }
12691            return (indexer, missing);
12692        }
12693
12694        let mut positions = FxHashMap::<Vec<IndexLabel>, Vec<usize>>::with_capacity_and_hasher(
12695            self.len(),
12696            Default::default(),
12697        );
12698        for row in 0..self.len() {
12699            let key: Vec<IndexLabel> = self.levels.iter().map(|level| level[row].clone()).collect();
12700            positions.entry(key).or_default().push(row);
12701        }
12702
12703        let mut indexer = Vec::new();
12704        let mut missing = Vec::new();
12705        for target_row in 0..target.len() {
12706            let key: Vec<IndexLabel> = target
12707                .levels
12708                .iter()
12709                .map(|level| level[target_row].clone())
12710                .collect();
12711            if let Some(matches) = positions.get(&key) {
12712                indexer.extend(matches.iter().map(|&pos| pos as isize));
12713            } else {
12714                indexer.push(-1);
12715                missing.push(target_row);
12716            }
12717        }
12718
12719        (indexer, missing)
12720    }
12721
12722    /// Compute a positional indexer against another MultiIndex.
12723    ///
12724    /// Matches `pd.MultiIndex.get_indexer(target)` for unique source indexes:
12725    /// each target tuple maps to its first source position, and missing target
12726    /// tuples map to `-1`. Duplicate source tuples are rejected because pandas
12727    /// treats reindexing from a non-unique index as invalid; callers that want
12728    /// duplicate expansion should use [`Self::get_indexer_for`] or
12729    /// [`Self::get_indexer_non_unique`].
12730    pub fn get_indexer(&self, target: &Self) -> Result<Vec<isize>, IndexError> {
12731        if self.has_duplicates() {
12732            return Err(IndexError::InvalidArgument(
12733                "get_indexer requires a uniquely valued MultiIndex".to_owned(),
12734            ));
12735        }
12736        if self.nlevels() != target.nlevels() {
12737            return Ok(vec![-1; target.len()]);
12738        }
12739
12740        if let Some((src_keys, tgt_keys)) = self.factorize_packed_keys(target) {
12741            let mut positions = FxHashMap::<u64, isize>::with_capacity_and_hasher(
12742                self.len(),
12743                Default::default(),
12744            );
12745            for (row, &key) in src_keys.iter().enumerate() {
12746                positions
12747                    .entry(key)
12748                    .or_insert(isize::try_from(row).unwrap_or(isize::MAX));
12749            }
12750            return Ok(tgt_keys
12751                .iter()
12752                .map(|key| positions.get(key).copied().unwrap_or(-1))
12753                .collect());
12754        }
12755
12756        let mut positions = FxHashMap::<Vec<IndexLabel>, isize>::with_capacity_and_hasher(
12757            self.len(),
12758            Default::default(),
12759        );
12760        for row in 0..self.len() {
12761            positions
12762                .entry(self.tuple_at(row))
12763                .or_insert(isize::try_from(row).unwrap_or(isize::MAX));
12764        }
12765
12766        Ok((0..target.len())
12767            .map(|target_row| {
12768                let key = target.tuple_at(target_row);
12769                positions.get(&key).copied().unwrap_or(-1)
12770            })
12771            .collect())
12772    }
12773
12774    /// Compute a positional indexer, expanding duplicate source matches.
12775    ///
12776    /// Matches `pd.MultiIndex.get_indexer_for(target)`: unique source indexes
12777    /// use the compact one-position-per-target form, while non-unique source
12778    /// indexes expand every matching source position for each target tuple.
12779    pub fn get_indexer_for(&self, target: &Self) -> Result<Vec<isize>, IndexError> {
12780        if self.has_duplicates() {
12781            Ok(self.get_indexer_non_unique(target).0)
12782        } else {
12783            self.get_indexer(target)
12784        }
12785    }
12786
12787    /// Reindex to a target MultiIndex, returning the target and source positions.
12788    ///
12789    /// Matches `pd.MultiIndex.reindex(target)` for unique source indexes:
12790    /// the returned index is the requested target, and the indexer maps each
12791    /// target tuple to its source position or `-1` for missing tuples.
12792    pub fn reindex(&self, target: &Self) -> Result<(Self, Vec<isize>), IndexError> {
12793        Ok((target.clone(), self.get_indexer(target)?))
12794    }
12795
12796    /// Per-row flag for duplicated composite tuples.
12797    ///
12798    /// Matches `pd.MultiIndex.duplicated(keep='first'|'last'|False)`.
12799    /// - `DuplicateKeep::First` marks all but the first occurrence of each
12800    ///   tuple as duplicated (pandas default).
12801    /// - `DuplicateKeep::Last` marks all but the last occurrence.
12802    /// - `DuplicateKeep::None` marks every occurrence of any tuple that
12803    ///   appears more than once.
12804    #[must_use]
12805    pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
12806        let len = self.len();
12807        let mut out = vec![false; len];
12808        if len == 0 {
12809            return out;
12810        }
12811        // Materialize each row's composite key exactly once per pass. The prior
12812        // version built BOTH a counts and a first_seen map for every keep mode
12813        // (incl. a key.clone()) and then rebuilt the key again in the keep-mode
12814        // loop — 3-4 Vec<IndexLabel> allocations per row. Each mode now does the
12815        // minimal work; output is positional so marking order is irrelevant.
12816        // Packed-key fast path (br-frankenpandas-midedup): one u64 per row keyed
12817        // on tuple identity, so dedup hashes integers instead of allocating (and
12818        // Utf8-cloning) a Vec<IndexLabel> per row. Bijective on identity ⇒ the
12819        // dup mask is identical to the Vec-key path.
12820        if let Some(keys) = self.identity_packed_keys() {
12821            match keep {
12822                DuplicateKeep::First => {
12823                    let mut seen: FxHashSet<u64> =
12824                        FxHashSet::with_capacity_and_hasher(len, Default::default());
12825                    for (row, slot) in out.iter_mut().enumerate() {
12826                        if !seen.insert(keys[row]) {
12827                            *slot = true;
12828                        }
12829                    }
12830                }
12831                DuplicateKeep::Last => {
12832                    let mut seen: FxHashSet<u64> =
12833                        FxHashSet::with_capacity_and_hasher(len, Default::default());
12834                    for row in (0..len).rev() {
12835                        if !seen.insert(keys[row]) {
12836                            out[row] = true;
12837                        }
12838                    }
12839                }
12840                DuplicateKeep::None => {
12841                    let mut counts: FxHashMap<u64, usize> =
12842                        FxHashMap::with_capacity_and_hasher(len, Default::default());
12843                    for &key in &keys {
12844                        *counts.entry(key).or_insert(0) += 1;
12845                    }
12846                    for (row, slot) in out.iter_mut().enumerate() {
12847                        if counts[&keys[row]] > 1 {
12848                            *slot = true;
12849                        }
12850                    }
12851                }
12852            }
12853            return out;
12854        }
12855
12856        let key_at = |row: usize| -> Vec<IndexLabel> {
12857            self.levels.iter().map(|level| level[row].clone()).collect()
12858        };
12859        match keep {
12860            DuplicateKeep::First => {
12861                // First occurrence kept; a key already seen earlier is a dup.
12862                let mut seen: FxHashSet<Vec<IndexLabel>> =
12863                    FxHashSet::with_capacity_and_hasher(len, Default::default());
12864                for (row, slot) in out.iter_mut().enumerate() {
12865                    if !seen.insert(key_at(row)) {
12866                        *slot = true;
12867                    }
12868                }
12869            }
12870            DuplicateKeep::Last => {
12871                // Last occurrence kept; scanning in reverse, the first key seen
12872                // in reverse is the last forward occurrence (kept = false).
12873                let mut seen: FxHashSet<Vec<IndexLabel>> =
12874                    FxHashSet::with_capacity_and_hasher(len, Default::default());
12875                for row in (0..len).rev() {
12876                    if !seen.insert(key_at(row)) {
12877                        out[row] = true;
12878                    }
12879                }
12880            }
12881            DuplicateKeep::None => {
12882                // Every occurrence of a key with count > 1 is a dup.
12883                let mut counts: FxHashMap<Vec<IndexLabel>, usize> =
12884                    FxHashMap::with_capacity_and_hasher(len, Default::default());
12885                for row in 0..len {
12886                    *counts.entry(key_at(row)).or_insert(0) += 1;
12887                }
12888                for (row, slot) in out.iter_mut().enumerate() {
12889                    if counts[&key_at(row)] > 1 {
12890                        *slot = true;
12891                    }
12892                }
12893            }
12894        }
12895        out
12896    }
12897
12898    /// Whether all composite tuples are unique.
12899    ///
12900    /// Matches `pd.MultiIndex.is_unique`.
12901    #[must_use]
12902    pub fn is_unique(&self) -> bool {
12903        !self.duplicated(DuplicateKeep::First).iter().any(|&b| b)
12904    }
12905
12906    /// Whether any composite tuple appears more than once.
12907    ///
12908    /// Matches `pd.MultiIndex.has_duplicates`.
12909    #[must_use]
12910    pub fn has_duplicates(&self) -> bool {
12911        !self.is_unique()
12912    }
12913
12914    /// Drop duplicated tuples with pandas' default `keep='first'`.
12915    #[must_use]
12916    pub fn drop_duplicates(&self) -> Self {
12917        self.drop_duplicates_keep(DuplicateKeep::First)
12918    }
12919
12920    /// Drop duplicated tuples with explicit keep behavior.
12921    #[must_use]
12922    pub fn drop_duplicates_keep(&self, keep: DuplicateKeep) -> Self {
12923        let duplicated = self.duplicated(keep);
12924        let positions: Vec<usize> = duplicated
12925            .iter()
12926            .enumerate()
12927            .filter_map(|(position, is_duplicated)| (!is_duplicated).then_some(position))
12928            .collect();
12929        self.take_existing_positions(&positions)
12930    }
12931
12932    /// Unique tuples, preserving first-seen order.
12933    #[must_use]
12934    pub fn unique(&self) -> Self {
12935        self.drop_duplicates_keep(DuplicateKeep::First)
12936    }
12937
12938    /// Number of unique tuples.
12939    #[must_use]
12940    pub fn nunique(&self) -> usize {
12941        self.unique().len()
12942    }
12943
12944    /// Unsupported boolean reduction, matching `pd.MultiIndex.all()`.
12945    pub fn all(&self) -> Result<bool, IndexError> {
12946        Err(IndexError::InvalidArgument(
12947            "cannot perform all with this index type: MultiIndex".to_owned(),
12948        ))
12949    }
12950
12951    /// Unsupported boolean reduction, matching `pd.MultiIndex.any()`.
12952    pub fn any(&self) -> Result<bool, IndexError> {
12953        Err(IndexError::InvalidArgument(
12954            "cannot perform any with this index type: MultiIndex".to_owned(),
12955        ))
12956    }
12957
12958    /// Factorize tuples into integer codes and unique tuples.
12959    ///
12960    /// Missing labels remain part of the composite tuple identity, matching
12961    /// pandas' MultiIndex-level factorization behavior.
12962    #[must_use]
12963    pub fn factorize(&self) -> (Vec<isize>, Self) {
12964        let mut positions = HashMap::<Vec<IndexLabel>, isize>::new();
12965        let mut uniques = Vec::<Vec<IndexLabel>>::new();
12966        let mut codes = Vec::with_capacity(self.len());
12967        for tuple in self.to_list() {
12968            if let Some(code) = positions.get(&tuple) {
12969                codes.push(*code);
12970            } else {
12971                let code = isize::try_from(uniques.len()).unwrap_or(isize::MAX);
12972                positions.insert(tuple.clone(), code);
12973                uniques.push(tuple);
12974                codes.push(code);
12975            }
12976        }
12977        let mut levels: Vec<Vec<IndexLabel>> = (0..self.nlevels())
12978            .map(|_| Vec::with_capacity(uniques.len()))
12979            .collect();
12980        for tuple in uniques {
12981            for (level_idx, label) in tuple.into_iter().enumerate() {
12982                levels[level_idx].push(label);
12983            }
12984        }
12985        let unique_index = Self {
12986            levels,
12987            names: self.names.clone(),
12988        };
12989        (codes, unique_index)
12990    }
12991
12992    /// Count unique tuple occurrences, sorted by count descending then tuple.
12993    #[must_use]
12994    pub fn value_counts(&self) -> Vec<(Vec<IndexLabel>, usize)> {
12995        let mut counts = HashMap::<Vec<IndexLabel>, usize>::new();
12996        for tuple in self.to_list() {
12997            *counts.entry(tuple).or_insert(0) += 1;
12998        }
12999        let mut pairs: Vec<(Vec<IndexLabel>, usize)> = counts.into_iter().collect();
13000        pairs.sort_by(|(left_tuple, left_count), (right_tuple, right_count)| {
13001            right_count
13002                .cmp(left_count)
13003                .then_with(|| left_tuple.cmp(right_tuple))
13004        });
13005        pairs
13006    }
13007
13008    /// Positional sorter for lexicographic tuple order.
13009    #[must_use]
13010    pub fn argsort(&self) -> Vec<usize> {
13011        let mut order: Vec<usize> = (0..self.len()).collect();
13012        // Packed-key fast path: sort on one u64 per row (ascending u64 order ==
13013        // lexicographic row_cmp order) instead of comparing (Utf8) tuples. The
13014        // `.then(left.cmp(right))` original-position tiebreak is preserved, so
13015        // the permutation is identical to the tuple-comparison sort.
13016        if let Some(keys) = self.sorted_packed_keys() {
13017            order.sort_by(|&left, &right| {
13018                keys[left].cmp(&keys[right]).then_with(|| left.cmp(&right))
13019            });
13020            return order;
13021        }
13022        order.sort_by(|&left, &right| self.row_cmp(left, right).then_with(|| left.cmp(&right)));
13023        order
13024    }
13025
13026    /// Sort tuples lexicographically, matching `pd.MultiIndex.sort_values()`.
13027    #[must_use]
13028    pub fn sort_values(&self) -> Self {
13029        self.take_existing_positions(&self.argsort())
13030    }
13031
13032    /// Alias for `sort_values`, matching `pd.MultiIndex.sort`.
13033    #[must_use]
13034    pub fn sort(&self) -> Self {
13035        self.sort_values()
13036    }
13037
13038    /// Sort tuples and return the positional indexer used for the sort.
13039    #[must_use]
13040    pub fn sortlevel(&self) -> (Self, Vec<usize>) {
13041        let order = self.argsort();
13042        (self.take_existing_positions(&order), order)
13043    }
13044
13045    /// Lexicographic minimum tuple.
13046    #[must_use]
13047    pub fn min(&self) -> Option<Vec<IndexLabel>> {
13048        self.argsort()
13049            .first()
13050            .map(|&position| self.tuple_at(position))
13051    }
13052
13053    /// Lexicographic maximum tuple.
13054    #[must_use]
13055    pub fn max(&self) -> Option<Vec<IndexLabel>> {
13056        self.argsort()
13057            .last()
13058            .map(|&position| self.tuple_at(position))
13059    }
13060
13061    /// Position of the maximum tuple.
13062    #[must_use]
13063    pub fn argmax(&self) -> Option<usize> {
13064        self.argsort().last().copied()
13065    }
13066
13067    /// Position of the minimum tuple.
13068    #[must_use]
13069    pub fn argmin(&self) -> Option<usize> {
13070        self.argsort().first().copied()
13071    }
13072
13073    /// Append another MultiIndex to this one.
13074    ///
13075    /// Matches `pd.MultiIndex.append(other)` for equal-level indexes.
13076    pub fn append(&self, other: &Self) -> Result<Self, IndexError> {
13077        self.ensure_same_nlevels(other)?;
13078        let mut levels = Vec::with_capacity(self.nlevels());
13079        for level_idx in 0..self.nlevels() {
13080            let mut level = self.levels[level_idx].clone();
13081            level.extend(other.levels[level_idx].iter().cloned());
13082            levels.push(level);
13083        }
13084        Ok(Self {
13085            levels,
13086            names: self.shared_names(other),
13087        })
13088    }
13089
13090    /// Repeat each tuple `repeats` times, matching `pd.MultiIndex.repeat`.
13091    #[must_use]
13092    pub fn repeat(&self, repeats: usize) -> Self {
13093        if repeats == 1 {
13094            return self.clone();
13095        }
13096        let mut levels = Vec::with_capacity(self.nlevels());
13097        for level in &self.levels {
13098            let mut repeated = Vec::with_capacity(level.len() * repeats);
13099            for label in level {
13100                for _ in 0..repeats {
13101                    repeated.push(label.clone());
13102                }
13103            }
13104            levels.push(repeated);
13105        }
13106        Self {
13107            levels,
13108            names: self.names.clone(),
13109        }
13110    }
13111
13112    /// Drop tuples containing any missing level label.
13113    ///
13114    /// Matches `pd.MultiIndex.dropna(how='any')`, which is pandas' default.
13115    #[must_use]
13116    pub fn dropna(&self) -> Self {
13117        self.dropna_any()
13118    }
13119
13120    /// Drop tuples containing any missing level label.
13121    #[must_use]
13122    pub fn dropna_any(&self) -> Self {
13123        let positions: Vec<usize> = (0..self.len())
13124            .filter(|&row| self.levels.iter().all(|level| !level[row].is_missing()))
13125            .collect();
13126        self.take_existing_positions(&positions)
13127    }
13128
13129    /// Drop tuples whose level labels are all missing.
13130    #[must_use]
13131    pub fn dropna_all(&self) -> Self {
13132        let positions: Vec<usize> = (0..self.len())
13133            .filter(|&row| !self.levels.iter().all(|level| level[row].is_missing()))
13134            .collect();
13135        self.take_existing_positions(&positions)
13136    }
13137
13138    /// Tuple intersection preserving left order and de-duplicating results.
13139    pub fn intersection(&self, other: &Self) -> Result<Self, IndexError> {
13140        self.ensure_same_nlevels(other)?;
13141        // Packed-key fast path (br-frankenpandas-misetop): identity-coded u64 per
13142        // row instead of to_list() (per-row Vec<IndexLabel> + Utf8 clone) and a
13143        // SipHash HashMap<Vec<IndexLabel>>. Keep self rows whose key is in other,
13144        // deduped first-seen, then gather those positions. Bijective on tuple
13145        // identity ⇒ same kept rows, same order.
13146        if let Some((self_keys, other_keys)) = self.factorize_packed_keys(other)
13147        {
13148            let other_set: FxHashSet<u64> = other_keys.into_iter().collect();
13149            let mut seen: FxHashSet<u64> =
13150                FxHashSet::with_capacity_and_hasher(self_keys.len(), Default::default());
13151            let positions: Vec<usize> = self_keys
13152                .iter()
13153                .enumerate()
13154                .filter_map(|(i, &k)| (other_set.contains(&k) && seen.insert(k)).then_some(i))
13155                .collect();
13156            return Ok(self
13157                .take_existing_positions(&positions)
13158                .set_names(self.shared_names(other)));
13159        }
13160        let other_keys: HashMap<Vec<IndexLabel>, ()> = other
13161            .to_list()
13162            .into_iter()
13163            .map(|tuple| (tuple, ()))
13164            .collect();
13165        let mut seen = HashMap::<Vec<IndexLabel>, ()>::new();
13166        let tuples = self
13167            .to_list()
13168            .into_iter()
13169            .filter(|tuple| {
13170                other_keys.contains_key(tuple) && seen.insert(tuple.clone(), ()).is_none()
13171            })
13172            .collect();
13173        Self::from_tuples_with_names(tuples, self.shared_names(other))
13174    }
13175
13176    /// Tuple union preserving first-seen order from `self` then `other`.
13177    pub fn union(&self, other: &Self) -> Result<Self, IndexError> {
13178        self.ensure_same_nlevels(other)?;
13179        let mut seen = HashMap::<Vec<IndexLabel>, ()>::new();
13180        let mut tuples = Vec::with_capacity(self.len() + other.len());
13181        for tuple in self.to_list().into_iter().chain(other.to_list()) {
13182            if seen.insert(tuple.clone(), ()).is_none() {
13183                tuples.push(tuple);
13184            }
13185        }
13186        Self::from_tuples_with_names(tuples, self.shared_names(other))
13187    }
13188
13189    /// Alias for `union`, matching the flat `Index::union_with` naming.
13190    pub fn union_with(&self, other: &Self) -> Result<Self, IndexError> {
13191        self.union(other)
13192    }
13193
13194    /// Tuple difference preserving left order and de-duplicating results.
13195    pub fn difference(&self, other: &Self) -> Result<Self, IndexError> {
13196        self.ensure_same_nlevels(other)?;
13197        // Packed-key fast path (br-frankenpandas-misetop): keep self rows whose
13198        // key is NOT in other, deduped first-seen. See intersection.
13199        if let Some((self_keys, other_keys)) = self.factorize_packed_keys(other)
13200        {
13201            let other_set: FxHashSet<u64> = other_keys.into_iter().collect();
13202            let mut seen: FxHashSet<u64> =
13203                FxHashSet::with_capacity_and_hasher(self_keys.len(), Default::default());
13204            let positions: Vec<usize> = self_keys
13205                .iter()
13206                .enumerate()
13207                .filter_map(|(i, &k)| (!other_set.contains(&k) && seen.insert(k)).then_some(i))
13208                .collect();
13209            return Ok(self
13210                .take_existing_positions(&positions)
13211                .set_names(self.shared_names(other)));
13212        }
13213        let other_keys: HashMap<Vec<IndexLabel>, ()> = other
13214            .to_list()
13215            .into_iter()
13216            .map(|tuple| (tuple, ()))
13217            .collect();
13218        let mut seen = HashMap::<Vec<IndexLabel>, ()>::new();
13219        let tuples = self
13220            .to_list()
13221            .into_iter()
13222            .filter(|tuple| {
13223                !other_keys.contains_key(tuple) && seen.insert(tuple.clone(), ()).is_none()
13224            })
13225            .collect();
13226        Self::from_tuples_with_names(tuples, self.shared_names(other))
13227    }
13228
13229    /// Tuple symmetric difference preserving first-seen order.
13230    pub fn symmetric_difference(&self, other: &Self) -> Result<Self, IndexError> {
13231        self.ensure_same_nlevels(other)?;
13232        let self_keys: HashMap<Vec<IndexLabel>, ()> = self
13233            .to_list()
13234            .into_iter()
13235            .map(|tuple| (tuple, ()))
13236            .collect();
13237        let other_keys: HashMap<Vec<IndexLabel>, ()> = other
13238            .to_list()
13239            .into_iter()
13240            .map(|tuple| (tuple, ()))
13241            .collect();
13242        let mut seen = HashMap::<Vec<IndexLabel>, ()>::new();
13243        let mut tuples = Vec::new();
13244        for tuple in self.to_list() {
13245            if !other_keys.contains_key(&tuple) && seen.insert(tuple.clone(), ()).is_none() {
13246                tuples.push(tuple);
13247            }
13248        }
13249        for tuple in other.to_list() {
13250            if !self_keys.contains_key(&tuple) && seen.insert(tuple.clone(), ()).is_none() {
13251                tuples.push(tuple);
13252            }
13253        }
13254        Self::from_tuples_with_names(tuples, self.shared_names(other))
13255    }
13256
13257    /// Group tuple positions by composite key, matching `pd.MultiIndex.groupby`.
13258    #[must_use]
13259    pub fn groupby(&self) -> HashMap<Vec<IndexLabel>, Vec<usize>> {
13260        let mut groups = HashMap::<Vec<IndexLabel>, Vec<usize>>::new();
13261        for row in 0..self.len() {
13262            groups.entry(self.tuple_at(row)).or_default().push(row);
13263        }
13264        groups
13265    }
13266
13267    /// Join two MultiIndexes using pandas-style join modes.
13268    pub fn join(&self, other: &Self, how: &str) -> Result<Self, IndexError> {
13269        match how {
13270            "left" => Ok(self.clone()),
13271            "right" => Ok(other.clone()),
13272            "inner" => self.intersection(other),
13273            "outer" => self.union(other),
13274            other => Err(IndexError::InvalidArgument(format!(
13275                "join: how must be 'left', 'right', 'inner', or 'outer', got {other:?}"
13276            ))),
13277        }
13278    }
13279
13280    /// Per-row membership test against a set of tuples.
13281    ///
13282    /// Matches `pd.MultiIndex.isin(values)`. Each entry in the returned
13283    /// bool vector is `true` iff that row's composite tuple appears in
13284    /// `values`. Tuples whose length does not match the MultiIndex's
13285    /// number of levels never match (silently contribute `false`),
13286    /// matching pandas' lenient behavior.
13287    #[must_use]
13288    pub fn isin(&self, values: &[Vec<IndexLabel>]) -> Vec<bool> {
13289        let nlevels = self.nlevels();
13290        let lookup: FxHashSet<&Vec<IndexLabel>> =
13291            values.iter().filter(|v| v.len() == nlevels).collect();
13292        if lookup.is_empty() {
13293            return vec![false; self.len()];
13294        }
13295        // Reuse one key buffer across rows: clear + extend refills the composite
13296        // lookup key in place, so membership is tested without allocating a fresh
13297        // Vec<IndexLabel> per row. Result is identical (value-based membership,
13298        // positional bool output).
13299        let mut key: Vec<IndexLabel> = Vec::with_capacity(nlevels);
13300        let mut out = Vec::with_capacity(self.len());
13301        for row in 0..self.len() {
13302            key.clear();
13303            key.extend(self.levels.iter().map(|level| level[row].clone()));
13304            out.push(lookup.contains(&key));
13305        }
13306        out
13307    }
13308
13309    /// Per-row membership test against values for a single level.
13310    ///
13311    /// Matches `pd.MultiIndex.isin(values, level=...)`. Returns `true`
13312    /// for positions whose label at `level` is in `values`. Returns an
13313    /// `OutOfBounds` error when `level` exceeds `nlevels()`.
13314    pub fn isin_level(&self, values: &[IndexLabel], level: usize) -> Result<Vec<bool>, IndexError> {
13315        if level >= self.nlevels() {
13316            return Err(IndexError::OutOfBounds {
13317                position: level,
13318                length: self.nlevels(),
13319            });
13320        }
13321        let lookup: FxHashSet<&IndexLabel> = values.iter().collect();
13322        Ok(self.levels[level]
13323            .iter()
13324            .map(|label| lookup.contains(label))
13325            .collect())
13326    }
13327
13328    /// Construct a MultiIndex from tuples of labels.
13329    ///
13330    /// Matches `pd.MultiIndex.from_tuples(tuples)`.
13331    /// Each inner Vec represents one row's labels across all levels.
13332    pub fn from_tuples(tuples: Vec<Vec<IndexLabel>>) -> Result<Self, IndexError> {
13333        if tuples.is_empty() {
13334            return Ok(Self {
13335                levels: Vec::new(),
13336                names: Vec::new(),
13337            });
13338        }
13339
13340        let nlevels = tuples[0].len();
13341        for (i, t) in tuples.iter().enumerate() {
13342            if t.len() != nlevels {
13343                return Err(IndexError::LengthMismatch {
13344                    expected: nlevels,
13345                    actual: t.len(),
13346                    context: format!("tuple at position {i} has wrong number of levels"),
13347                });
13348            }
13349        }
13350
13351        let mut levels: Vec<Vec<IndexLabel>> = (0..nlevels)
13352            .map(|_| Vec::with_capacity(tuples.len()))
13353            .collect();
13354        for tuple in &tuples {
13355            for (level_idx, label) in tuple.iter().enumerate() {
13356                levels[level_idx].push(label.clone());
13357            }
13358        }
13359
13360        Ok(Self {
13361            levels,
13362            names: vec![None; nlevels],
13363        })
13364    }
13365
13366    /// Construct a MultiIndex from parallel arrays (one per level).
13367    ///
13368    /// Matches `pd.MultiIndex.from_arrays(arrays)`.
13369    pub fn from_arrays(arrays: Vec<Vec<IndexLabel>>) -> Result<Self, IndexError> {
13370        if arrays.is_empty() {
13371            return Ok(Self {
13372                levels: Vec::new(),
13373                names: Vec::new(),
13374            });
13375        }
13376
13377        let expected_len = arrays[0].len();
13378        for (i, arr) in arrays.iter().enumerate() {
13379            if arr.len() != expected_len {
13380                return Err(IndexError::LengthMismatch {
13381                    expected: expected_len,
13382                    actual: arr.len(),
13383                    context: format!("level {i} array length mismatch"),
13384                });
13385            }
13386        }
13387
13388        let nlevels = arrays.len();
13389        Ok(Self {
13390            levels: arrays,
13391            names: vec![None; nlevels],
13392        })
13393    }
13394
13395    /// Construct a MultiIndex from frame-like columns.
13396    ///
13397    /// Matches `pd.MultiIndex.from_frame(frame)` at the payload level:
13398    /// each input entry is one frame column, with the optional column name
13399    /// becoming the corresponding level name.
13400    pub fn from_frame(columns: Vec<(Option<String>, Vec<IndexLabel>)>) -> Result<Self, IndexError> {
13401        if columns.is_empty() {
13402            return Ok(Self {
13403                levels: Vec::new(),
13404                names: Vec::new(),
13405            });
13406        }
13407
13408        let expected_len = columns[0].1.len();
13409        for (column_idx, (_, values)) in columns.iter().enumerate() {
13410            if values.len() != expected_len {
13411                return Err(IndexError::LengthMismatch {
13412                    expected: expected_len,
13413                    actual: values.len(),
13414                    context: format!("from_frame column {column_idx} length mismatch"),
13415                });
13416            }
13417        }
13418
13419        let mut names = Vec::with_capacity(columns.len());
13420        let mut levels = Vec::with_capacity(columns.len());
13421        for (name, values) in columns {
13422            names.push(name);
13423            levels.push(values);
13424        }
13425
13426        Ok(Self { levels, names })
13427    }
13428
13429    /// Construct a MultiIndex from the Cartesian product of iterables.
13430    ///
13431    /// Matches `pd.MultiIndex.from_product(iterables)`.
13432    pub fn from_product(iterables: Vec<Vec<IndexLabel>>) -> Result<Self, IndexError> {
13433        if iterables.is_empty() {
13434            return Ok(Self {
13435                levels: Vec::new(),
13436                names: Vec::new(),
13437            });
13438        }
13439
13440        // Compute total size of the Cartesian product.
13441        let total: usize = iterables.iter().map(Vec::len).product();
13442        if total == 0 {
13443            let nlevels = iterables.len();
13444            return Ok(Self {
13445                levels: (0..nlevels).map(|_| Vec::new()).collect(),
13446                names: vec![None; nlevels],
13447            });
13448        }
13449
13450        let nlevels = iterables.len();
13451        let mut levels: Vec<Vec<IndexLabel>> =
13452            (0..nlevels).map(|_| Vec::with_capacity(total)).collect();
13453
13454        // Generate Cartesian product: for each position, compute which
13455        // element from each level by dividing position by the product of
13456        // all subsequent level lengths.
13457        for pos in 0..total {
13458            let mut remaining = pos;
13459            for (level_idx, iterable) in iterables.iter().enumerate().rev() {
13460                let idx_in_level = remaining % iterable.len();
13461                remaining /= iterable.len();
13462                levels[level_idx].push(iterable[idx_in_level].clone());
13463            }
13464        }
13465
13466        Ok(Self {
13467            levels,
13468            names: vec![None; nlevels],
13469        })
13470    }
13471
13472    /// Flatten this MultiIndex into a single-level Index by joining
13473    /// level labels with a separator.
13474    ///
13475    /// Matches `pd.MultiIndex.to_flat_index()` (approximately).
13476    #[must_use]
13477    pub fn to_flat_index(&self, sep: &str) -> Index {
13478        let n = self.len();
13479        let labels: Vec<IndexLabel> = (0..n)
13480            .map(|i| {
13481                let parts: Vec<String> = self
13482                    .levels
13483                    .iter()
13484                    .map(|level| level[i].to_string())
13485                    .collect();
13486                IndexLabel::Utf8(parts.join(sep))
13487            })
13488            .collect();
13489        Index::new(labels)
13490    }
13491
13492    /// Drop a level from this MultiIndex, returning a new MultiIndex
13493    /// (or an Index if only one level remains).
13494    ///
13495    /// Matches `pd.MultiIndex.droplevel(level)`.
13496    pub fn droplevel(&self, level: usize) -> Result<MultiIndexOrIndex, IndexError> {
13497        if level >= self.nlevels() {
13498            return Err(IndexError::OutOfBounds {
13499                position: level,
13500                length: self.nlevels(),
13501            });
13502        }
13503        if self.nlevels() <= 1 {
13504            return Err(IndexError::OutOfBounds {
13505                position: level,
13506                length: self.nlevels(),
13507            });
13508        }
13509
13510        let mut new_levels = self.levels.clone();
13511        new_levels.remove(level);
13512        let mut new_names = self.names.clone();
13513        new_names.remove(level);
13514
13515        if new_levels.len() == 1 {
13516            let mut idx = Index::new(new_levels.into_iter().next().unwrap());
13517            if let Some(ref name) = new_names[0] {
13518                idx = idx.set_name(name);
13519            }
13520            Ok(MultiIndexOrIndex::Index(idx))
13521        } else {
13522            Ok(MultiIndexOrIndex::Multi(Self {
13523                levels: new_levels,
13524                names: new_names,
13525            }))
13526        }
13527    }
13528
13529    /// Swap two levels.
13530    ///
13531    /// Matches `pd.MultiIndex.swaplevel(i, j)`.
13532    pub fn swaplevel(&self, i: usize, j: usize) -> Result<Self, IndexError> {
13533        if i >= self.nlevels() || j >= self.nlevels() {
13534            return Err(IndexError::OutOfBounds {
13535                position: i.max(j),
13536                length: self.nlevels(),
13537            });
13538        }
13539        let mut new_levels = self.levels.clone();
13540        let mut new_names = self.names.clone();
13541        new_levels.swap(i, j);
13542        new_names.swap(i, j);
13543        Ok(Self {
13544            levels: new_levels,
13545            names: new_names,
13546        })
13547    }
13548
13549    /// Reorder levels according to the given order.
13550    ///
13551    /// Matches `pd.MultiIndex.reorder_levels(order)`.
13552    /// `order` is a slice of level indices specifying the new order.
13553    /// Must contain each level index exactly once.
13554    pub fn reorder_levels(&self, order: &[usize]) -> Result<Self, IndexError> {
13555        if order.len() != self.nlevels() {
13556            return Err(IndexError::LengthMismatch {
13557                expected: self.nlevels(),
13558                actual: order.len(),
13559                context: "reorder_levels: order length must match nlevels".into(),
13560            });
13561        }
13562
13563        // Validate all indices are in range and unique.
13564        let mut seen = vec![false; self.nlevels()];
13565        for &idx in order {
13566            if idx >= self.nlevels() {
13567                return Err(IndexError::OutOfBounds {
13568                    position: idx,
13569                    length: self.nlevels(),
13570                });
13571            }
13572            if seen[idx] {
13573                return Err(IndexError::LengthMismatch {
13574                    expected: self.nlevels(),
13575                    actual: order.len(),
13576                    context: format!("reorder_levels: duplicate level index {idx}"),
13577                });
13578            }
13579            seen[idx] = true;
13580        }
13581
13582        let new_levels: Vec<Vec<IndexLabel>> =
13583            order.iter().map(|&idx| self.levels[idx].clone()).collect();
13584        let new_names: Vec<Option<String>> =
13585            order.iter().map(|&idx| self.names[idx].clone()).collect();
13586
13587        Ok(Self {
13588            levels: new_levels,
13589            names: new_names,
13590        })
13591    }
13592}
13593
13594/// Return type of `MultiIndex::droplevel`: the result stays a `MultiIndex`
13595/// while two or more levels remain and becomes a flat `Index` when one is left.
13596#[derive(Debug, Clone, PartialEq)]
13597pub enum MultiIndexOrIndex {
13598    Multi(MultiIndex), // two or more levels remain after the drop
13599    Index(Index), // exactly one level remains after the drop
13600}
13601
13602#[cfg(test)]
13603mod tests {
13604    use fp_types::{Period, PeriodFreq, Scalar, Timedelta};
13605
13606    use super::{
13607        CategoricalIndex, DateOffset, DateRangeError, DatetimeIndex, Index, IndexLabel, MultiIndex,
13608        PeriodFields, PeriodIndex, RangeIndex, TimedeltaIndex, TimedeltaRangeError, align_union,
13609        apply_date_offset, bdate_range, date_range, infer_freq_from_timestamps, timedelta_range,
13610        validate_alignment_plan,
13611    };
13612
13613    fn int64_labels(index: &Index) -> Vec<i64> {
13614        index
13615            .labels()
13616            .iter()
13617            .filter_map(|label| match label {
13618                IndexLabel::Int64(value) => Some(*value),
13619                _ => None,
13620            })
13621            .collect()
13622    }
13623
13624    /// Regression lock for br-frankenpandas-i3t8. `Index` must stay
13625    /// `Send + Sync` so `DataFrame` can be wrapped in `Arc` and shared
13626    /// across reader threads. A future refactor that reintroduces
13627    /// `std::cell::OnceCell` (or any `!Sync` interior-mutability primitive)
13628    /// breaks this test at compile time.
13629    #[test]
13630    fn index_is_send_and_sync() {
13631        fn assert_send_sync<T: Send + Sync>() {}
13632        assert_send_sync::<Index>();
13633        assert_send_sync::<MultiIndex>();
13634    }
13635
13636    #[test]
13637    fn bdate_range_rolls_weekend_start_forward() {
13638        let idx = bdate_range(Some("2024-01-06"), None, Some(3), None).unwrap();
13639        assert_eq!(
13640            idx.labels(),
13641            &[
13642                IndexLabel::Datetime64(1_704_672_000_000_000_000),
13643                IndexLabel::Datetime64(1_704_758_400_000_000_000),
13644                IndexLabel::Datetime64(1_704_844_800_000_000_000),
13645            ]
13646        );
13647    }
13648
13649    #[test]
13650    fn bdate_range_rolls_weekend_end_backward_and_preserves_name() {
13651        let idx = bdate_range(None, Some("2024-01-07"), Some(3), Some("biz")).unwrap();
13652        assert_eq!(
13653            idx.labels(),
13654            &[
13655                IndexLabel::Datetime64(1_704_240_000_000_000_000),
13656                IndexLabel::Datetime64(1_704_326_400_000_000_000),
13657                IndexLabel::Datetime64(1_704_412_800_000_000_000),
13658            ]
13659        );
13660        assert_eq!(idx.name(), Some("biz"));
13661    }
13662
13663    #[test]
13664    fn timedelta_range_rejects_over_specified_parameters() {
13665        let err = timedelta_range(
13666            Some(Timedelta::NANOS_PER_DAY),
13667            Some(3 * Timedelta::NANOS_PER_DAY),
13668            Some(2),
13669            Timedelta::NANOS_PER_DAY,
13670            None,
13671        )
13672        .expect_err("start + end + periods with explicit freq must fail closed");
13673        assert!(matches!(err, TimedeltaRangeError::TooManyParams));
13674    }
13675
13676    #[test]
13677    fn date_range_rejects_over_specified_parameters() {
13678        let err = date_range(
13679            Some("2020-01-01"),
13680            Some("2020-01-03"),
13681            Some(2),
13682            Timedelta::NANOS_PER_DAY,
13683            None,
13684        )
13685        .expect_err("start + end + periods with explicit freq must fail closed");
13686        assert!(matches!(err, DateRangeError::TooManyParams));
13687    }
13688
13689    #[test]
13690    fn date_range_rejects_generated_timestamp_overflow() {
13691        let err = date_range(
13692            Some("2262-04-11 23:47:16"),
13693            None,
13694            Some(3),
13695            Timedelta::NANOS_PER_SEC,
13696            None,
13697        )
13698        .expect_err("overflow past i64::MAX nanos must fail closed");
13699        assert!(matches!(err, DateRangeError::InvalidRange));
13700    }
13701
13702    #[test]
13703    fn date_range_rejects_backfilled_timestamp_underflow() {
13704        let err = date_range(
13705            None,
13706            Some("1677-09-21 00:12:44"),
13707            Some(3),
13708            Timedelta::NANOS_PER_SEC,
13709            None,
13710        )
13711        .expect_err("underflow before i64::MIN nanos must fail closed");
13712        assert!(matches!(err, DateRangeError::InvalidRange));
13713    }
13714
13715    #[test]
13716    fn date_range_rejects_out_of_bounds_timestamp_parse() {
13717        let err = date_range(
13718            Some("2263-01-01"),
13719            None,
13720            Some(1),
13721            Timedelta::NANOS_PER_DAY,
13722            None,
13723        )
13724        .expect_err("out-of-bounds timestamps must not be coerced to i64::MIN");
13725        assert!(matches!(err, DateRangeError::InvalidRange));
13726    }
13727
13728    #[test]
13729    fn date_offset_business_day_skips_weekend() {
13730        let nanos = apply_date_offset("2024-01-05", DateOffset::BusinessDay(1)).unwrap();
13731        assert_eq!(nanos, 1_704_672_000_000_000_000);
13732    }
13733
13734    #[test]
13735    fn date_offset_month_end_handles_leap_year() {
13736        let nanos = apply_date_offset("2024-02-10", DateOffset::MonthEnd(1)).unwrap();
13737        assert_eq!(nanos, 1_709_164_800_000_000_000);
13738    }
13739
13740    #[test]
13741    fn infer_freq_detects_fixed_and_calendar_offsets() {
13742        assert_eq!(
13743            infer_freq_from_timestamps(&["2024-01-01", "2024-01-03", "2024-01-05"]).unwrap(),
13744            Some("2D".to_owned())
13745        );
13746        assert_eq!(
13747            infer_freq_from_timestamps(&[
13748                "2024-01-01",
13749                "2024-01-02",
13750                "2024-01-03",
13751                "2024-01-04",
13752                "2024-01-05",
13753                "2024-01-08",
13754                "2024-01-09",
13755            ])
13756            .unwrap(),
13757            Some("B".to_owned())
13758        );
13759        assert_eq!(
13760            infer_freq_from_timestamps(&["2024-01-31", "2024-02-29", "2024-03-31"]).unwrap(),
13761            Some("ME".to_owned())
13762        );
13763    }
13764
13765    #[test]
13766    fn infer_freq_returns_none_for_irregular_or_duplicate_values() {
13767        assert_eq!(
13768            infer_freq_from_timestamps(&["2024-01-01", "2024-01-02", "2024-01-04"]).unwrap(),
13769            None
13770        );
13771        assert_eq!(
13772            infer_freq_from_timestamps(&["2024-01-01", "2024-01-02", "2024-01-02"]).unwrap(),
13773            None
13774        );
13775    }
13776
13777    #[test]
13778    fn union_alignment_preserves_left_then_right_unseen_order() {
13779        let left = Index::new(vec![1_i64.into(), 2_i64.into(), 4_i64.into()]);
13780        let right = Index::new(vec![2_i64.into(), 3_i64.into(), 4_i64.into()]);
13781
13782        let plan = align_union(&left, &right);
13783        assert_eq!(
13784            plan.union_index.labels(),
13785            &[
13786                IndexLabel::Int64(1),
13787                IndexLabel::Int64(2),
13788                IndexLabel::Int64(4),
13789                IndexLabel::Int64(3),
13790            ]
13791        );
13792        assert_eq!(plan.left_positions, vec![Some(0), Some(1), Some(2), None]);
13793        assert_eq!(plan.right_positions, vec![None, Some(0), Some(2), Some(1)]);
13794        validate_alignment_plan(&plan).expect("plan must be valid");
13795    }
13796
13797    #[test]
13798    fn duplicate_detection_matches_index_surface() {
13799        let index = Index::new(vec!["a".into(), "a".into(), "b".into()]);
13800        assert!(index.has_duplicates());
13801    }
13802
13803    #[test]
13804    fn has_duplicates_sort_fast_path_matches_hashmap_idxdup() {
13805        // The strict-ascending fast path in has_duplicates must agree with the
13806        // FxHashMap detect_duplicates for every shape: sorted-unique (fast path
13807        // returns false), sorted-with-dups (not strictly ascending -> Unsorted
13808        // -> hashmap), unsorted-unique, unsorted-dups, descending, single,
13809        // empty, and Utf8.
13810        let cases: Vec<Vec<IndexLabel>> = vec![
13811            vec![],
13812            vec![5_i64.into()],
13813            vec![1_i64.into(), 2_i64.into(), 3_i64.into()], // sorted unique
13814            vec![1_i64.into(), 5_i64.into(), 9_i64.into()], // sorted unique, gapped
13815            vec![1_i64.into(), 2_i64.into(), 2_i64.into()], // sorted with dup
13816            vec![3_i64.into(), 1_i64.into(), 2_i64.into()], // unsorted unique
13817            vec![3_i64.into(), 1_i64.into(), 3_i64.into()], // unsorted dup
13818            vec![9_i64.into(), 5_i64.into(), 1_i64.into()], // descending unique
13819            vec!["a".into(), "b".into(), "c".into()],       // sorted utf8 unique
13820            vec!["a".into(), "a".into(), "b".into()],       // utf8 dup
13821            vec!["c".into(), "a".into(), "b".into()],       // unsorted utf8 unique
13822        ];
13823        for labels in cases {
13824            let expected = super::detect_duplicates(&labels);
13825            let got = Index::new(labels.clone()).has_duplicates();
13826            assert_eq!(got, expected, "mismatch for {labels:?}");
13827        }
13828    }
13829
13830    #[test]
13831    fn dedup_family_sort_fast_path_matches_reference_idxdup() {
13832        // unique / duplicated / drop_duplicates strict-ascending fast paths must
13833        // equal an independent first-seen reference for every shape.
13834        let cases: Vec<Vec<IndexLabel>> = vec![
13835            vec![],
13836            vec![7_i64.into()],
13837            vec![1_i64.into(), 2_i64.into(), 3_i64.into()], // sorted unique
13838            vec![1_i64.into(), 2_i64.into(), 2_i64.into(), 4_i64.into()], // sorted dup
13839            vec![3_i64.into(), 1_i64.into(), 3_i64.into(), 2_i64.into()], // unsorted dup
13840            vec![9_i64.into(), 5_i64.into(), 1_i64.into()], // descending
13841            vec!["a".into(), "b".into(), "c".into()],       // sorted utf8
13842            vec!["b".into(), "a".into(), "b".into()],       // unsorted utf8 dup
13843        ];
13844        for labels in cases {
13845            let idx = Index::new(labels.clone());
13846
13847            let mut seen = std::collections::HashSet::new();
13848            let ref_unique: Vec<IndexLabel> = labels
13849                .iter()
13850                .filter(|l| seen.insert((*l).clone()))
13851                .cloned()
13852                .collect();
13853            assert_eq!(idx.unique().labels(), ref_unique.as_slice(), "unique {labels:?}");
13854
13855            let mut seen_f = std::collections::HashSet::new();
13856            let ref_dup_first: Vec<bool> = labels
13857                .iter()
13858                .map(|l| !seen_f.insert(l.clone()))
13859                .collect();
13860            assert_eq!(
13861                idx.duplicated(DuplicateKeep::First),
13862                ref_dup_first,
13863                "duplicated(First) {labels:?}"
13864            );
13865
13866            assert_eq!(
13867                idx.drop_duplicates().labels(),
13868                ref_unique.as_slice(),
13869                "drop_duplicates {labels:?}"
13870            );
13871        }
13872    }
13873
13874    #[test]
13875    fn sorted_merge_set_ops_match_reference_idxdup() {
13876        // intersection / difference must equal the self-ordered, deduped
13877        // FxHashMap reference whether the two-pointer fast path fires (both
13878        // strictly ascending) or the hash path runs (any side unsorted).
13879        let s = |v: &[i64]| v.iter().map(|x| IndexLabel::Int64(*x)).collect::<Vec<_>>();
13880        let pairs: Vec<(Vec<IndexLabel>, Vec<IndexLabel>)> = vec![
13881            (s(&[1, 2, 3, 5]), s(&[2, 3, 4])),       // both sorted, overlap
13882            (s(&[1, 2, 3]), s(&[4, 5, 6])),          // both sorted, disjoint
13883            (s(&[1, 2, 3]), s(&[1, 2, 3])),          // identical
13884            (s(&[1, 2, 3]), vec![]),                 // empty other
13885            (vec![], s(&[1, 2, 3])),                 // empty self
13886            (s(&[3, 1, 2]), s(&[2, 3])),             // self unsorted -> hash path
13887            (s(&[1, 2, 3]), s(&[3, 1])),             // other unsorted -> hash path
13888            (vec!["a".into(), "c".into(), "e".into()], vec!["b".into(), "c".into()]), // utf8 sorted
13889            (
13890                vec![1_i64.into(), 2_i64.into()],
13891                vec!["a".into(), "b".into()],
13892            ), // mixed-type sorted, disjoint by Ord variant
13893        ];
13894        for (a, b) in pairs {
13895            let ia = Index::new(a.clone());
13896            let ib = Index::new(b.clone());
13897            let bset: std::collections::HashSet<IndexLabel> = b.iter().cloned().collect();
13898
13899            let mut seen = std::collections::HashSet::new();
13900            let ref_inter: Vec<IndexLabel> = a
13901                .iter()
13902                .filter(|l| bset.contains(*l) && seen.insert((*l).clone()))
13903                .cloned()
13904                .collect();
13905            assert_eq!(
13906                ia.intersection(&ib).labels(),
13907                ref_inter.as_slice(),
13908                "intersection {a:?} ∩ {b:?}"
13909            );
13910
13911            let mut seen_d = std::collections::HashSet::new();
13912            let ref_diff: Vec<IndexLabel> = a
13913                .iter()
13914                .filter(|l| !bset.contains(*l) && seen_d.insert((*l).clone()))
13915                .cloned()
13916                .collect();
13917            assert_eq!(
13918                ia.difference(&ib).labels(),
13919                ref_diff.as_slice(),
13920                "difference {a:?} \\ {b:?}"
13921            );
13922        }
13923    }
13924
13925    #[test]
13926    fn datetime_timedelta_get_loc_binary_search_matches_linear_idxdup() {
13927        // get_loc now binary-searches a monotonic typed index; the result must
13928        // equal a linear first-match reference for both sorted (binary path) and
13929        // unsorted (linear path) value vectors.
13930        for nanos in [
13931            vec![10_i64, 20, 30, 40, 50], // sorted -> AscendingDatetime64/Timedelta64
13932            vec![30_i64, 10, 50, 20, 40], // unsorted -> linear fallback
13933        ] {
13934            let dt = DatetimeIndex::new(nanos.clone());
13935            let td = TimedeltaIndex::new(nanos.clone());
13936            for q in [10_i64, 20, 30, 40, 50, 0, 99] {
13937                let expected = nanos.iter().position(|n| *n == q);
13938                assert_eq!(dt.get_loc(q).ok(), expected, "datetime nanos={nanos:?} q={q}");
13939                assert_eq!(td.get_loc(q).ok(), expected, "timedelta nanos={nanos:?} q={q}");
13940            }
13941        }
13942    }
13943
13944    #[test]
13945    fn get_indexer_sorted_fast_path_matches_reference_idxdup() {
13946        // get_indexer's sorted merge / binary-search fast paths and the
13947        // FxHashMap fallback must all equal a first-occurrence reference.
13948        let s = |v: &[i64]| v.iter().map(|x| IndexLabel::Int64(*x)).collect::<Vec<_>>();
13949        let cases: Vec<(Vec<IndexLabel>, Vec<IndexLabel>)> = vec![
13950            (s(&[1, 2, 3, 4, 5]), s(&[2, 4, 6])),     // both sorted
13951            (s(&[1, 2, 3, 4, 5]), s(&[5, 1, 3, 9])),  // self sorted, target unsorted
13952            (s(&[3, 1, 5, 2]), s(&[1, 2, 3])),        // self unsorted -> hash path
13953            (s(&[1, 2, 3]), vec![]),                  // empty target
13954            (vec![], s(&[1, 2])),                     // empty self
13955            (
13956                vec!["a".into(), "c".into(), "e".into()],
13957                vec!["c".into(), "z".into(), "a".into()],
13958            ), // utf8, target unsorted
13959            (s(&[1, 2, 3]), vec!["a".into()]), // mixed type sorted, no match
13960        ];
13961        for (a, b) in cases {
13962            let ia = Index::new(a.clone());
13963            let ib = Index::new(b.clone());
13964            let ref_out: Vec<Option<usize>> =
13965                b.iter().map(|t| a.iter().position(|x| x == t)).collect();
13966            assert_eq!(ia.get_indexer(&ib), ref_out, "get_indexer {a:?} -> {b:?}");
13967        }
13968    }
13969
13970    #[test]
13971    fn known_unique_constructor_seeds_duplicate_cache() {
13972        let index = Index::new_known_unique(vec![1_i64.into(), 2_i64.into(), 3_i64.into()]);
13973        assert_eq!(index.duplicate_cache.get(), Some(&false));
13974        assert!(!index.has_duplicates());
13975    }
13976
13977    #[test]
13978    fn index_equality_ignores_duplicate_cache_state() {
13979        let index_with_cache = Index::new(vec!["a".into(), "a".into(), "b".into()]);
13980        assert!(index_with_cache.has_duplicates());
13981
13982        let fresh_index = Index::new(vec!["a".into(), "a".into(), "b".into()]);
13983        assert_eq!(index_with_cache, fresh_index);
13984    }
13985
13986    #[test]
13987    fn index_label_identity_cache_preserves_equality_contracts() {
13988        let base = Index::new(vec![1_i64.into(), 2_i64.into(), 3_i64.into()]);
13989        let clone = base.clone();
13990        assert_eq!(base.label_identity, clone.label_identity);
13991        assert_eq!(base, clone);
13992
13993        let renamed = clone.rename_index(Some("rows"));
13994        assert_eq!(base.label_identity, renamed.label_identity);
13995        assert!(base.equals(&renamed));
13996        assert!(!base.identical(&renamed));
13997
13998        let independent_equal = Index::new(vec![1_i64.into(), 2_i64.into(), 3_i64.into()]);
13999        assert_ne!(base.label_identity, independent_equal.label_identity);
14000        assert_eq!(base, independent_equal);
14001
14002        let different = Index::new(vec![1_i64.into(), 2_i64.into(), 4_i64.into()]);
14003        assert_ne!(base, different);
14004        assert!(!base.equals(&different));
14005    }
14006
14007    #[test]
14008    fn semantic_fingerprint_cache_reuses_label_result() {
14009        let index = Index::new(vec![1_i64.into(), 2_i64.into(), 3_i64.into()]);
14010        let calls = std::cell::Cell::new(0);
14011
14012        let first = index.semantic_labels_fingerprint_with(|labels| {
14013            calls.set(calls.get() + 1);
14014            format!("labels:{}", labels.len())
14015        });
14016        let second = index.semantic_labels_fingerprint_with(|_| {
14017            calls.set(calls.get() + 1);
14018            "changed".to_owned()
14019        });
14020
14021        assert_eq!(first, "labels:3");
14022        assert_eq!(second, "labels:3");
14023        assert_eq!(calls.get(), 1);
14024    }
14025
14026    #[test]
14027    fn int64_unit_range_index_preserves_materialized_surface() {
14028        let index = Index::new_known_unique_int64_unit_range(-2, 4).rename_index(Some("idx"));
14029        let reference = Index::new(vec![
14030            IndexLabel::Int64(-2),
14031            IndexLabel::Int64(-1),
14032            IndexLabel::Int64(0),
14033            IndexLabel::Int64(1),
14034        ])
14035        .rename_index(Some("idx"));
14036
14037        assert_eq!(index.len(), 4);
14038        assert!(!index.has_duplicates());
14039        assert!(index.is_sorted());
14040        assert_eq!(index.position(&IndexLabel::Int64(0)), Some(2));
14041        assert_eq!(index.position(&IndexLabel::Int64(2)), None);
14042        assert_eq!(index.labels(), reference.labels());
14043        assert_eq!(index, reference);
14044    }
14045
    /// Smoke test over the public accessor surface of each typed index wrapper
    /// (`RangeIndex`, `DatetimeIndex`, `TimedeltaIndex`, `PeriodIndex`,
    /// `CategoricalIndex`): value views, size/shape, dtype strings, naming,
    /// copying, missing-value probes and conversion back to a flat index.
    #[test]
    fn index_variant_wrappers_expose_public_type_surface() {
        // --- RangeIndex: `new(1, 7, 2)` is asserted to hold 1, 3, 5. ---
        let range = RangeIndex::new(1, 7, 2).unwrap().set_name("row");
        assert_eq!(range.values(), vec![1, 3, 5]);
        // All list/array style views must agree with `values()`.
        assert_eq!(range.to_list(), range.values());
        assert_eq!(range.tolist(), range.values());
        assert_eq!(range.to_numpy(), range.values());
        assert_eq!(range.array(), range.values());
        assert_eq!(range.len(), 3);
        assert_eq!(range.size(), 3);
        assert_eq!(range.shape(), (3,));
        assert!(!range.empty());
        assert_eq!(range.dtype(), "int64");
        assert_eq!(range.dtypes(), vec!["int64"]);
        assert_eq!(range.names(), vec![Some("row".to_owned())]);
        assert_eq!(range.copy(), range);
        assert_eq!(range.rename_index(None).name(), None);
        // Three elements, each counted as one `i64`.
        assert_eq!(range.nbytes(), 3 * std::mem::size_of::<i64>());
        assert_eq!(range.to_index().name(), Some("row"));
        // A zero third argument is rejected.
        assert!(RangeIndex::new(0, 5, 0).is_err());

        // --- DatetimeIndex: `i64::MIN` is surfaced as a missing value (`None`). ---
        let dt = DatetimeIndex::new(vec![1_706_918_400_000_000_000, i64::MIN]).set_name("when");
        assert_eq!(dt.year(), vec![Some(2024), None]);
        assert_eq!(dt.month(), vec![Some(2), None]);
        assert_eq!(dt.day(), vec![Some(3), None]);
        assert_eq!(dt.values(), vec![Some(1_706_918_400_000_000_000), None]);
        assert_eq!(dt.to_list(), dt.values());
        assert_eq!(dt.tolist(), dt.values());
        assert_eq!(dt.to_numpy(), dt.values());
        assert_eq!(dt.array(), dt.values());
        assert_eq!(dt.size(), 2);
        assert_eq!(dt.shape(), (2,));
        assert!(!dt.empty());
        assert_eq!(dt.dtype(), "datetime64[ns]");
        assert_eq!(dt.dtypes(), vec!["datetime64[ns]"]);
        assert_eq!(dt.names(), vec![Some("when".to_owned())]);
        assert_eq!(dt.copy(), dt);
        assert!(dt.hasnans());
        assert_eq!(dt.isna(), vec![false, true]);
        assert_eq!(dt.notna(), vec![true, false]);
        assert!(dt.nbytes() <= dt.memory_usage(true));
        // An index built from plain i64 labels cannot be wrapped as a DatetimeIndex.
        assert!(DatetimeIndex::from_index(Index::from_i64(vec![1])).is_err());

        // --- TimedeltaIndex: 90_061 s = 1 day + 3661 s; `Timedelta::NAT` is missing. ---
        let td = TimedeltaIndex::new(vec![90_061_000_000_000, Timedelta::NAT]).set_name("delta");
        assert_eq!(td.days(), vec![Some(1), None]);
        assert_eq!(td.seconds(), vec![Some(3661), None]);
        assert_eq!(td.total_seconds(), vec![Some(90061.0), None]);
        assert_eq!(td.values(), vec![Some(90_061_000_000_000), None]);
        assert_eq!(td.to_list(), td.values());
        assert_eq!(td.tolist(), td.values());
        assert_eq!(td.to_numpy(), td.values());
        assert_eq!(td.array(), td.values());
        assert_eq!(td.size(), 2);
        assert_eq!(td.shape(), (2,));
        assert!(!td.empty());
        assert_eq!(td.dtype(), "timedelta64[ns]");
        assert_eq!(td.dtypes(), vec!["timedelta64[ns]"]);
        assert_eq!(td.names(), vec![Some("delta".to_owned())]);
        assert_eq!(td.copy(), td);
        assert!(td.hasnans());
        assert_eq!(td.isna(), vec![false, true]);
        assert_eq!(td.notna(), vec![true, false]);

        // --- PeriodIndex: three consecutive monthly periods starting at ordinal 10. ---
        let period =
            PeriodIndex::from_range(Period::new(10, PeriodFreq::Monthly), 3).set_name("period");
        assert_eq!(period.freq(), Some(PeriodFreq::Monthly));
        assert_eq!(
            period.values(),
            &[
                Period::new(10, PeriodFreq::Monthly),
                Period::new(11, PeriodFreq::Monthly),
                Period::new(12, PeriodFreq::Monthly),
            ]
        );
        assert_eq!(period.to_list(), period.values());
        assert_eq!(period.tolist(), period.values());
        assert_eq!(period.to_numpy(), period.values());
        assert_eq!(period.array(), period.values());
        assert_eq!(period.size(), 3);
        assert_eq!(period.shape(), (3,));
        assert!(!period.empty());
        assert_eq!(period.dtype(), "period[M]");
        assert_eq!(period.dtypes(), vec!["period[M]".to_owned()]);
        assert_eq!(period.names(), vec![Some("period".to_owned())]);
        assert_eq!(period.copy(), period);
        assert_eq!(period.rename_index(None).name(), None);
        assert!(period.nbytes() <= period.memory_usage(true));
        assert_eq!(period.to_index().name(), Some("period"));

        // --- CategoricalIndex: categories follow first appearance ("low", "high"). ---
        let categorical = CategoricalIndex::from_values(
            vec!["low".to_owned(), "high".to_owned(), "low".to_owned()],
            true,
        )
        .set_name("priority");
        assert_eq!(categorical.categories(), &["low", "high"]);
        assert_eq!(categorical.codes(), vec![Some(0), Some(1), Some(0)]);
        // The `true` passed to `from_values` is reported back by `ordered()`.
        assert!(categorical.ordered());
        assert_eq!(
            categorical.values(),
            vec!["low".to_owned(), "high".to_owned(), "low".to_owned()]
        );
        assert_eq!(categorical.to_list(), categorical.values());
        assert_eq!(categorical.tolist(), categorical.values());
        assert_eq!(categorical.to_numpy(), categorical.values());
        assert_eq!(categorical.array(), categorical.values());
        assert_eq!(categorical.size(), 3);
        assert_eq!(categorical.shape(), (3,));
        assert!(!categorical.empty());
        assert_eq!(categorical.dtype(), "category");
        assert_eq!(categorical.dtypes(), vec!["category"]);
        assert_eq!(categorical.names(), vec![Some("priority".to_owned())]);
        assert_eq!(categorical.copy(), categorical);
        assert_eq!(categorical.isna(), vec![false, false, false]);
        assert_eq!(categorical.notna(), vec![true, true, true]);
        assert!(categorical.nbytes() <= categorical.memory_usage(true));
        assert_eq!(categorical.to_index().name(), Some("priority"));
        // "missing" is not among the supplied categories (["known"]), so
        // construction must fail.
        assert!(
            CategoricalIndex::with_categories(
                vec!["missing".to_owned()],
                vec!["known".to_owned()],
                false,
            )
            .is_err()
        );
    }
14171
14172    #[test]
14173    fn typed_index_str_accessors_forward_flat_labels_e7ms9() -> Result<(), super::IndexError> {
14174        let flat = Index::new(vec!["Alpha".into(), 1_i64.into(), "".into()]);
14175        assert_eq!(
14176            flat.r#str().lower(),
14177            vec![Some("alpha".to_owned()), None, Some(String::new())]
14178        );
14179
14180        let range = RangeIndex::new(1, 4, 1)?;
14181        assert_eq!(range.r#str().len(), vec![None, None, None]);
14182
14183        let dt = DatetimeIndex::new(vec![1_704_067_200_000_000_000]);
14184        assert_eq!(dt.r#str().upper(), vec![None]);
14185
14186        let td = TimedeltaIndex::new(vec![90_061_000_000_000]);
14187        assert_eq!(td.r#str().contains("day"), vec![None]);
14188
14189        let period = PeriodIndex::from_range(Period::new(10, PeriodFreq::Monthly), 2);
14190        let expected_period_lower: Vec<Option<String>> = period
14191            .format()
14192            .into_iter()
14193            .map(|label| Some(label.to_lowercase()))
14194            .collect();
14195        assert_eq!(period.r#str().lower(), expected_period_lower);
14196
14197        let categorical = CategoricalIndex::from_values(
14198            vec!["Low".to_owned(), "HIGH".to_owned(), String::new()],
14199            false,
14200        );
14201        assert_eq!(
14202            categorical.r#str().lower(),
14203            vec![
14204                Some("low".to_owned()),
14205                Some("high".to_owned()),
14206                Some(String::new())
14207            ]
14208        );
14209        Ok(())
14210    }
14211
    /// Exercises `PeriodIndex::from_fields` across several frequencies, pinning
    /// the resulting period ordinals, then checks the inputs that must be
    /// rejected (and one out-of-range field that is accepted).
    #[test]
    fn period_index_from_fields_builds_period_ordinals_th1fd() -> Result<(), super::IndexError> {
        // Monthly: (2020, 1) and (2021, 2) are expected at ordinals 600 and 613.
        let years = [2020, 2021];
        let months = [1, 2];
        let monthly = PeriodIndex::from_fields(PeriodFields {
            month: Some(&months),
            freq: Some(PeriodFreq::Monthly),
            ..PeriodFields::new(&years)
        })?;
        assert_eq!(
            monthly.values(),
            &[
                Period::new(600, PeriodFreq::Monthly),
                Period::new(613, PeriodFreq::Monthly)
            ]
        );

        // Quarterly: the literal sets no `freq`, only `quarter`; the result is
        // asserted to carry `PeriodFreq::Quarterly`.
        let quarter_years = [2020];
        let quarters = [2];
        let quarterly = PeriodIndex::from_fields(PeriodFields {
            quarter: Some(&quarters),
            ..PeriodFields::new(&quarter_years)
        })?;
        assert_eq!(
            quarterly.values(),
            &[Period::new(201, PeriodFreq::Quarterly)]
        );

        // Weekly: year 2020, month 1, no day supplied.
        let single_year = [2020];
        let single_month = [1];
        let weekly = PeriodIndex::from_fields(PeriodFields {
            month: Some(&single_month),
            freq: Some(PeriodFreq::Weekly),
            ..PeriodFields::new(&single_year)
        })?;
        assert_eq!(weekly.values(), &[Period::new(2_610, PeriodFreq::Weekly)]);

        // Business: 2020-01-04 falls on a Saturday; the expected ordinal is 13_047.
        let weekend_day = [4];
        let business = PeriodIndex::from_fields(PeriodFields {
            month: Some(&single_month),
            day: Some(&weekend_day),
            freq: Some(PeriodFreq::Business),
            ..PeriodFields::new(&single_year)
        })?;
        assert_eq!(
            business.values(),
            &[Period::new(13_047, PeriodFreq::Business)]
        );

        // Secondly: 2020-01-02 03:04:05. The expected ordinal is derived through
        // the crate's own nanosecond helpers instead of being hard-coded.
        let days = [2];
        let hours = [3];
        let minutes = [4];
        let seconds = [5];
        let secondly = PeriodIndex::from_fields(PeriodFields {
            month: Some(&single_month),
            day: Some(&days),
            hour: Some(&hours),
            minute: Some(&minutes),
            second: Some(&seconds),
            freq: Some(PeriodFreq::Secondly),
            ..PeriodFields::new(&single_year)
        })?;
        let expected_date = chrono::NaiveDate::from_ymd_opt(2020, 1, 2)
            .ok_or_else(|| super::IndexError::InvalidArgument("invalid test date".to_owned()))?;
        let expected_time = chrono::NaiveTime::from_hms_opt(3, 4, 5)
            .ok_or_else(|| super::IndexError::InvalidArgument("invalid test time".to_owned()))?;
        let expected_nanos =
            super::date_and_time_to_nanos(expected_date, super::time_to_nanos(expected_time))
                .map_err(super::period_date_error)?;
        assert_eq!(
            secondly.values(),
            &[Period::new(
                super::datetime_period_ordinal(expected_nanos, PeriodFreq::Secondly)?,
                PeriodFreq::Secondly
            )]
        );

        // Rejected: two months paired with one year (presumably a field-length
        // mismatch; the test only checks that an error is returned).
        assert!(
            PeriodIndex::from_fields(PeriodFields {
                month: Some(&months),
                freq: Some(PeriodFreq::Monthly),
                ..PeriodFields::new(&single_year)
            })
            .is_err()
        );
        // Rejected: month 13.
        let invalid_month = [13];
        assert!(
            PeriodIndex::from_fields(PeriodFields {
                month: Some(&invalid_month),
                freq: Some(PeriodFreq::Monthly),
                ..PeriodFields::new(&single_year)
            })
            .is_err()
        );
        // Accepted: day 99 at monthly frequency still yields the period for
        // 2020-01 (ordinal 600), so the day does not cause an error here.
        let invalid_day = [99];
        assert_eq!(
            PeriodIndex::from_fields(PeriodFields {
                month: Some(&single_month),
                day: Some(&invalid_day),
                freq: Some(PeriodFreq::Monthly),
                ..PeriodFields::new(&single_year)
            })?
            .values(),
            &[Period::new(600, PeriodFreq::Monthly)]
        );
        // Rejected: `quarter` and `month` supplied together.
        assert!(
            PeriodIndex::from_fields(PeriodFields {
                quarter: Some(&quarters),
                month: Some(&single_month),
                freq: Some(PeriodFreq::Monthly),
                ..PeriodFields::new(&single_year)
            })
            .is_err()
        );
        Ok(())
    }
14328
14329    #[test]
14330    fn index_variant_wrappers_expose_identity_and_type_surface() {
14331        let range = RangeIndex::new(1, 7, 2).unwrap().set_name("row");
14332        assert!(range.is_(&range));
14333        assert!(range.equals(&range.copy()));
14334        assert!(range.identical(&range.copy()));
14335        assert!(!range.identical(&range.rename_index(None)));
14336        assert!(range.is_unique());
14337        assert!(!range.has_duplicates());
14338        assert!(range.is_monotonic_increasing());
14339        assert!(!range.is_monotonic_decreasing());
14340        assert_eq!(range.nunique(), 3);
14341        assert_eq!(range.ndim(), 1);
14342        assert_eq!(RangeIndex::new(4, 5, 1).unwrap().item().unwrap(), 4);
14343        assert!(range.item().is_err());
14344        assert!(range.holds_integer());
14345        assert_eq!(range.inferred_type(), "integer");
14346        assert!(range.is_integer());
14347        assert!(range.is_numeric());
14348        assert!(!range.is_boolean());
14349        assert!(!range.is_categorical());
14350        assert!(!range.is_floating());
14351        assert!(!range.is_interval());
14352        assert!(!range.is_object());
14353
14354        let dt = DatetimeIndex::new(vec![1_706_918_400_000_000_000, i64::MIN]).set_name("when");
14355        assert!(dt.is_(&dt));
14356        assert!(dt.equals(&dt.copy()));
14357        assert!(dt.identical(&dt.copy()));
14358        assert!(!dt.identical(&dt.rename_index(None)));
14359        assert!(dt.is_unique());
14360        assert!(!dt.has_duplicates());
14361        assert_eq!(dt.nunique(), 1);
14362        assert_eq!(dt.nunique_with_dropna(false), 2);
14363        assert_eq!(dt.ndim(), 1);
14364        assert_eq!(
14365            DatetimeIndex::new(vec![1_706_918_400_000_000_000])
14366                .item()
14367                .unwrap(),
14368            Some(1_706_918_400_000_000_000)
14369        );
14370        assert_eq!(DatetimeIndex::new(vec![i64::MIN]).item().unwrap(), None);
14371        assert_eq!(dt.inferred_type(), "datetime64");
14372        assert!(!dt.holds_integer());
14373        assert!(!dt.is_integer());
14374        assert!(!dt.is_numeric());
14375        assert!(!dt.is_boolean());
14376        assert!(!dt.is_categorical());
14377        assert!(!dt.is_floating());
14378        assert!(!dt.is_interval());
14379        assert!(!dt.is_object());
14380        assert!(DatetimeIndex::new(vec![1, 2]).is_monotonic_increasing());
14381        assert!(DatetimeIndex::new(vec![2, 1]).is_monotonic_decreasing());
14382
14383        let td = TimedeltaIndex::new(vec![1, Timedelta::NAT]).set_name("delta");
14384        assert!(td.is_(&td));
14385        assert!(td.equals(&td.copy()));
14386        assert!(td.identical(&td.copy()));
14387        assert!(!td.identical(&td.rename_index(None)));
14388        assert!(td.is_unique());
14389        assert_eq!(td.nunique(), 1);
14390        assert_eq!(td.nunique_with_dropna(false), 2);
14391        assert_eq!(td.ndim(), 1);
14392        assert_eq!(TimedeltaIndex::new(vec![7]).item().unwrap(), Some(7));
14393        assert_eq!(
14394            TimedeltaIndex::new(vec![Timedelta::NAT]).item().unwrap(),
14395            None
14396        );
14397        assert_eq!(td.inferred_type(), "timedelta64");
14398        assert!(!td.holds_integer());
14399        assert!(!td.is_integer());
14400        assert!(!td.is_numeric());
14401        assert!(!td.is_boolean());
14402        assert!(!td.is_categorical());
14403        assert!(!td.is_floating());
14404        assert!(!td.is_interval());
14405        assert!(!td.is_object());
14406        assert!(TimedeltaIndex::new(vec![1, 2]).is_monotonic_increasing());
14407        assert!(TimedeltaIndex::new(vec![2, 1]).is_monotonic_decreasing());
14408
14409        let period =
14410            PeriodIndex::from_range(Period::new(10, PeriodFreq::Monthly), 3).set_name("period");
14411        assert!(period.is_(&period));
14412        assert!(period.equals(&period.copy()));
14413        assert!(period.identical(&period.copy()));
14414        assert!(!period.identical(&period.rename_index(None)));
14415        assert!(period.is_unique());
14416        assert!(!period.has_duplicates());
14417        assert!(period.is_monotonic_increasing());
14418        assert!(!period.is_monotonic_decreasing());
14419        assert_eq!(period.nunique(), 3);
14420        assert_eq!(period.ndim(), 1);
14421        assert_eq!(
14422            PeriodIndex::new(vec![Period::new(42, PeriodFreq::Daily)])
14423                .item()
14424                .unwrap(),
14425            Period::new(42, PeriodFreq::Daily)
14426        );
14427        assert_eq!(period.inferred_type(), "period");
14428        assert!(!period.holds_integer());
14429        assert!(!period.is_integer());
14430        assert!(!period.is_numeric());
14431        assert!(!period.is_boolean());
14432        assert!(!period.is_categorical());
14433        assert!(!period.is_floating());
14434        assert!(!period.is_interval());
14435        assert!(!period.is_object());
14436
14437        let categorical = CategoricalIndex::from_values(
14438            vec!["low".to_owned(), "high".to_owned(), "low".to_owned()],
14439            true,
14440        )
14441        .set_name("priority");
14442        assert!(categorical.is_(&categorical));
14443        assert!(categorical.equals(&categorical.copy()));
14444        assert!(categorical.identical(&categorical.copy()));
14445        assert!(!categorical.identical(&categorical.rename_index(None)));
14446        assert!(!categorical.is_unique());
14447        assert!(categorical.has_duplicates());
14448        assert_eq!(categorical.nunique(), 2);
14449        assert_eq!(categorical.ndim(), 1);
14450        assert_eq!(
14451            CategoricalIndex::from_values(vec!["high".to_owned()], true)
14452                .item()
14453                .unwrap(),
14454            "high"
14455        );
14456        assert_eq!(categorical.inferred_type(), "categorical");
14457        assert!(!categorical.holds_integer());
14458        assert!(!categorical.is_integer());
14459        assert!(!categorical.is_numeric());
14460        assert!(!categorical.is_boolean());
14461        assert!(categorical.is_categorical());
14462        assert!(!categorical.is_floating());
14463        assert!(!categorical.is_interval());
14464        assert!(!categorical.is_object());
14465        assert!(!categorical.is_monotonic_increasing());
14466        assert!(!categorical.is_monotonic_decreasing());
14467        assert!(
14468            CategoricalIndex::from_values(vec!["low".to_owned(), "high".to_owned()], true)
14469                .is_monotonic_increasing()
14470        );
14471    }
14472
14473    // === AG-13: Adaptive Index Backend Tests ===
14474
14475    #[test]
14476    fn sorted_int64_index_detected() {
14477        let index = Index::from_i64(vec![1, 2, 3, 4, 5]);
14478        assert!(index.is_sorted());
14479    }
14480
14481    #[test]
14482    fn unsorted_int64_index_detected() {
14483        let index = Index::from_i64(vec![3, 1, 2]);
14484        assert!(!index.is_sorted());
14485    }
14486
14487    #[test]
14488    fn sorted_utf8_index_detected() {
14489        let index = Index::from_utf8(vec!["a".into(), "b".into(), "c".into()]);
14490        assert!(index.is_sorted());
14491    }
14492
14493    #[test]
14494    fn unsorted_utf8_index_detected() {
14495        let index = Index::from_utf8(vec!["c".into(), "a".into(), "b".into()]);
14496        assert!(!index.is_sorted());
14497    }
14498
14499    #[test]
14500    fn duplicate_int64_is_not_sorted() {
14501        let index = Index::from_i64(vec![1, 2, 2, 3]);
14502        assert!(!index.is_sorted());
14503    }
14504
14505    #[test]
14506    fn empty_index_is_sorted() {
14507        let index = Index::new(vec![]);
14508        assert!(index.is_sorted());
14509    }
14510
14511    #[test]
14512    fn single_element_is_sorted() {
14513        let index = Index::from_i64(vec![42]);
14514        assert!(index.is_sorted());
14515    }
14516
14517    #[test]
14518    fn binary_search_position_sorted_int64() {
14519        let index = Index::from_i64(vec![10, 20, 30, 40, 50]);
14520        assert_eq!(index.position(&IndexLabel::Int64(10)), Some(0));
14521        assert_eq!(index.position(&IndexLabel::Int64(30)), Some(2));
14522        assert_eq!(index.position(&IndexLabel::Int64(50)), Some(4));
14523        assert_eq!(index.position(&IndexLabel::Int64(25)), None);
14524        assert_eq!(index.position(&IndexLabel::Int64(0)), None);
14525        assert_eq!(index.position(&IndexLabel::Int64(100)), None);
14526    }
14527
14528    #[test]
14529    fn binary_search_position_sorted_utf8() {
14530        let index = Index::from_utf8(vec!["apple".into(), "banana".into(), "cherry".into()]);
14531        assert_eq!(index.position(&IndexLabel::Utf8("apple".into())), Some(0));
14532        assert_eq!(index.position(&IndexLabel::Utf8("banana".into())), Some(1));
14533        assert_eq!(index.position(&IndexLabel::Utf8("cherry".into())), Some(2));
14534        assert_eq!(index.position(&IndexLabel::Utf8("date".into())), None);
14535    }
14536
14537    #[test]
14538    fn type_mismatch_returns_none() {
14539        let int_index = Index::from_i64(vec![1, 2, 3]);
14540        // Looking for a Utf8 needle in an Int64 index
14541        assert_eq!(int_index.position(&IndexLabel::Utf8("1".into())), None);
14542
14543        let utf8_index = Index::from_utf8(vec!["a".into(), "b".into()]);
14544        // Looking for an Int64 needle in a Utf8 index
14545        assert_eq!(utf8_index.position(&IndexLabel::Int64(1)), None);
14546    }
14547
14548    #[test]
14549    fn linear_fallback_for_unsorted_index() {
14550        let index = Index::from_i64(vec![30, 10, 20]);
14551        assert!(!index.is_sorted());
14552        assert_eq!(index.position(&IndexLabel::Int64(30)), Some(0));
14553        assert_eq!(index.position(&IndexLabel::Int64(10)), Some(1));
14554        assert_eq!(index.position(&IndexLabel::Int64(20)), Some(2));
14555        assert_eq!(index.position(&IndexLabel::Int64(99)), None);
14556    }
14557
14558    #[test]
14559    fn binary_search_large_sorted_index() {
14560        // Verify binary search works correctly on a large sorted index.
14561        let labels: Vec<i64> = (0..10_000).collect();
14562        let index = Index::from_i64(labels);
14563        assert!(index.is_sorted());
14564
14565        // Check first, middle, last, and missing positions.
14566        assert_eq!(index.position(&IndexLabel::Int64(0)), Some(0));
14567        assert_eq!(index.position(&IndexLabel::Int64(5000)), Some(5000));
14568        assert_eq!(index.position(&IndexLabel::Int64(9999)), Some(9999));
14569        assert_eq!(index.position(&IndexLabel::Int64(10_000)), None);
14570        assert_eq!(index.position(&IndexLabel::Int64(-1)), None);
14571    }
14572
14573    #[test]
14574    fn sort_detection_is_cached() {
14575        let index = Index::from_i64(vec![1, 2, 3]);
14576        // First call computes and caches.
14577        assert!(index.is_sorted());
14578        // Second call should return same result from cache.
14579        assert!(index.is_sorted());
14580    }
14581
14582    #[test]
14583    fn mixed_label_types_are_unsorted() {
14584        let index = Index::new(vec![IndexLabel::Int64(1), IndexLabel::Utf8("a".into())]);
14585        assert!(!index.is_sorted());
14586    }
14587
14588    #[test]
14589    fn position_consistent_sorted_vs_unsorted() {
14590        // Verify that for a sorted index, binary search gives the same
14591        // results as a linear scan would.
14592        let sorted = Index::from_i64(vec![5, 10, 15, 20, 25]);
14593        assert!(sorted.is_sorted());
14594
14595        for &target in &[5, 10, 15, 20, 25, 0, 12, 30] {
14596            let needle = IndexLabel::Int64(target);
14597            let expected = sorted.labels().iter().position(|l| l == &needle);
14598            assert_eq!(
14599                sorted.position(&needle),
14600                expected,
14601                "mismatch for target={target}"
14602            );
14603        }
14604    }
14605
14606    // === bd-2gi.15: Alignment mode tests ===
14607
14608    use super::{AlignMode, align, align_inner, align_left};
14609
14610    #[test]
14611    fn align_inner_keeps_only_overlapping_labels() {
14612        let left = Index::new(vec![1_i64.into(), 2_i64.into(), 3_i64.into()]);
14613        let right = Index::new(vec![2_i64.into(), 3_i64.into(), 4_i64.into()]);
14614
14615        let plan = align_inner(&left, &right);
14616        assert_eq!(
14617            plan.union_index.labels(),
14618            &[IndexLabel::Int64(2), IndexLabel::Int64(3)]
14619        );
14620        assert_eq!(plan.left_positions, vec![Some(1), Some(2)]);
14621        assert_eq!(plan.right_positions, vec![Some(0), Some(1)]);
14622        validate_alignment_plan(&plan).expect("valid");
14623    }
14624
14625    #[test]
14626    fn align_inner_disjoint_yields_empty() {
14627        let left = Index::new(vec![1_i64.into(), 2_i64.into()]);
14628        let right = Index::new(vec![3_i64.into(), 4_i64.into()]);
14629
14630        let plan = align_inner(&left, &right);
14631        assert!(plan.union_index.is_empty());
14632        assert!(plan.left_positions.is_empty());
14633        assert!(plan.right_positions.is_empty());
14634    }
14635
14636    #[test]
14637    fn align_left_preserves_all_left_labels() {
14638        let left = Index::new(vec!["a".into(), "b".into(), "c".into()]);
14639        let right = Index::new(vec!["b".into(), "d".into()]);
14640
14641        let plan = align_left(&left, &right);
14642        assert_eq!(
14643            plan.union_index.labels(),
14644            &["a".into(), "b".into(), "c".into()]
14645        );
14646        assert_eq!(plan.left_positions, vec![Some(0), Some(1), Some(2)]);
14647        assert_eq!(plan.right_positions, vec![None, Some(0), None]);
14648        validate_alignment_plan(&plan).expect("valid");
14649    }
14650
14651    #[test]
14652    fn align_right_preserves_all_right_labels() {
14653        let left = Index::new(vec!["a".into(), "b".into()]);
14654        let right = Index::new(vec!["b".into(), "c".into(), "d".into()]);
14655
14656        let plan = align(&left, &right, AlignMode::Right);
14657        assert_eq!(
14658            plan.union_index.labels(),
14659            &["b".into(), "c".into(), "d".into()]
14660        );
14661        // Left has "b" at position 1.
14662        assert_eq!(plan.left_positions, vec![Some(1), None, None]);
14663        assert_eq!(plan.right_positions, vec![Some(0), Some(1), Some(2)]);
14664    }
14665
14666    #[test]
14667    fn align_mode_outer_matches_union() {
14668        let left = Index::new(vec![1_i64.into(), 2_i64.into()]);
14669        let right = Index::new(vec![2_i64.into(), 3_i64.into()]);
14670
14671        let plan_outer = align(&left, &right, AlignMode::Outer);
14672        let plan_union = align_union(&left, &right);
14673        assert_eq!(plan_outer, plan_union);
14674    }
14675
14676    #[test]
14677    fn align_inner_duplicate_labels_cartesian() {
14678        let left = Index::new(vec!["a".into(), "b".into(), "a".into()]);
14679        let right = Index::new(vec!["a".into(), "a".into(), "c".into()]);
14680
14681        let plan = align_inner(&left, &right);
14682        assert_eq!(
14683            plan.union_index.labels(),
14684            &["a".into(), "a".into(), "a".into(), "a".into()]
14685        );
14686        assert_eq!(
14687            plan.left_positions,
14688            vec![Some(0), Some(0), Some(2), Some(2)]
14689        );
14690        assert_eq!(
14691            plan.right_positions,
14692            vec![Some(0), Some(1), Some(0), Some(1)]
14693        );
14694        validate_alignment_plan(&plan).expect("valid");
14695    }
14696
14697    #[test]
14698    fn align_left_duplicate_labels_expand_right_matches() {
14699        let left = Index::new(vec!["a".into(), "b".into(), "a".into()]);
14700        let right = Index::new(vec!["a".into(), "a".into(), "c".into()]);
14701
14702        let plan = align_left(&left, &right);
14703        assert_eq!(
14704            plan.union_index.labels(),
14705            &["a".into(), "a".into(), "b".into(), "a".into(), "a".into()]
14706        );
14707        assert_eq!(
14708            plan.left_positions,
14709            vec![Some(0), Some(0), Some(1), Some(2), Some(2)]
14710        );
14711        assert_eq!(
14712            plan.right_positions,
14713            vec![Some(0), Some(1), None, Some(0), Some(1)]
14714        );
14715        validate_alignment_plan(&plan).expect("valid");
14716    }
14717
14718    #[test]
14719    fn align_right_duplicate_labels_expand_left_matches() {
14720        let left = Index::new(vec!["a".into(), "b".into(), "a".into()]);
14721        let right = Index::new(vec!["a".into(), "a".into(), "c".into()]);
14722
14723        let plan = align(&left, &right, AlignMode::Right);
14724        assert_eq!(
14725            plan.union_index.labels(),
14726            &["a".into(), "a".into(), "a".into(), "a".into(), "c".into()]
14727        );
14728        assert_eq!(
14729            plan.left_positions,
14730            vec![Some(0), Some(2), Some(0), Some(2), None]
14731        );
14732        assert_eq!(
14733            plan.right_positions,
14734            vec![Some(0), Some(0), Some(1), Some(1), Some(2)]
14735        );
14736        validate_alignment_plan(&plan).expect("valid");
14737    }
14738
14739    #[test]
14740    fn align_outer_duplicate_labels_preserves_left_order_and_right_only() {
14741        let left = Index::new(vec!["a".into(), "b".into(), "a".into()]);
14742        let right = Index::new(vec!["a".into(), "a".into(), "c".into()]);
14743
14744        let plan = align_union(&left, &right);
14745        assert_eq!(
14746            plan.union_index.labels(),
14747            &[
14748                "a".into(),
14749                "a".into(),
14750                "b".into(),
14751                "a".into(),
14752                "a".into(),
14753                "c".into()
14754            ]
14755        );
14756        assert_eq!(
14757            plan.left_positions,
14758            vec![Some(0), Some(0), Some(1), Some(2), Some(2), None]
14759        );
14760        assert_eq!(
14761            plan.right_positions,
14762            vec![Some(0), Some(1), None, Some(0), Some(1), Some(2)]
14763        );
14764        validate_alignment_plan(&plan).expect("valid");
14765    }
14766
14767    #[test]
14768    fn align_inner_identical_indexes() {
14769        let left = Index::new(vec!["x".into(), "y".into()]);
14770        let right = Index::new(vec!["x".into(), "y".into()]);
14771
14772        let plan = align_inner(&left, &right);
14773        assert_eq!(plan.union_index.labels(), &["x".into(), "y".into()]);
14774        assert_eq!(plan.left_positions, vec![Some(0), Some(1)]);
14775        assert_eq!(plan.right_positions, vec![Some(0), Some(1)]);
14776    }
14777
14778    #[test]
14779    fn align_left_identical_indexes() {
14780        let left = Index::new(vec![1_i64.into(), 2_i64.into()]);
14781        let right = Index::new(vec![1_i64.into(), 2_i64.into()]);
14782
14783        let plan = align_left(&left, &right);
14784        assert_eq!(plan.union_index.labels(), left.labels());
14785        assert_eq!(plan.left_positions, vec![Some(0), Some(1)]);
14786        assert_eq!(plan.right_positions, vec![Some(0), Some(1)]);
14787    }
14788
14789    #[test]
14790    fn align_inner_empty_input() {
14791        let left = Index::new(Vec::new());
14792        let right = Index::new(vec![1_i64.into()]);
14793
14794        let plan = align_inner(&left, &right);
14795        assert!(plan.union_index.is_empty());
14796    }
14797
14798    #[test]
14799    fn align_left_empty_left() {
14800        let left = Index::new(Vec::new());
14801        let right = Index::new(vec![1_i64.into()]);
14802
14803        let plan = align_left(&left, &right);
14804        assert!(plan.union_index.is_empty());
14805    }
14806
14807    // === bd-2gi.13: Index model and indexer semantics ===
14808
14809    use super::DuplicateKeep;
14810
14811    #[test]
14812    fn contains_finds_existing_label() {
14813        let index = Index::from_i64(vec![10, 20, 30]);
14814        assert!(index.contains(&IndexLabel::Int64(20)));
14815        assert!(!index.contains(&IndexLabel::Int64(99)));
14816    }
14817
14818    #[test]
14819    fn get_indexer_bulk_lookup() {
14820        let index = Index::new(vec!["a".into(), "b".into(), "c".into()]);
14821        let target = Index::new(vec!["c".into(), "a".into(), "z".into()]);
14822        assert_eq!(index.get_indexer(&target), vec![Some(2), Some(0), None]);
14823    }
14824
14825    #[test]
14826    fn isin_membership_mask() {
14827        let index = Index::from_i64(vec![1, 2, 3, 4, 5]);
14828        let values = vec![IndexLabel::Int64(2), IndexLabel::Int64(4)];
14829        assert_eq!(index.isin(&values), vec![false, true, false, true, false]);
14830    }
14831
14832    #[test]
14833    fn unique_preserves_first_seen_order() {
14834        let index = Index::new(vec![
14835            "b".into(),
14836            "a".into(),
14837            "b".into(),
14838            "c".into(),
14839            "a".into(),
14840        ]);
14841        let uniq = index.unique();
14842        assert_eq!(uniq.labels(), &["b".into(), "a".into(), "c".into()]);
14843    }
14844
14845    #[test]
14846    fn duplicated_keep_first() {
14847        let index = Index::from_i64(vec![1, 2, 1, 3, 2]);
14848        assert_eq!(
14849            index.duplicated(DuplicateKeep::First),
14850            vec![false, false, true, false, true]
14851        );
14852    }
14853
14854    #[test]
14855    fn duplicated_keep_last() {
14856        let index = Index::from_i64(vec![1, 2, 1, 3, 2]);
14857        assert_eq!(
14858            index.duplicated(DuplicateKeep::Last),
14859            vec![true, true, false, false, false]
14860        );
14861    }
14862
14863    #[test]
14864    fn duplicated_keep_none_marks_all() {
14865        let index = Index::from_i64(vec![1, 2, 1, 3, 2]);
14866        assert_eq!(
14867            index.duplicated(DuplicateKeep::None),
14868            vec![true, true, true, false, true]
14869        );
14870    }
14871
14872    #[test]
14873    fn drop_duplicates_equals_unique() {
14874        let index = Index::from_i64(vec![3, 1, 3, 2, 1]);
14875        assert_eq!(index.drop_duplicates(), index.unique());
14876    }
14877
14878    #[test]
14879    fn index_drop_duplicates_keep_last() {
14880        let index = Index::new(vec![
14881            "llama".into(),
14882            "cow".into(),
14883            "llama".into(),
14884            "beetle".into(),
14885            "llama".into(),
14886            "hippo".into(),
14887        ])
14888        .set_names(Some("animals"));
14889
14890        let deduped = index.drop_duplicates_keep(DuplicateKeep::Last);
14891
14892        assert_eq!(
14893            deduped.labels(),
14894            &[
14895                IndexLabel::from("cow"),
14896                IndexLabel::from("beetle"),
14897                IndexLabel::from("llama"),
14898                IndexLabel::from("hippo"),
14899            ]
14900        );
14901        assert_eq!(deduped.name(), Some("animals"));
14902    }
14903
14904    #[test]
14905    fn index_drop_duplicates_keep_none_discards_all_duplicates() {
14906        let index = Index::new(vec![
14907            "llama".into(),
14908            "cow".into(),
14909            "llama".into(),
14910            "beetle".into(),
14911            "llama".into(),
14912            "hippo".into(),
14913        ]);
14914
14915        let deduped = index.drop_duplicates_keep(DuplicateKeep::None);
14916
14917        assert_eq!(
14918            deduped.labels(),
14919            &[
14920                IndexLabel::from("cow"),
14921                IndexLabel::from("beetle"),
14922                IndexLabel::from("hippo"),
14923            ]
14924        );
14925    }
14926
14927    #[test]
14928    fn intersection_preserves_left_order() {
14929        let left = Index::new(vec!["c".into(), "a".into(), "b".into()]);
14930        let right = Index::new(vec!["b".into(), "d".into(), "a".into()]);
14931        let result = left.intersection(&right);
14932        assert_eq!(result.labels(), &["a".into(), "b".into()]);
14933    }
14934
14935    #[test]
14936    fn intersection_deduplicates() {
14937        let left = Index::from_i64(vec![1, 1, 2]);
14938        let right = Index::from_i64(vec![1, 2, 2]);
14939        let result = left.intersection(&right);
14940        assert_eq!(
14941            result.labels(),
14942            &[IndexLabel::Int64(1), IndexLabel::Int64(2)]
14943        );
14944    }
14945
14946    #[test]
14947    fn union_with_combines_unique_labels() {
14948        let left = Index::from_i64(vec![1, 2, 3]);
14949        let right = Index::from_i64(vec![2, 4, 3]);
14950        let result = left.union_with(&right);
14951        assert_eq!(
14952            result.labels(),
14953            &[
14954                IndexLabel::Int64(1),
14955                IndexLabel::Int64(2),
14956                IndexLabel::Int64(3),
14957                IndexLabel::Int64(4),
14958            ]
14959        );
14960    }
14961
14962    #[test]
14963    fn difference_removes_other_labels() {
14964        let left = Index::from_i64(vec![1, 2, 3, 4]);
14965        let right = Index::from_i64(vec![2, 4]);
14966        let result = left.difference(&right);
14967        assert_eq!(
14968            result.labels(),
14969            &[IndexLabel::Int64(1), IndexLabel::Int64(3)]
14970        );
14971    }
14972
14973    #[test]
14974    fn difference_preserves_self_name_even_when_other_differs_6r1lq() {
14975        // Per br-frankenpandas-6r1lq: difference is asymmetric — pandas
14976        // preserves self.name regardless of whether other has the same name.
14977        let left = Index::from_i64(vec![1, 2, 3]).set_name("left_axis");
14978        let right = Index::from_i64(vec![2, 3, 4]).set_name("right_axis");
14979        let result = left.difference(&right);
14980        assert_eq!(result.name(), Some("left_axis"));
14981    }
14982
14983    #[test]
14984    fn symmetric_difference_xor() {
14985        let left = Index::from_i64(vec![1, 2, 3]);
14986        let right = Index::from_i64(vec![2, 3, 4]);
14987        let result = left.symmetric_difference(&right);
14988        assert_eq!(
14989            result.labels(),
14990            &[IndexLabel::Int64(1), IndexLabel::Int64(4)]
14991        );
14992    }
14993
14994    #[test]
14995    fn argsort_returns_sorting_indices() {
14996        let index = Index::from_i64(vec![30, 10, 20]);
14997        assert_eq!(index.argsort(), vec![1, 2, 0]);
14998    }
14999
15000    #[test]
15001    fn sort_values_produces_sorted_index() {
15002        let index = Index::new(vec!["c".into(), "a".into(), "b".into()]);
15003        let sorted = index.sort_values();
15004        assert_eq!(sorted.labels(), &["a".into(), "b".into(), "c".into()]);
15005    }
15006
15007    #[test]
15008    fn take_selects_by_position() {
15009        let index = Index::from_i64(vec![10, 20, 30, 40, 50]);
15010        let taken = index.take(&[4, 0, 2]);
15011        assert_eq!(
15012            taken.labels(),
15013            &[
15014                IndexLabel::Int64(50),
15015                IndexLabel::Int64(10),
15016                IndexLabel::Int64(30),
15017            ]
15018        );
15019    }
15020
15021    #[test]
15022    fn slice_extracts_subrange() {
15023        let index = Index::from_i64(vec![10, 20, 30, 40, 50]);
15024        let sliced = index.slice(1, 3);
15025        assert_eq!(
15026            sliced.labels(),
15027            &[
15028                IndexLabel::Int64(20),
15029                IndexLabel::Int64(30),
15030                IndexLabel::Int64(40),
15031            ]
15032        );
15033    }
15034
15035    #[test]
15036    fn slice_clamps_to_bounds() {
15037        let index = Index::from_i64(vec![1, 2, 3]);
15038        let sliced = index.slice(1, 100);
15039        assert_eq!(
15040            sliced.labels(),
15041            &[IndexLabel::Int64(2), IndexLabel::Int64(3)]
15042        );
15043    }
15044
15045    #[test]
15046    fn from_range_basic() {
15047        let index = Index::from_range(0, 5, 1);
15048        assert_eq!(
15049            index.labels(),
15050            &[
15051                IndexLabel::Int64(0),
15052                IndexLabel::Int64(1),
15053                IndexLabel::Int64(2),
15054                IndexLabel::Int64(3),
15055                IndexLabel::Int64(4),
15056            ]
15057        );
15058    }
15059
15060    #[test]
15061    fn from_range_step_2() {
15062        let index = Index::from_range(0, 10, 3);
15063        assert_eq!(
15064            index.labels(),
15065            &[
15066                IndexLabel::Int64(0),
15067                IndexLabel::Int64(3),
15068                IndexLabel::Int64(6),
15069                IndexLabel::Int64(9),
15070            ]
15071        );
15072    }
15073
15074    #[test]
15075    fn from_range_negative_step() {
15076        let index = Index::from_range(5, 0, -2);
15077        assert_eq!(
15078            index.labels(),
15079            &[
15080                IndexLabel::Int64(5),
15081                IndexLabel::Int64(3),
15082                IndexLabel::Int64(1),
15083            ]
15084        );
15085    }
15086
15087    #[test]
15088    fn from_range_empty_when_step_zero() {
15089        let index = Index::from_range(0, 5, 0);
15090        assert!(index.is_empty());
15091    }
15092
15093    #[test]
15094    fn set_ops_empty_inputs() {
15095        let empty = Index::new(Vec::new());
15096        let non_empty = Index::from_i64(vec![1, 2]);
15097        assert!(empty.intersection(&non_empty).is_empty());
15098        assert_eq!(empty.union_with(&non_empty), non_empty);
15099        assert!(empty.difference(&non_empty).is_empty());
15100        assert_eq!(empty.symmetric_difference(&non_empty), non_empty);
15101    }
15102
15103    // === AG-11: Leapfrog Triejoin Tests ===
15104
15105    use super::{leapfrog_intersection, leapfrog_union, multi_way_align};
15106
15107    #[test]
15108    fn leapfrog_union_three_indexes() {
15109        let a = Index::from_i64(vec![1, 3, 5]);
15110        let b = Index::from_i64(vec![2, 3, 6]);
15111        let c = Index::from_i64(vec![4, 5, 6]);
15112        let result = leapfrog_union(&[&a, &b, &c]);
15113        assert_eq!(
15114            result.labels(),
15115            &[
15116                IndexLabel::Int64(1),
15117                IndexLabel::Int64(2),
15118                IndexLabel::Int64(3),
15119                IndexLabel::Int64(4),
15120                IndexLabel::Int64(5),
15121                IndexLabel::Int64(6),
15122            ]
15123        );
15124    }
15125
15126    #[test]
15127    fn leapfrog_union_deduplicates() {
15128        let a = Index::from_i64(vec![1, 1, 2]);
15129        let b = Index::from_i64(vec![2, 2, 3]);
15130        let result = leapfrog_union(&[&a, &b]);
15131        assert_eq!(
15132            result.labels(),
15133            &[
15134                IndexLabel::Int64(1),
15135                IndexLabel::Int64(2),
15136                IndexLabel::Int64(3),
15137            ]
15138        );
15139    }
15140
15141    #[test]
15142    fn leapfrog_union_single_index() {
15143        let a = Index::from_i64(vec![3, 1, 2]);
15144        let result = leapfrog_union(&[&a]);
15145        assert_eq!(
15146            result.labels(),
15147            &[
15148                IndexLabel::Int64(1),
15149                IndexLabel::Int64(2),
15150                IndexLabel::Int64(3),
15151            ]
15152        );
15153    }
15154
15155    #[test]
15156    fn leapfrog_union_empty() {
15157        let result = leapfrog_union(&[]);
15158        assert!(result.is_empty());
15159    }
15160
15161    #[test]
15162    fn leapfrog_union_with_empty_input() {
15163        let a = Index::from_i64(vec![1, 2]);
15164        let b = Index::new(Vec::new());
15165        let result = leapfrog_union(&[&a, &b]);
15166        assert_eq!(
15167            result.labels(),
15168            &[IndexLabel::Int64(1), IndexLabel::Int64(2)]
15169        );
15170    }
15171
15172    #[test]
15173    fn leapfrog_intersection_three_indexes() {
15174        let a = Index::from_i64(vec![1, 2, 3, 4, 5]);
15175        let b = Index::from_i64(vec![2, 3, 5, 7]);
15176        let c = Index::from_i64(vec![3, 5, 8]);
15177        let result = leapfrog_intersection(&[&a, &b, &c]);
15178        assert_eq!(
15179            result.labels(),
15180            &[IndexLabel::Int64(3), IndexLabel::Int64(5)]
15181        );
15182    }
15183
15184    #[test]
15185    fn leapfrog_intersection_disjoint() {
15186        let a = Index::from_i64(vec![1, 2]);
15187        let b = Index::from_i64(vec![3, 4]);
15188        let result = leapfrog_intersection(&[&a, &b]);
15189        assert!(result.is_empty());
15190    }
15191
15192    #[test]
15193    fn leapfrog_intersection_identical() {
15194        let a = Index::from_i64(vec![1, 2, 3]);
15195        let b = Index::from_i64(vec![1, 2, 3]);
15196        let result = leapfrog_intersection(&[&a, &b]);
15197        assert_eq!(
15198            result.labels(),
15199            &[
15200                IndexLabel::Int64(1),
15201                IndexLabel::Int64(2),
15202                IndexLabel::Int64(3),
15203            ]
15204        );
15205    }
15206
15207    #[test]
15208    fn leapfrog_intersection_with_unsorted_input() {
15209        let a = Index::from_i64(vec![5, 3, 1, 4, 2]);
15210        let b = Index::from_i64(vec![4, 2, 6, 1]);
15211        let result = leapfrog_intersection(&[&a, &b]);
15212        assert_eq!(
15213            result.labels(),
15214            &[
15215                IndexLabel::Int64(1),
15216                IndexLabel::Int64(2),
15217                IndexLabel::Int64(4),
15218            ]
15219        );
15220    }
15221
15222    #[test]
15223    fn leapfrog_intersection_empty_input() {
15224        let a = Index::from_i64(vec![1, 2, 3]);
15225        let b = Index::new(Vec::new());
15226        let result = leapfrog_intersection(&[&a, &b]);
15227        assert!(result.is_empty());
15228    }
15229
15230    #[test]
15231    fn multi_way_align_three_indexes() {
15232        let a = Index::from_i64(vec![1, 3]);
15233        let b = Index::from_i64(vec![2, 3]);
15234        let c = Index::from_i64(vec![1, 2]);
15235        let plan = multi_way_align(&[&a, &b, &c]);
15236        assert_eq!(
15237            plan.union_index.labels(),
15238            &[
15239                IndexLabel::Int64(1),
15240                IndexLabel::Int64(3),
15241                IndexLabel::Int64(2),
15242            ]
15243        );
15244        assert_eq!(plan.positions.len(), 3);
15245        // a has 1 at pos 0, 3 at pos 1, no 2
15246        assert_eq!(plan.positions[0], vec![Some(0), Some(1), None]);
15247        // b has no 1, 3 at pos 1, 2 at pos 0
15248        assert_eq!(plan.positions[1], vec![None, Some(1), Some(0)]);
15249        // c has 1 at pos 0, no 3, 2 at pos 1
15250        assert_eq!(plan.positions[2], vec![Some(0), None, Some(1)]);
15251    }
15252
15253    #[test]
15254    fn multi_way_align_empty() {
15255        let plan = multi_way_align(&[]);
15256        assert!(plan.union_index.is_empty());
15257        assert!(plan.positions.is_empty());
15258    }
15259
15260    #[test]
15261    fn multi_way_align_isomorphic_with_pairwise() {
15262        // AG-11 contract: multi-way union produces same label set as
15263        // iterative pairwise union (associativity + commutativity).
15264        let a = Index::from_i64(vec![1, 4, 7]);
15265        let b = Index::from_i64(vec![2, 4, 8]);
15266        let c = Index::from_i64(vec![3, 7, 8]);
15267
15268        let multi = leapfrog_union(&[&a, &b, &c]);
15269
15270        // Iterative pairwise
15271        let ab = a.union_with(&b);
15272        let abc = ab.union_with(&c);
15273        let pairwise = abc.sort_values();
15274
15275        assert_eq!(multi.labels(), pairwise.labels());
15276    }
15277
15278    #[test]
15279    fn leapfrog_union_utf8_labels() {
15280        let a = Index::new(vec!["c".into(), "a".into()]);
15281        let b = Index::new(vec!["b".into(), "d".into()]);
15282        let result = leapfrog_union(&[&a, &b]);
15283        assert_eq!(
15284            result.labels(),
15285            &["a".into(), "b".into(), "c".into(), "d".into()]
15286        );
15287    }
15288
15289    #[test]
15290    fn leapfrog_large_multi_way() {
15291        // 5 indexes, each 1000 labels, overlapping ranges
15292        let indexes: Vec<Index> = (0..5)
15293            .map(|i| {
15294                let start = i * 200;
15295                let end = start + 1000;
15296                Index::from_i64((start..end).collect())
15297            })
15298            .collect();
15299        let refs: Vec<&Index> = indexes.iter().collect();
15300
15301        let union = leapfrog_union(&refs);
15302        // Range is 0..1800 (0-999, 200-1199, 400-1399, 600-1599, 800-1799)
15303        assert_eq!(union.len(), 1800);
15304
15305        let intersection = leapfrog_intersection(&refs);
15306        // Intersection is 800..999 (all 5 overlap)
15307        assert_eq!(intersection.len(), 200);
15308    }
15309
15310    // === AG-11-T: Full test plan (bd-2t5e.17) ===
15311
15312    #[test]
15313    fn ag11t_two_sorted_identical() {
15314        let a = Index::from_i64(vec![1, 2, 3]);
15315        let b = Index::from_i64(vec![1, 2, 3]);
15316        let result = leapfrog_union(&[&a, &b]);
15317        assert_eq!(
15318            result.labels(),
15319            &[
15320                IndexLabel::Int64(1),
15321                IndexLabel::Int64(2),
15322                IndexLabel::Int64(3)
15323            ]
15324        );
15325        let plan = multi_way_align(&[&a, &b]);
15326        // Both map to identity positions
15327        assert_eq!(plan.positions[0], vec![Some(0), Some(1), Some(2)]);
15328        assert_eq!(plan.positions[1], vec![Some(0), Some(1), Some(2)]);
15329        eprintln!("[AG-11-T] two_sorted_identical | in=[3,3] out=3 | PASS");
15330    }
15331
15332    #[test]
15333    fn ag11t_two_sorted_disjoint() {
15334        let a = Index::from_i64(vec![1, 2, 3]);
15335        let b = Index::from_i64(vec![4, 5, 6]);
15336        let result = leapfrog_union(&[&a, &b]);
15337        assert_eq!(result.len(), 6);
15338        assert_eq!(result.labels()[0], IndexLabel::Int64(1));
15339        assert_eq!(result.labels()[5], IndexLabel::Int64(6));
15340        eprintln!("[AG-11-T] two_sorted_disjoint | in=[3,3] out=6 | PASS");
15341    }
15342
15343    #[test]
15344    fn ag11t_two_sorted_overlapping_with_positions() {
15345        let a = Index::from_i64(vec![1, 3, 5]);
15346        let b = Index::from_i64(vec![2, 3, 4]);
15347        let plan = multi_way_align(&[&a, &b]);
15348        assert_eq!(
15349            plan.union_index.labels(),
15350            &[
15351                IndexLabel::Int64(1),
15352                IndexLabel::Int64(3),
15353                IndexLabel::Int64(5),
15354                IndexLabel::Int64(2),
15355                IndexLabel::Int64(4),
15356            ]
15357        );
15358        assert_eq!(
15359            plan.positions[0],
15360            vec![Some(0), Some(1), Some(2), None, None]
15361        );
15362        assert_eq!(
15363            plan.positions[1],
15364            vec![None, Some(1), None, Some(0), Some(2)]
15365        );
15366        eprintln!("[AG-11-T] two_sorted_overlapping | in=[3,3] out=5 | PASS");
15367    }
15368
15369    #[test]
15370    fn ag11t_five_way_union_vs_pairwise() {
15371        let indexes: Vec<Index> = (0..5)
15372            .map(|i| Index::from_i64(vec![i * 10, i * 10 + 1, i * 10 + 2]))
15373            .collect();
15374        let refs: Vec<&Index> = indexes.iter().collect();
15375
15376        let leapfrog = leapfrog_union(&refs);
15377
15378        // Iterative pairwise
15379        let mut pairwise = indexes[0].clone();
15380        for idx in &indexes[1..] {
15381            pairwise = pairwise.union_with(idx);
15382        }
15383        let pairwise = pairwise.sort_values();
15384
15385        assert_eq!(leapfrog.labels(), pairwise.labels());
15386        eprintln!(
15387            "[AG-11-T] five_way_union_vs_pairwise | in=[3x5] out={} | PASS",
15388            leapfrog.len()
15389        );
15390    }
15391
15392    #[test]
15393    fn ag11t_single_element_indexes() {
15394        let indexes: Vec<Index> = (0..10).map(|i| Index::from_i64(vec![i])).collect();
15395        let refs: Vec<&Index> = indexes.iter().collect();
15396        let result = leapfrog_union(&refs);
15397        assert_eq!(result.len(), 10);
15398        for (i, label) in result.labels().iter().enumerate() {
15399            assert_eq!(*label, IndexLabel::Int64(i as i64));
15400        }
15401        eprintln!("[AG-11-T] single_element_indexes | in=[1x10] out=10 | PASS");
15402    }
15403
15404    #[test]
15405    fn ag11t_all_same_labels() {
15406        let base = Index::from_i64(vec![1, 2, 3]);
15407        let refs: Vec<&Index> = (0..5).map(|_| &base).collect();
15408        let plan = multi_way_align(&refs);
15409        assert_eq!(
15410            plan.union_index.labels(),
15411            &[
15412                IndexLabel::Int64(1),
15413                IndexLabel::Int64(2),
15414                IndexLabel::Int64(3)
15415            ]
15416        );
15417        // All 5 inputs should have identity positions
15418        for pos_vec in &plan.positions {
15419            assert_eq!(*pos_vec, vec![Some(0), Some(1), Some(2)]);
15420        }
15421        eprintln!("[AG-11-T] all_same_labels | in=[3x5] out=3 | PASS");
15422    }
15423
15424    #[test]
15425    fn ag11t_iso_associativity() {
15426        let a = Index::from_i64(vec![1, 4, 7, 10]);
15427        let b = Index::from_i64(vec![2, 4, 8, 10]);
15428        let c = Index::from_i64(vec![3, 7, 8, 10]);
15429
15430        let leapfrog_result = leapfrog_union(&[&a, &b, &c]);
15431
15432        // union(A, union(B, C))
15433        let bc = b.union_with(&c).sort_values();
15434        let a_bc = a.union_with(&bc).sort_values();
15435
15436        // union(union(A, B), C)
15437        let ab = a.union_with(&b).sort_values();
15438        let ab_c = ab.union_with(&c).sort_values();
15439
15440        assert_eq!(leapfrog_result.labels(), a_bc.labels());
15441        assert_eq!(leapfrog_result.labels(), ab_c.labels());
15442        eprintln!("[AG-11-T] iso_associativity | verified | PASS");
15443    }
15444
15445    #[test]
15446    fn ag11t_iso_commutativity() {
15447        let a = Index::from_i64(vec![1, 5, 9]);
15448        let b = Index::from_i64(vec![2, 5, 8]);
15449        let c = Index::from_i64(vec![3, 5, 7]);
15450
15451        let abc = leapfrog_union(&[&a, &b, &c]);
15452        let cab = leapfrog_union(&[&c, &a, &b]);
15453        let bca = leapfrog_union(&[&b, &c, &a]);
15454
15455        // All orderings produce same sorted output
15456        assert_eq!(abc.labels(), cab.labels());
15457        assert_eq!(abc.labels(), bca.labels());
15458        eprintln!("[AG-11-T] iso_commutativity | verified | PASS");
15459    }
15460
15461    // ── Index: min/max/argmin/argmax ──
15462
15463    #[test]
15464    fn index_min_max_int() {
15465        let idx = Index::new(vec![3_i64.into(), 1_i64.into(), 2_i64.into()]);
15466        assert_eq!(idx.min(), Some(&IndexLabel::Int64(1)));
15467        assert_eq!(idx.max(), Some(&IndexLabel::Int64(3)));
15468        assert_eq!(idx.argmin(), Some(1));
15469        assert_eq!(idx.argmax(), Some(0));
15470    }
15471
15472    #[test]
15473    fn index_min_max_utf8() {
15474        let idx = Index::new(vec!["c".into(), "a".into(), "b".into()]);
15475        assert_eq!(idx.min(), Some(&IndexLabel::Utf8("a".into())));
15476        assert_eq!(idx.max(), Some(&IndexLabel::Utf8("c".into())));
15477        assert_eq!(idx.argmin(), Some(1));
15478        assert_eq!(idx.argmax(), Some(0));
15479    }
15480
15481    #[test]
15482    fn index_min_max_empty() {
15483        let idx = Index::new(vec![]);
15484        assert_eq!(idx.min(), None);
15485        assert_eq!(idx.max(), None);
15486        assert_eq!(idx.argmin(), None);
15487        assert_eq!(idx.argmax(), None);
15488    }
15489
15490    #[test]
15491    fn index_nunique() {
15492        let idx = Index::new(vec![1_i64.into(), 2_i64.into(), 1_i64.into()]);
15493        assert_eq!(idx.nunique(), 2);
15494    }
15495
15496    #[test]
15497    fn index_nunique_dropna_false_counts_timedelta_nat_once() {
15498        let idx = Index::from_timedelta64(vec![Timedelta::NAT, Timedelta::NAT, 5]);
15499        assert_eq!(idx.nunique(), 1);
15500        assert_eq!(idx.nunique_with_dropna(false), 2);
15501    }
15502
15503    #[test]
15504    fn index_nunique_dropna_false_counts_datetime_nat_once() {
15505        let idx = Index::new(vec![
15506            IndexLabel::Datetime64(i64::MIN),
15507            IndexLabel::Datetime64(i64::MIN),
15508            IndexLabel::Datetime64(1_700_000_000_000_000_000),
15509        ]);
15510        assert_eq!(idx.nunique(), 1);
15511        assert_eq!(idx.nunique_with_dropna(false), 2);
15512    }
15513
15514    // ── Index: map/rename/drop/astype ──
15515
15516    #[test]
15517    fn index_map() {
15518        let idx = Index::new(vec![1_i64.into(), 2_i64.into(), 3_i64.into()]);
15519        let mapped = idx.map(|l| match l {
15520            IndexLabel::Int64(v) => IndexLabel::Int64(v * 10),
15521            other => other.clone(),
15522        });
15523        assert_eq!(mapped.labels()[0], IndexLabel::Int64(10));
15524        assert_eq!(mapped.labels()[2], IndexLabel::Int64(30));
15525    }
15526
15527    #[test]
15528    fn index_drop_labels() {
15529        let idx = Index::new(vec!["a".into(), "b".into(), "c".into()]);
15530        let dropped = idx.drop_labels(&["b".into()]);
15531        assert_eq!(dropped.len(), 2);
15532        assert_eq!(dropped.labels()[0], IndexLabel::Utf8("a".into()));
15533        assert_eq!(dropped.labels()[1], IndexLabel::Utf8("c".into()));
15534    }
15535
15536    #[test]
15537    fn index_astype_str() {
15538        let idx = Index::new(vec![1_i64.into(), 2_i64.into()]);
15539        let str_idx = idx.astype_str();
15540        assert_eq!(str_idx.labels()[0], IndexLabel::Utf8("1".into()));
15541        assert_eq!(str_idx.labels()[1], IndexLabel::Utf8("2".into()));
15542    }
15543
15544    #[test]
15545    fn index_astype_int() {
15546        let idx = Index::new(vec![
15547            IndexLabel::Utf8("10".into()),
15548            IndexLabel::Utf8("20".into()),
15549        ]);
15550        let int_idx = idx.astype_int();
15551        assert_eq!(int_idx.labels()[0], IndexLabel::Int64(10));
15552        assert_eq!(int_idx.labels()[1], IndexLabel::Int64(20));
15553    }
15554
15555    #[test]
15556    fn index_isna_notna() {
15557        let idx = Index::new(vec![1_i64.into(), 2_i64.into()]);
15558        assert_eq!(idx.isna(), vec![false, false]);
15559        assert_eq!(idx.notna(), vec![true, true]);
15560    }
15561
15562    #[test]
15563    fn index_isna_notna_detects_datetimelike_nat() {
15564        let datetime_idx = Index::new(vec![
15565            IndexLabel::Datetime64(i64::MIN),
15566            IndexLabel::Datetime64(1_700_000_000_000_000_000),
15567        ]);
15568        assert_eq!(datetime_idx.isna(), vec![true, false]);
15569        assert_eq!(datetime_idx.notna(), vec![false, true]);
15570
15571        let timedelta_idx = Index::from_timedelta64(vec![Timedelta::NAT, 5]);
15572        assert_eq!(timedelta_idx.isna(), vec![true, false]);
15573        assert_eq!(timedelta_idx.notna(), vec![false, true]);
15574    }
15575
15576    #[test]
15577    fn index_fillna_replaces_datetime_nat_and_preserves_name() {
15578        let idx = Index::new(vec![
15579            IndexLabel::Datetime64(i64::MIN),
15580            IndexLabel::Datetime64(1_700_000_000_000_000_000),
15581            IndexLabel::Datetime64(i64::MIN),
15582        ])
15583        .set_name("when");
15584
15585        let filled = idx.fillna(&IndexLabel::Datetime64(1_800_000_000_000_000_000));
15586
15587        assert_eq!(
15588            filled.labels(),
15589            &[
15590                IndexLabel::Datetime64(1_800_000_000_000_000_000),
15591                IndexLabel::Datetime64(1_700_000_000_000_000_000),
15592                IndexLabel::Datetime64(1_800_000_000_000_000_000),
15593            ]
15594        );
15595        assert_eq!(filled.name(), Some("when"));
15596    }
15597
15598    #[test]
15599    fn index_fillna_replaces_timedelta_nat() {
15600        let idx = Index::from_timedelta64(vec![Timedelta::NAT, 5, Timedelta::NAT]);
15601
15602        let filled = idx.fillna(&IndexLabel::Timedelta64(42));
15603
15604        assert_eq!(
15605            filled.labels(),
15606            &[
15607                IndexLabel::Timedelta64(42),
15608                IndexLabel::Timedelta64(5),
15609                IndexLabel::Timedelta64(42),
15610            ]
15611        );
15612    }
15613
15614    #[test]
15615    fn index_dropna_removes_missing_and_preserves_name() {
15616        let idx =
15617            Index::from_timedelta64(vec![1, Timedelta::NAT, 3, Timedelta::NAT, 5]).set_name("t");
15618        let dropped = idx.dropna();
15619        assert_eq!(
15620            dropped.labels(),
15621            &[
15622                IndexLabel::Timedelta64(1),
15623                IndexLabel::Timedelta64(3),
15624                IndexLabel::Timedelta64(5),
15625            ]
15626        );
15627        assert_eq!(dropped.name(), Some("t"));
15628    }
15629
15630    #[test]
15631    fn index_dropna_all_present_is_noop() {
15632        let idx = Index::from_i64(vec![1, 2, 3]);
15633        let dropped = idx.dropna();
15634        assert_eq!(
15635            dropped.labels(),
15636            &[
15637                IndexLabel::Int64(1),
15638                IndexLabel::Int64(2),
15639                IndexLabel::Int64(3),
15640            ]
15641        );
15642    }
15643
15644    #[test]
15645    fn index_insert_at_middle_position() {
15646        let idx = Index::from_i64(vec![1, 3, 4]);
15647        let result = idx.insert(1, IndexLabel::Int64(2)).unwrap();
15648        assert_eq!(
15649            result.labels(),
15650            &[
15651                IndexLabel::Int64(1),
15652                IndexLabel::Int64(2),
15653                IndexLabel::Int64(3),
15654                IndexLabel::Int64(4),
15655            ]
15656        );
15657    }
15658
15659    #[test]
15660    fn index_insert_at_end_appends() {
15661        let idx = Index::from_i64(vec![1, 2]);
15662        let result = idx.insert(2, IndexLabel::Int64(3)).unwrap();
15663        assert_eq!(
15664            result.labels(),
15665            &[
15666                IndexLabel::Int64(1),
15667                IndexLabel::Int64(2),
15668                IndexLabel::Int64(3),
15669            ]
15670        );
15671    }
15672
15673    #[test]
15674    fn index_insert_past_end_errors() {
15675        let idx = Index::from_i64(vec![1, 2]);
15676        let err = idx.insert(5, IndexLabel::Int64(9)).unwrap_err();
15677        assert!(matches!(err, crate::IndexError::OutOfBounds { .. }));
15678    }
15679
15680    #[test]
15681    fn index_delete_removes_position() {
15682        let idx = Index::from_i64(vec![10, 20, 30]).set_name("k");
15683        let result = idx.delete(1).unwrap();
15684        assert_eq!(
15685            result.labels(),
15686            &[IndexLabel::Int64(10), IndexLabel::Int64(30)]
15687        );
15688        assert_eq!(result.name(), Some("k"));
15689    }
15690
15691    #[test]
15692    fn index_delete_out_of_bounds_errors() {
15693        let idx = Index::from_i64(vec![1]);
15694        let err = idx.delete(1).unwrap_err();
15695        assert!(matches!(err, crate::IndexError::OutOfBounds { .. }));
15696    }
15697
15698    #[test]
15699    fn index_append_concatenates() {
15700        let a = Index::from_i64(vec![1, 2]).set_name("left");
15701        let b = Index::from_i64(vec![3, 4]);
15702        let result = a.append(&b);
15703        assert_eq!(
15704            result.labels(),
15705            &[
15706                IndexLabel::Int64(1),
15707                IndexLabel::Int64(2),
15708                IndexLabel::Int64(3),
15709                IndexLabel::Int64(4),
15710            ]
15711        );
15712        assert_eq!(result.name(), Some("left"));
15713    }
15714
15715    #[test]
15716    fn index_append_empty_is_noop() {
15717        let a = Index::from_i64(vec![1, 2]);
15718        let empty = Index::new(Vec::new());
15719        let result = a.append(&empty);
15720        assert_eq!(result.labels(), a.labels());
15721    }
15722
15723    #[test]
15724    fn index_repeat_duplicates_each_label() {
15725        let idx = Index::from_i64(vec![1, 2, 3]).set_name("k");
15726        let result = idx.repeat(2);
15727        assert_eq!(
15728            result.labels(),
15729            &[
15730                IndexLabel::Int64(1),
15731                IndexLabel::Int64(1),
15732                IndexLabel::Int64(2),
15733                IndexLabel::Int64(2),
15734                IndexLabel::Int64(3),
15735                IndexLabel::Int64(3),
15736            ]
15737        );
15738        assert_eq!(result.name(), Some("k"));
15739    }
15740
15741    #[test]
15742    fn index_repeat_zero_yields_empty() {
15743        let idx = Index::from_i64(vec![1, 2, 3]);
15744        let result = idx.repeat(0);
15745        assert!(result.labels().is_empty());
15746    }
15747
15748    #[test]
15749    fn index_repeat_one_is_clone() {
15750        let idx = Index::from_i64(vec![1, 2]);
15751        let result = idx.repeat(1);
15752        assert_eq!(result.labels(), idx.labels());
15753    }
15754
15755    #[test]
15756    fn index_equals_same_labels_ignores_name() {
15757        let a = Index::from_i64(vec![1, 2, 3]).set_name("x");
15758        let b = Index::from_i64(vec![1, 2, 3]).set_name("y");
15759        assert!(a.equals(&b));
15760    }
15761
15762    #[test]
15763    fn index_equals_differing_labels_false() {
15764        let a = Index::from_i64(vec![1, 2, 3]);
15765        let b = Index::from_i64(vec![1, 2]);
15766        assert!(!a.equals(&b));
15767    }
15768
15769    #[test]
15770    fn index_identical_requires_matching_name() {
15771        let a = Index::from_i64(vec![1, 2]).set_name("x");
15772        let b = Index::from_i64(vec![1, 2]).set_name("y");
15773        assert!(a.equals(&b));
15774        assert!(!a.identical(&b));
15775        let c = Index::from_i64(vec![1, 2]).set_name("x");
15776        assert!(a.identical(&c));
15777    }
15778
15779    #[test]
15780    fn index_value_counts_sorts_by_descending_count() {
15781        let idx = Index::new(vec![
15782            "a".into(),
15783            "b".into(),
15784            "a".into(),
15785            "c".into(),
15786            "a".into(),
15787            "b".into(),
15788        ]);
15789        let counts = idx.value_counts();
15790        assert_eq!(counts[0].0, IndexLabel::Utf8("a".into()));
15791        assert_eq!(counts[0].1, 3);
15792        assert_eq!(counts[1].0, IndexLabel::Utf8("b".into()));
15793        assert_eq!(counts[1].1, 2);
15794        assert_eq!(counts[2].0, IndexLabel::Utf8("c".into()));
15795        assert_eq!(counts[2].1, 1);
15796    }
15797
15798    #[test]
15799    fn index_value_counts_empty() {
15800        let idx = Index::new(Vec::<IndexLabel>::new());
15801        assert!(idx.value_counts().is_empty());
15802    }
15803
15804    #[test]
15805    fn index_value_counts_drops_missing_by_default() {
15806        let idx = Index::new(vec![
15807            IndexLabel::Datetime64(i64::MIN),
15808            IndexLabel::Utf8("a".into()),
15809            IndexLabel::Utf8("a".into()),
15810            IndexLabel::Datetime64(i64::MIN),
15811        ]);
15812
15813        let counts = idx.value_counts();
15814        assert_eq!(counts, vec![(IndexLabel::Utf8("a".into()), 2)]);
15815    }
15816
15817    #[test]
15818    fn index_value_counts_with_options_preserves_first_seen_order_when_unsorted() {
15819        let idx = Index::new(vec![
15820            IndexLabel::Datetime64(i64::MIN),
15821            IndexLabel::Utf8("b".into()),
15822            IndexLabel::Utf8("a".into()),
15823            IndexLabel::Utf8("b".into()),
15824        ]);
15825
15826        let counts = idx.value_counts_with_options(false, false, false, false);
15827        assert_eq!(
15828            counts,
15829            vec![
15830                (IndexLabel::Datetime64(i64::MIN), Scalar::Int64(1)),
15831                (IndexLabel::Utf8("b".into()), Scalar::Int64(2)),
15832                (IndexLabel::Utf8("a".into()), Scalar::Int64(1)),
15833            ]
15834        );
15835    }
15836
15837    #[test]
15838    fn index_value_counts_with_options_normalize_excludes_missing_from_denominator() {
15839        let idx = Index::new(vec![
15840            IndexLabel::Int64(1),
15841            IndexLabel::Int64(1),
15842            IndexLabel::Int64(2),
15843            IndexLabel::Datetime64(i64::MIN),
15844        ]);
15845
15846        let counts = idx.value_counts_with_options(true, true, false, true);
15847        assert!(matches!(
15848            counts.as_slice(),
15849            [
15850                (IndexLabel::Int64(1), Scalar::Float64(_)),
15851                (IndexLabel::Int64(2), Scalar::Float64(_))
15852            ]
15853        ));
15854        let [
15855            (IndexLabel::Int64(1), Scalar::Float64(first)),
15856            (IndexLabel::Int64(2), Scalar::Float64(second)),
15857        ] = counts.as_slice()
15858        else {
15859            return;
15860        };
15861        assert!((first - (2.0 / 3.0)).abs() < 1e-12);
15862        assert!((second - (1.0 / 3.0)).abs() < 1e-12);
15863    }
15864
15865    #[test]
15866    fn index_shift_positive_pads_left() {
15867        let idx = Index::from_i64(vec![1, 2, 3, 4]).set_name("k");
15868        let shifted = idx.shift(2, IndexLabel::Int64(-1));
15869        assert_eq!(
15870            shifted.labels(),
15871            &[
15872                IndexLabel::Int64(-1),
15873                IndexLabel::Int64(-1),
15874                IndexLabel::Int64(1),
15875                IndexLabel::Int64(2),
15876            ]
15877        );
15878        assert_eq!(shifted.name(), Some("k"));
15879    }
15880
15881    #[test]
15882    fn index_shift_negative_pads_right() {
15883        let idx = Index::from_i64(vec![1, 2, 3, 4]);
15884        let shifted = idx.shift(-1, IndexLabel::Int64(0));
15885        assert_eq!(
15886            shifted.labels(),
15887            &[
15888                IndexLabel::Int64(2),
15889                IndexLabel::Int64(3),
15890                IndexLabel::Int64(4),
15891                IndexLabel::Int64(0),
15892            ]
15893        );
15894    }
15895
15896    #[test]
15897    fn index_shift_zero_is_clone() {
15898        let idx = Index::from_i64(vec![1, 2, 3]);
15899        let shifted = idx.shift(0, IndexLabel::Int64(-1));
15900        assert_eq!(shifted.labels(), idx.labels());
15901    }
15902
15903    #[test]
15904    fn index_shift_larger_than_len_fills_all() {
15905        let idx = Index::from_i64(vec![1, 2, 3]);
15906        let shifted = idx.shift(10, IndexLabel::Int64(-1));
15907        assert_eq!(
15908            shifted.labels(),
15909            &[
15910                IndexLabel::Int64(-1),
15911                IndexLabel::Int64(-1),
15912                IndexLabel::Int64(-1),
15913            ]
15914        );
15915    }
15916
15917    #[test]
15918    fn index_any_all_basic() {
15919        let idx = Index::from_i64(vec![0, 0, 1]);
15920        assert!(idx.any());
15921        assert!(!idx.all());
15922
15923        let all_nonzero = Index::from_i64(vec![1, 2, 3]);
15924        assert!(all_nonzero.all());
15925        assert!(all_nonzero.any());
15926
15927        let all_zero = Index::from_i64(vec![0, 0]);
15928        assert!(!all_zero.any());
15929        assert!(!all_zero.all());
15930    }
15931
15932    #[test]
15933    fn index_all_empty_is_true() {
15934        let idx = Index::new(Vec::<IndexLabel>::new());
15935        assert!(idx.all());
15936        assert!(!idx.any());
15937    }
15938
15939    #[test]
15940    fn index_any_string_nonempty_truthy() {
15941        let idx = Index::new(vec!["".into(), "".into(), "x".into()]);
15942        assert!(idx.any());
15943        assert!(!idx.all());
15944    }
15945
15946    #[test]
15947    fn index_to_list_returns_owned_labels() {
15948        let idx = Index::from_i64(vec![1, 2, 3]);
15949        assert_eq!(
15950            idx.to_list(),
15951            vec![
15952                IndexLabel::Int64(1),
15953                IndexLabel::Int64(2),
15954                IndexLabel::Int64(3),
15955            ]
15956        );
15957    }
15958
15959    #[test]
15960    fn index_format_stringifies_labels() {
15961        let idx = Index::new(vec![
15962            IndexLabel::Int64(10),
15963            IndexLabel::Utf8("abc".into()),
15964            IndexLabel::Int64(-5),
15965        ]);
15966        assert_eq!(idx.format(), vec!["10", "abc", "-5"]);
15967    }
15968
15969    #[test]
15970    fn index_putmask_replaces_true_positions() {
15971        let idx = Index::from_i64(vec![1, 2, 3, 4]).set_name("k");
15972        let cond = vec![false, true, false, true];
15973        let replaced = idx.putmask(&cond, &IndexLabel::Int64(0));
15974        assert_eq!(
15975            replaced.labels(),
15976            &[
15977                IndexLabel::Int64(1),
15978                IndexLabel::Int64(0),
15979                IndexLabel::Int64(3),
15980                IndexLabel::Int64(0),
15981            ]
15982        );
15983        assert_eq!(replaced.name(), Some("k"));
15984    }
15985
15986    #[test]
15987    fn index_putmask_short_cond_leaves_tail_unchanged() {
15988        let idx = Index::from_i64(vec![1, 2, 3, 4]);
15989        // cond shorter than index — trailing positions keep original
15990        // labels (matches pandas lenient alignment).
15991        let cond = vec![true];
15992        let replaced = idx.putmask(&cond, &IndexLabel::Int64(-1));
15993        assert_eq!(
15994            replaced.labels(),
15995            &[
15996                IndexLabel::Int64(-1),
15997                IndexLabel::Int64(2),
15998                IndexLabel::Int64(3),
15999                IndexLabel::Int64(4),
16000            ]
16001        );
16002    }
16003
16004    #[test]
16005    fn index_putmask_empty_cond_is_noop() {
16006        let idx = Index::from_i64(vec![1, 2]);
16007        let replaced = idx.putmask(&[], &IndexLabel::Int64(0));
16008        assert_eq!(replaced.labels(), idx.labels());
16009    }
16010
16011    #[test]
16012    fn index_asof_finds_largest_not_exceeding() {
16013        let idx = Index::from_i64(vec![1, 3, 5, 7]);
16014        assert_eq!(idx.asof(&IndexLabel::Int64(4)), Some(IndexLabel::Int64(3)));
16015        assert_eq!(idx.asof(&IndexLabel::Int64(5)), Some(IndexLabel::Int64(5)));
16016        assert_eq!(idx.asof(&IndexLabel::Int64(7)), Some(IndexLabel::Int64(7)));
16017        assert_eq!(
16018            idx.asof(&IndexLabel::Int64(100)),
16019            Some(IndexLabel::Int64(7))
16020        );
16021    }
16022
16023    #[test]
16024    fn index_asof_before_first_returns_none() {
16025        let idx = Index::from_i64(vec![5, 10]);
16026        assert_eq!(idx.asof(&IndexLabel::Int64(0)), None);
16027    }
16028
16029    #[test]
16030    fn index_searchsorted_left_right() {
16031        let idx = Index::from_i64(vec![1, 2, 2, 5]);
16032        assert_eq!(idx.searchsorted(&IndexLabel::Int64(2), "left").unwrap(), 1);
16033        assert_eq!(idx.searchsorted(&IndexLabel::Int64(2), "right").unwrap(), 3);
16034        assert_eq!(idx.searchsorted(&IndexLabel::Int64(0), "left").unwrap(), 0);
16035        assert_eq!(idx.searchsorted(&IndexLabel::Int64(6), "left").unwrap(), 4);
16036    }
16037
16038    #[test]
16039    fn index_searchsorted_rejects_invalid_side() {
16040        let idx = Index::from_i64(vec![1]);
16041        assert!(idx.searchsorted(&IndexLabel::Int64(0), "middle").is_err());
16042    }
16043
16044    #[test]
16045    fn index_memory_usage_counts_fixed_width() {
16046        let idx = Index::from_i64(vec![1, 2, 3]);
16047        let shallow = idx.memory_usage(false);
16048        assert_eq!(shallow, 24); // 3 * 8
16049        // deep is identical for fixed-width types.
16050        assert_eq!(idx.memory_usage(true), 24);
16051    }
16052
16053    #[test]
16054    fn index_memory_usage_deep_counts_utf8_bytes() {
16055        let idx = Index::new(vec![
16056            IndexLabel::Utf8("hi".into()),
16057            IndexLabel::Utf8("world".into()),
16058        ]);
16059        let shallow = idx.memory_usage(false);
16060        let deep = idx.memory_usage(true);
16061        // deep - shallow == sum of string byte lengths
16062        assert_eq!(deep - shallow, 7);
16063    }
16064
16065    #[test]
16066    fn index_nlevels_flat_index_is_one() {
16067        let idx = Index::from_i64(vec![1, 2]);
16068        assert_eq!(idx.nlevels(), 1);
16069    }
16070
16071    #[test]
16072    fn index_where_cond() {
16073        let idx = Index::new(vec!["a".into(), "b".into(), "c".into()]);
16074        let cond = vec![true, false, true];
16075        let result = idx.where_cond(&cond, &"X".into());
16076        assert_eq!(result.labels()[0], IndexLabel::Utf8("a".into()));
16077        assert_eq!(result.labels()[1], IndexLabel::Utf8("X".into()));
16078        assert_eq!(result.labels()[2], IndexLabel::Utf8("c".into()));
16079    }
16080
16081    #[test]
16082    fn index_a31qh_conversion_aliases_materialize_labels() {
16083        let idx = Index::new(vec!["a".into(), "b".into()]).set_name("key");
16084        let labels = vec![IndexLabel::from("a"), IndexLabel::from("b")];
16085
16086        assert_eq!(idx.tolist(), labels);
16087        assert_eq!(idx.to_numpy(), labels);
16088        assert_eq!(idx.array(), labels);
16089        assert_eq!(idx.values(), labels);
16090        assert_eq!(idx.ravel(), labels);
16091        assert_eq!(idx.view(), idx);
16092        assert_eq!(idx.transpose(), idx);
16093        assert_eq!(idx.T(), idx);
16094        assert_eq!(
16095            idx.to_frame(),
16096            vec![vec![IndexLabel::from("a")], vec![IndexLabel::from("b")]]
16097        );
16098        assert_eq!(
16099            idx.to_series(),
16100            vec![
16101                (IndexLabel::from("a"), IndexLabel::from("a")),
16102                (IndexLabel::from("b"), IndexLabel::from("b")),
16103            ]
16104        );
16105    }
16106
16107    #[test]
16108    fn index_a31qh_dtype_metadata_and_type_checks() {
16109        let ints = Index::from_i64(vec![1, 2, 3]);
16110        assert_eq!(ints.dtype(), "int64");
16111        assert_eq!(ints.dtypes(), vec!["int64"]);
16112        assert_eq!(ints.inferred_type(), "integer");
16113        assert!(ints.holds_integer());
16114        assert!(ints.is_integer());
16115        assert!(ints.is_numeric());
16116        assert!(!ints.is_object());
16117        assert_eq!(ints.ndim(), 1);
16118        assert_eq!(ints.shape(), (3,));
16119        assert_eq!(ints.size(), 3);
16120        assert_eq!(ints.nbytes(), ints.memory_usage(false));
16121        assert!(!ints.empty());
16122        assert_eq!(
16123            Index::from_i64(vec![42]).item().unwrap(),
16124            IndexLabel::Int64(42)
16125        );
16126        assert!(ints.item().is_err());
16127
16128        let mixed = Index::new(vec![
16129            IndexLabel::Int64(1),
16130            IndexLabel::Utf8("x".into()),
16131            IndexLabel::Datetime64(i64::MIN),
16132        ]);
16133        assert_eq!(mixed.dtype(), "object");
16134        assert_eq!(mixed.inferred_type(), "mixed");
16135        assert!(mixed.is_object());
16136        assert!(mixed.hasnans());
16137        assert_eq!(mixed.isnull(), mixed.isna());
16138        assert_eq!(mixed.notnull(), mixed.notna());
16139        assert!(!mixed.is_boolean());
16140        assert!(!mixed.is_categorical());
16141        assert!(!mixed.is_floating());
16142        assert!(!mixed.is_interval());
16143        assert_eq!(mixed.infer_objects(), mixed);
16144        assert!(ints.is_(&ints));
16145        assert!(!ints.is_(&Index::from_i64(vec![1, 2, 3])));
16146    }
16147
16148    #[test]
16149    fn index_a31qh_factorize_reindex_and_non_unique_indexer() {
16150        let idx = Index::new(vec![
16151            IndexLabel::Utf8("a".into()),
16152            IndexLabel::Utf8("b".into()),
16153            IndexLabel::Utf8("a".into()),
16154            IndexLabel::Datetime64(i64::MIN),
16155        ])
16156        .set_name("letters");
16157
16158        let (codes, uniques) = idx.factorize();
16159        assert_eq!(codes, vec![0, 1, 0, -1]);
16160        assert_eq!(
16161            uniques.labels(),
16162            &[IndexLabel::from("a"), IndexLabel::from("b")]
16163        );
16164        assert_eq!(uniques.name(), Some("letters"));
16165
16166        let target = Index::new(vec![
16167            IndexLabel::Utf8("a".into()),
16168            IndexLabel::Utf8("z".into()),
16169            IndexLabel::Utf8("b".into()),
16170        ]);
16171        assert_eq!(idx.get_indexer_for(&target), vec![Some(0), None, Some(1)]);
16172        assert_eq!(
16173            idx.get_indexer_non_unique(&target),
16174            (vec![0, 2, -1, 1], vec![1])
16175        );
16176
16177        let (reindexed, positions) = idx.reindex(&target);
16178        assert_eq!(reindexed, target);
16179        assert_eq!(positions, vec![Some(0), None, Some(1)]);
16180    }
16181
16182    #[test]
16183    fn index_a31qh_set_sort_slice_and_level_aliases() {
16184        let idx = Index::from_i64(vec![3, 1, 2]).set_name("n");
16185        let sorted = idx.sort();
16186        assert_eq!(
16187            sorted.labels(),
16188            &[
16189                IndexLabel::Int64(1),
16190                IndexLabel::Int64(2),
16191                IndexLabel::Int64(3),
16192            ]
16193        );
16194        let (sortlevel, order) = idx.sortlevel();
16195        assert_eq!(sortlevel, sorted);
16196        assert_eq!(order, vec![1, 2, 0]);
16197
16198        let other = Index::from_i64(vec![2, 4]);
16199        assert_eq!(idx.union(&other), idx.union_with(&other));
16200        assert_eq!(
16201            idx.drop(&[IndexLabel::Int64(1)]),
16202            idx.drop_labels(&[IndexLabel::Int64(1)])
16203        );
16204        assert_eq!(idx.copy(), idx);
16205        assert_eq!(
16206            idx.where_(&[true, false, true], &IndexLabel::Int64(0))
16207                .labels()[1],
16208            IndexLabel::Int64(0)
16209        );
16210        assert_eq!(idx.get_level_values(0).unwrap(), idx);
16211        assert!(idx.get_level_values(1).is_err());
16212        assert!(idx.droplevel(0).is_err());
16213
16214        let sorted_lookup = Index::from_i64(vec![1, 2, 2, 4]);
16215        assert_eq!(
16216            sorted_lookup
16217                .get_slice_bound(&IndexLabel::Int64(2), "left")
16218                .unwrap(),
16219            1
16220        );
16221        assert_eq!(
16222            sorted_lookup
16223                .slice_locs(Some(&IndexLabel::Int64(2)), Some(&IndexLabel::Int64(4)))
16224                .unwrap(),
16225            (1, 4)
16226        );
16227        assert_eq!(
16228            sorted_lookup
16229                .slice_indexer(Some(&IndexLabel::Int64(2)), Some(&IndexLabel::Int64(2)))
16230                .unwrap(),
16231            (1, 3)
16232        );
16233    }
16234
16235    #[test]
16236    fn index_a31qh_astype_str_groupby_join_asof_and_diff() {
16237        let idx = Index::new(vec![
16238            IndexLabel::Utf8("Alpha".into()),
16239            IndexLabel::Utf8("beta".into()),
16240            IndexLabel::Int64(7),
16241        ]);
16242        assert_eq!(
16243            idx.r#str().lower(),
16244            vec![Some("alpha".to_owned()), Some("beta".to_owned()), None]
16245        );
16246        assert_eq!(
16247            idx.r#str().upper(),
16248            vec![Some("ALPHA".to_owned()), Some("BETA".to_owned()), None]
16249        );
16250        assert_eq!(
16251            idx.r#str().contains("ta"),
16252            vec![Some(false), Some(true), None]
16253        );
16254        assert_eq!(idx.r#str().len(), vec![Some(5), Some(4), None]);
16255        assert_eq!(idx.r#str().is_empty(), vec![Some(false), Some(false), None]);
16256        assert!(idx.astype("object").is_ok());
16257        assert!(idx.astype("float64").is_err());
16258
16259        let grouped = Index::new(vec!["a".into(), "b".into(), "a".into()]).groupby();
16260        assert_eq!(grouped[&IndexLabel::from("a")], vec![0, 2]);
16261        assert_eq!(grouped[&IndexLabel::from("b")], vec![1]);
16262
16263        let left = Index::from_i64(vec![1, 2, 3]);
16264        let right = Index::from_i64(vec![2, 4]);
16265        assert_eq!(
16266            left.join(&right, "inner").unwrap(),
16267            left.intersection(&right)
16268        );
16269        assert_eq!(left.join(&right, "outer").unwrap(), left.union_with(&right));
16270        assert_eq!(left.join(&right, "left").unwrap(), left);
16271        assert_eq!(left.join(&right, "right").unwrap(), right);
16272        assert!(left.join(&right, "sideways").is_err());
16273
16274        let sorted = Index::from_i64(vec![1, 3, 5, 7]);
16275        let probes = Index::from_i64(vec![0, 3, 4, 8]);
16276        assert_eq!(
16277            sorted.asof_locs(&probes, None),
16278            vec![None, Some(1), Some(1), Some(3)]
16279        );
16280        assert_eq!(
16281            sorted.asof_locs(&probes, Some(&[true, false, true, true])),
16282            vec![None, Some(0), Some(0), Some(3)]
16283        );
16284
16285        assert_eq!(
16286            sorted.diff(1),
16287            vec![
16288                None,
16289                Some(IndexLabel::Int64(2)),
16290                Some(IndexLabel::Int64(2)),
16291                Some(IndexLabel::Int64(2)),
16292            ]
16293        );
16294        let datetimes = Index::from_datetime64(vec![10, 25]);
16295        assert_eq!(
16296            datetimes.diff(1),
16297            vec![None, Some(IndexLabel::Timedelta64(15))]
16298        );
16299    }
16300
16301    // ── Index name tests ────────────────────────────────────────────
16302
16303    #[test]
16304    fn index_name_default_none() {
16305        let idx = Index::new(vec![1_i64.into(), 2_i64.into()]);
16306        assert_eq!(idx.name(), None);
16307    }
16308
16309    #[test]
16310    fn index_set_name() {
16311        let idx = Index::new(vec![1_i64.into(), 2_i64.into()]);
16312        let named = idx.set_name("year");
16313        assert_eq!(named.name(), Some("year"));
16314        assert_eq!(named.labels(), idx.labels());
16315    }
16316
16317    #[test]
16318    fn index_set_names_some_and_none() {
16319        let idx = Index::new(vec!["a".into(), "b".into()]);
16320        let named = idx.set_names(Some("letters"));
16321        assert_eq!(named.name(), Some("letters"));
16322        let cleared = named.set_names(None);
16323        assert_eq!(cleared.name(), None);
16324    }
16325
16326    #[test]
16327    fn index_name_propagates_through_unique() {
16328        let idx = Index::new(vec![1_i64.into(), 1_i64.into(), 2_i64.into()]).set_name("id");
16329        let u = idx.unique();
16330        assert_eq!(u.name(), Some("id"));
16331        assert_eq!(u.len(), 2);
16332    }
16333
16334    #[test]
16335    fn index_name_propagates_through_sort_values() {
16336        let idx = Index::new(vec![3_i64.into(), 1_i64.into(), 2_i64.into()]).set_name("val");
16337        let sorted = idx.sort_values();
16338        assert_eq!(sorted.name(), Some("val"));
16339    }
16340
16341    #[test]
16342    fn index_name_propagates_through_take_and_slice() {
16343        let idx = Index::new(vec!["a".into(), "b".into(), "c".into()]).set_name("letter");
16344        assert_eq!(idx.take(&[0, 2]).name(), Some("letter"));
16345        assert_eq!(idx.slice(1, 2).name(), Some("letter"));
16346    }
16347
16348    #[test]
16349    fn index_name_propagates_through_map() {
16350        let idx = Index::new(vec![1_i64.into(), 2_i64.into()]).set_name("x");
16351        let mapped = idx.map(|l| match l {
16352            IndexLabel::Int64(v) => IndexLabel::Int64(v * 10),
16353            other => other.clone(),
16354        });
16355        assert_eq!(mapped.name(), Some("x"));
16356    }
16357
16358    #[test]
16359    fn index_name_propagates_through_drop_labels() {
16360        let idx = Index::new(vec![1_i64.into(), 2_i64.into(), 3_i64.into()]).set_name("num");
16361        let dropped = idx.drop_labels(&[2_i64.into()]);
16362        assert_eq!(dropped.name(), Some("num"));
16363        assert_eq!(dropped.len(), 2);
16364    }
16365
16366    #[test]
16367    fn index_name_propagates_through_astype() {
16368        let idx = Index::new(vec![1_i64.into(), 2_i64.into()]).set_name("n");
16369        assert_eq!(idx.astype_str().name(), Some("n"));
16370        let idx2 = Index::new(vec!["1".into(), "2".into()]).set_name("s");
16371        assert_eq!(idx2.astype_int().name(), Some("s"));
16372    }
16373
16374    #[test]
16375    fn index_name_shared_for_intersection() {
16376        let a = Index::new(vec![1_i64.into(), 2_i64.into()]).set_name("x");
16377        let b = Index::new(vec![2_i64.into(), 3_i64.into()]).set_name("x");
16378        assert_eq!(a.intersection(&b).name(), Some("x"));
16379
16380        let c = Index::new(vec![2_i64.into(), 3_i64.into()]).set_name("y");
16381        assert_eq!(a.intersection(&c).name(), None);
16382    }
16383
16384    #[test]
16385    fn index_name_shared_for_union() {
16386        let a = Index::new(vec![1_i64.into()]).set_name("k");
16387        let b = Index::new(vec![2_i64.into()]).set_name("k");
16388        assert_eq!(a.union_with(&b).name(), Some("k"));
16389
16390        let c = Index::new(vec![2_i64.into()]);
16391        assert_eq!(a.union_with(&c).name(), None);
16392    }
16393
16394    #[test]
16395    fn index_name_propagates_through_where_cond() {
16396        let idx = Index::new(vec!["a".into(), "b".into()]).set_name("col");
16397        let result = idx.where_cond(&[true, false], &"Z".into());
16398        assert_eq!(result.name(), Some("col"));
16399    }
16400
16401    #[test]
16402    fn index_rename_index() {
16403        let idx = Index::new(vec![1_i64.into()]);
16404        let named = idx.rename_index(Some("foo"));
16405        assert_eq!(named.name(), Some("foo"));
16406        let cleared = named.rename_index(None);
16407        assert_eq!(cleared.name(), None);
16408    }
16409
16410    #[test]
16411    fn index_equality_ignores_name() {
16412        let a = Index::new(vec![1_i64.into(), 2_i64.into()]).set_name("a");
16413        let b = Index::new(vec![1_i64.into(), 2_i64.into()]).set_name("b");
16414        assert_eq!(a, b);
16415    }
16416
16417    #[test]
16418    fn index_names_property() {
16419        let idx = Index::new(vec![1_i64.into()]);
16420        assert_eq!(idx.names(), vec![None]);
16421        let named = idx.set_name("x");
16422        assert_eq!(named.names(), vec![Some("x".to_string())]);
16423    }
16424
16425    #[test]
16426    fn index_set_names_list() {
16427        let idx = Index::new(vec![1_i64.into()]);
16428        let named = idx.set_names_list(&[Some("foo")]);
16429        assert_eq!(named.name(), Some("foo"));
16430        let cleared = named.set_names_list(&[None]);
16431        assert_eq!(cleared.name(), None);
16432    }
16433
16434    #[test]
16435    fn index_to_flat_index() {
16436        let idx = Index::new(vec!["a".into(), "b".into()]).set_name("x");
16437        let flat = idx.to_flat_index();
16438        assert_eq!(flat, idx);
16439        assert_eq!(flat.name(), Some("x"));
16440    }
16441
16442    // ── MultiIndex tests ──
16443
16444    #[test]
16445    fn multi_index_from_tuples() {
16446        let mi = MultiIndex::from_tuples(vec![
16447            vec!["a".into(), 1_i64.into()],
16448            vec!["a".into(), 2_i64.into()],
16449            vec!["b".into(), 1_i64.into()],
16450        ])
16451        .unwrap();
16452
16453        assert_eq!(mi.nlevels(), 2);
16454        assert_eq!(mi.len(), 3);
16455        assert!(!mi.is_empty());
16456    }
16457
16458    #[test]
16459    fn multi_index_from_tuples_ragged_errors() {
16460        let err = MultiIndex::from_tuples(vec![
16461            vec!["a".into(), 1_i64.into()],
16462            vec!["b".into()], // wrong number of levels
16463        ]);
16464        assert!(err.is_err());
16465    }
16466
16467    #[test]
16468    fn multi_index_from_arrays() {
16469        let mi = MultiIndex::from_arrays(vec![
16470            vec!["a".into(), "a".into(), "b".into()],
16471            vec![1_i64.into(), 2_i64.into(), 1_i64.into()],
16472        ])
16473        .unwrap();
16474
16475        assert_eq!(mi.nlevels(), 2);
16476        assert_eq!(mi.len(), 3);
16477    }
16478
16479    #[test]
16480    fn multi_index_from_arrays_length_mismatch_errors() {
16481        let err = MultiIndex::from_arrays(vec![
16482            vec!["a".into(), "b".into()],
16483            vec![1_i64.into()], // wrong length
16484        ]);
16485        assert!(err.is_err());
16486    }
16487
16488    #[test]
16489    fn multi_index_from_frame_preserves_column_names_a1dv9() {
16490        let mi = MultiIndex::from_frame(vec![
16491            (
16492                Some("letter".into()),
16493                vec!["a".into(), "b".into(), "b".into()],
16494            ),
16495            (
16496                Some("number".into()),
16497                vec![1_i64.into(), 1_i64.into(), 2_i64.into()],
16498            ),
16499        ])
16500        .unwrap();
16501
16502        assert_eq!(mi.names(), &[Some("letter".into()), Some("number".into())]);
16503        assert_eq!(
16504            mi.to_list(),
16505            vec![
16506                vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
16507                vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(1)],
16508                vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)],
16509            ]
16510        );
16511
16512        let empty = MultiIndex::from_frame(Vec::new()).unwrap();
16513        assert!(empty.is_empty());
16514        assert_eq!(empty.nlevels(), 0);
16515    }
16516
16517    #[test]
16518    fn multi_index_from_frame_rejects_length_mismatch_a1dv9() {
16519        let err = MultiIndex::from_frame(vec![
16520            (Some("letter".into()), vec!["a".into(), "b".into()]),
16521            (Some("number".into()), vec![1_i64.into()]),
16522        ])
16523        .unwrap_err();
16524
16525        assert!(matches!(
16526            err,
16527            super::IndexError::LengthMismatch {
16528                expected: 2,
16529                actual: 1,
16530                ..
16531            }
16532        ));
16533    }
16534
16535    #[test]
16536    fn multi_index_from_product() {
16537        let mi = MultiIndex::from_product(vec![
16538            vec!["a".into(), "b".into()],
16539            vec![1_i64.into(), 2_i64.into(), 3_i64.into()],
16540        ])
16541        .unwrap();
16542
16543        assert_eq!(mi.nlevels(), 2);
16544        assert_eq!(mi.len(), 6); // 2 * 3
16545    }
16546
16547    #[test]
16548    fn multi_index_from_product_values() {
16549        let mi = MultiIndex::from_product(vec![
16550            vec!["x".into(), "y".into()],
16551            vec![1_i64.into(), 2_i64.into()],
16552        ])
16553        .unwrap();
16554
16555        // Should produce: (x,1), (x,2), (y,1), (y,2)
16556        assert_eq!(
16557            mi.get_tuple(0).unwrap(),
16558            vec![&IndexLabel::Utf8("x".into()), &IndexLabel::Int64(1)]
16559        );
16560        assert_eq!(
16561            mi.get_tuple(1).unwrap(),
16562            vec![&IndexLabel::Utf8("x".into()), &IndexLabel::Int64(2)]
16563        );
16564        assert_eq!(
16565            mi.get_tuple(2).unwrap(),
16566            vec![&IndexLabel::Utf8("y".into()), &IndexLabel::Int64(1)]
16567        );
16568        assert_eq!(
16569            mi.get_tuple(3).unwrap(),
16570            vec![&IndexLabel::Utf8("y".into()), &IndexLabel::Int64(2)]
16571        );
16572    }
16573
16574    #[test]
16575    fn multi_index_get_level_values() {
16576        let mi = MultiIndex::from_tuples(vec![
16577            vec!["a".into(), 1_i64.into()],
16578            vec!["b".into(), 2_i64.into()],
16579        ])
16580        .unwrap()
16581        .set_names(vec![Some("letter".into()), Some("number".into())]);
16582
16583        let level0 = mi.get_level_values(0).unwrap();
16584        assert_eq!(
16585            level0.labels(),
16586            &[IndexLabel::Utf8("a".into()), IndexLabel::Utf8("b".into())]
16587        );
16588        assert_eq!(level0.name(), Some("letter"));
16589
16590        let level1 = mi.get_level_values(1).unwrap();
16591        assert_eq!(
16592            level1.labels(),
16593            &[IndexLabel::Int64(1), IndexLabel::Int64(2)]
16594        );
16595        assert_eq!(level1.name(), Some("number"));
16596    }
16597
16598    #[test]
16599    fn multi_index_get_level_values_out_of_bounds() {
16600        let mi = MultiIndex::from_tuples(vec![vec!["a".into()]]).unwrap();
16601        assert!(mi.get_level_values(1).is_err());
16602    }
16603
16604    #[test]
16605    fn multi_index_metadata_shape_and_tuple_materialization() {
16606        let mi = MultiIndex::from_tuples(vec![
16607            vec!["a".into(), 1_i64.into()],
16608            vec!["a".into(), 2_i64.into()],
16609            vec!["b".into(), 1_i64.into()],
16610        ])
16611        .unwrap()
16612        .set_names(vec![Some("letter".into()), Some("number".into())]);
16613
16614        let tuples = vec![
16615            vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
16616            vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(2)],
16617            vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(1)],
16618        ];
16619        assert_eq!(mi.name(), None);
16620        assert_eq!(mi.names(), &[Some("letter".into()), Some("number".into())]);
16621        assert_eq!(mi.size(), 3);
16622        assert_eq!(mi.shape(), (3,));
16623        assert_eq!(mi.ndim(), 1);
16624        assert!(!mi.empty());
16625        assert_eq!(mi.to_list(), tuples);
16626        assert_eq!(mi.tolist(), mi.to_list());
16627        assert_eq!(mi.to_numpy(), mi.to_list());
16628        assert_eq!(mi.values(), mi.to_list());
16629        assert_eq!(mi.array(), mi.to_list());
16630        assert_eq!(mi.ravel(), mi.to_list());
16631        assert_eq!(mi.format(), vec!["(a, 1)", "(a, 2)", "(b, 1)"]);
16632        assert_eq!(mi.view(), mi);
16633        assert_eq!(mi.transpose(), mi);
16634        assert_eq!(mi.T(), mi);
16635        assert_eq!(mi.to_frame(), tuples);
16636        assert_eq!(
16637            mi.to_series(),
16638            tuples
16639                .iter()
16640                .cloned()
16641                .map(|tuple| (tuple.clone(), tuple))
16642                .collect::<Vec<_>>()
16643        );
16644    }
16645
16646    #[test]
16647    fn multi_index_levels_codes_and_levshape_exclude_missing_labels() {
16648        let mi = MultiIndex::from_tuples(vec![
16649            vec!["a".into(), 1_i64.into()],
16650            vec![IndexLabel::Datetime64(i64::MIN), 2_i64.into()],
16651            vec!["a".into(), 1_i64.into()],
16652        ])
16653        .unwrap()
16654        .set_names(vec![Some("letter".into()), Some("number".into())]);
16655
16656        let levels = mi.levels();
16657        assert_eq!(levels[0].labels(), &[IndexLabel::Utf8("a".into())]);
16658        assert_eq!(levels[0].name(), Some("letter"));
16659        assert_eq!(
16660            levels[1].labels(),
16661            &[IndexLabel::Int64(1), IndexLabel::Int64(2)]
16662        );
16663        assert_eq!(levels[1].name(), Some("number"));
16664        assert_eq!(mi.codes(), vec![vec![0, -1, 0], vec![0, 1, 0]]);
16665        assert_eq!(mi.levshape(), vec![1, 2]);
16666        assert!(mi.memory_usage(false) <= mi.memory_usage(true));
16667        assert_eq!(mi.nbytes(), mi.memory_usage(false));
16668    }
16669
16670    #[test]
16671    fn multi_index_dtype_type_checks_and_item_match_object_index_shape() {
16672        let mi = MultiIndex::from_tuples(vec![vec!["a".into(), 1_i64.into()]])
16673            .unwrap()
16674            .set_names(vec![Some("letter".into()), Some("number".into())]);
16675
16676        assert_eq!(mi.dtype(), "object");
16677        assert_eq!(mi.dtypes(), vec!["object", "int64"]);
16678        assert_eq!(mi.inferred_type(), "mixed");
16679        assert_eq!(mi.infer_objects(), mi);
16680        assert!(!mi.holds_integer());
16681        assert!(!mi.is_boolean());
16682        assert!(!mi.is_categorical());
16683        assert!(!mi.is_floating());
16684        assert!(!mi.is_integer());
16685        assert!(!mi.is_interval());
16686        assert!(!mi.is_numeric());
16687        assert!(mi.is_object());
16688        assert!(mi.is_(&mi));
16689        assert_eq!(
16690            mi.item().unwrap(),
16691            vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)]
16692        );
16693
16694        let multi = mi.repeat(2);
16695        assert!(multi.item().is_err());
16696    }
16697
16698    #[test]
16699    fn multi_index_missing_masks_fillna_putmask_where_and_map() {
16700        let mi = MultiIndex::from_tuples(vec![
16701            vec!["a".into(), 1_i64.into()],
16702            vec![IndexLabel::Datetime64(i64::MIN), 2_i64.into()],
16703            vec!["b".into(), IndexLabel::Timedelta64(Timedelta::NAT)],
16704            vec!["c".into(), 3_i64.into()],
16705        ])
16706        .unwrap()
16707        .set_names(vec![Some("letter".into()), Some("number".into())]);
16708
16709        let missing_mask_errors = [
16710            mi.hasnans().unwrap_err(),
16711            mi.isna().unwrap_err(),
16712            mi.isnull().unwrap_err(),
16713            mi.notna().unwrap_err(),
16714            mi.notnull().unwrap_err(),
16715        ];
16716        for err in missing_mask_errors {
16717            assert!(matches!(
16718                err,
16719                super::IndexError::InvalidArgument(message)
16720                    if message == "isna is not defined for MultiIndex"
16721            ));
16722        }
16723        assert_eq!(mi.copy(), mi);
16724        assert_eq!(mi.remove_unused_levels(), mi);
16725
16726        let scalar_filled = mi.fillna(&IndexLabel::Utf8("missing".into()));
16727        assert_eq!(
16728            scalar_filled.to_list(),
16729            vec![
16730                vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
16731                vec![IndexLabel::Utf8("missing".into()), IndexLabel::Int64(2)],
16732                vec![
16733                    IndexLabel::Utf8("b".into()),
16734                    IndexLabel::Utf8("missing".into())
16735                ],
16736                vec![IndexLabel::Utf8("c".into()), IndexLabel::Int64(3)],
16737            ]
16738        );
16739
16740        let tuple_filled = mi
16741            .fillna_tuple(&[IndexLabel::Utf8("z".into()), IndexLabel::Int64(0)])
16742            .unwrap();
16743        assert_eq!(
16744            tuple_filled.to_list(),
16745            vec![
16746                vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
16747                vec![IndexLabel::Utf8("z".into()), IndexLabel::Int64(2)],
16748                vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(0)],
16749                vec![IndexLabel::Utf8("c".into()), IndexLabel::Int64(3)],
16750            ]
16751        );
16752        assert!(
16753            mi.fillna_tuple(&[IndexLabel::Utf8("short".into())])
16754                .is_err()
16755        );
16756
16757        let masked = mi
16758            .putmask(
16759                &[false, true, false, true],
16760                vec![IndexLabel::Utf8("x".into()), IndexLabel::Int64(9)],
16761            )
16762            .unwrap();
16763        assert_eq!(
16764            masked.to_list(),
16765            vec![
16766                vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
16767                vec![IndexLabel::Utf8("x".into()), IndexLabel::Int64(9)],
16768                vec![
16769                    IndexLabel::Utf8("b".into()),
16770                    IndexLabel::Timedelta64(Timedelta::NAT)
16771                ],
16772                vec![IndexLabel::Utf8("x".into()), IndexLabel::Int64(9)],
16773            ]
16774        );
16775        assert!(
16776            mi.putmask(&[true], vec![IndexLabel::Utf8("x".into())])
16777                .is_err()
16778        );
16779
16780        let where_result = mi
16781            .r#where(
16782                &[true, false, true, false],
16783                vec![IndexLabel::Utf8("fallback".into()), IndexLabel::Int64(5)],
16784            )
16785            .unwrap();
16786        assert_eq!(
16787            where_result.to_list(),
16788            vec![
16789                vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
16790                vec![IndexLabel::Utf8("fallback".into()), IndexLabel::Int64(5)],
16791                vec![
16792                    IndexLabel::Utf8("b".into()),
16793                    IndexLabel::Timedelta64(Timedelta::NAT)
16794                ],
16795                vec![IndexLabel::Utf8("fallback".into()), IndexLabel::Int64(5)],
16796            ]
16797        );
16798
16799        let rendered = mi.map(|tuple| {
16800            tuple
16801                .iter()
16802                .map(ToString::to_string)
16803                .collect::<Vec<_>>()
16804                .join("|")
16805        });
16806        assert_eq!(rendered[0], "a|1");
16807        assert_eq!(rendered[3], "c|3");
16808    }
16809
16810    #[test]
16811    fn multi_index_set_levels_and_set_codes_rebuild_from_pandas_catalogs() {
16812        let mi = MultiIndex::from_tuples(vec![
16813            vec!["a".into(), 1_i64.into()],
16814            vec!["b".into(), 2_i64.into()],
16815            vec!["a".into(), 1_i64.into()],
16816        ])
16817        .unwrap()
16818        .set_names(vec![Some("letter".into()), Some("number".into())]);
16819
16820        let relabeled = mi
16821            .set_levels(vec![
16822                vec![IndexLabel::Utf8("x".into()), IndexLabel::Utf8("y".into())],
16823                vec![IndexLabel::Int64(10), IndexLabel::Int64(20)],
16824            ])
16825            .unwrap();
16826        assert_eq!(
16827            relabeled.to_list(),
16828            vec![
16829                vec![IndexLabel::Utf8("x".into()), IndexLabel::Int64(10)],
16830                vec![IndexLabel::Utf8("y".into()), IndexLabel::Int64(20)],
16831                vec![IndexLabel::Utf8("x".into()), IndexLabel::Int64(10)],
16832            ]
16833        );
16834        assert_eq!(relabeled.names(), mi.names());
16835        assert!(
16836            mi.set_levels(vec![vec![IndexLabel::Utf8("only".into())]])
16837                .is_err()
16838        );
16839        assert!(
16840            mi.set_levels(vec![
16841                vec![IndexLabel::Utf8("x".into())],
16842                vec![IndexLabel::Int64(10), IndexLabel::Int64(20)],
16843            ])
16844            .is_err()
16845        );
16846
16847        let recoded = mi.set_codes(vec![vec![1, 0, 1], vec![1, -1, 0]]).unwrap();
16848        assert_eq!(
16849            recoded.to_list(),
16850            vec![
16851                vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)],
16852                vec![
16853                    IndexLabel::Utf8("a".into()),
16854                    IndexLabel::Datetime64(i64::MIN)
16855                ],
16856                vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(1)],
16857            ]
16858        );
16859        assert_eq!(recoded.names(), mi.names());
16860        assert!(mi.set_codes(vec![vec![0, 1, 0]]).is_err());
16861        assert!(mi.set_codes(vec![vec![0, 1], vec![0, 1, 0]]).is_err());
16862        assert!(mi.set_codes(vec![vec![0, 1, 0], vec![0, 99, 0]]).is_err());
16863    }
16864
/// A copy that differs only in level names still satisfies `equals` and
/// `equal_levels` but not `identical`; a row-reordered index satisfies
/// neither `equals` nor `equal_levels`.
#[test]
fn multi_index_equals_identical_and_equal_levels_match_pandas_names() {
    let tuple = |letter: &str, number: i64| {
        vec![
            IndexLabel::Utf8(letter.to_string()),
            IndexLabel::Int64(number),
        ]
    };

    let base = MultiIndex::from_tuples(vec![tuple("a", 1), tuple("b", 2)])
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("number".into())]);
    let other_names = base
        .clone()
        .set_names(vec![Some("letter".into()), Some("other".into())]);
    let swapped_rows = MultiIndex::from_tuples(vec![tuple("b", 2), tuple("a", 1)])
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("number".into())]);

    // Only the names differ.
    assert!(base.equals(&other_names));
    assert!(!base.identical(&other_names));
    assert!(base.equal_levels(&other_names));

    // Same tuples and names, different row order.
    assert!(!base.equals(&swapped_rows));
    assert!(!base.equal_levels(&swapped_rows));
}
16889
/// Flattening with separator `"_"` yields one `Utf8` label per row built
/// from that row's level values.
#[test]
fn multi_index_to_flat_index() {
    let tuples = vec![
        vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)],
        vec![IndexLabel::Utf8("b".to_string()), IndexLabel::Int64(2)],
    ];
    let mi = MultiIndex::from_tuples(tuples).unwrap();

    let flat = mi.to_flat_index("_");
    for (position, joined) in ["a_1", "b_2"].into_iter().enumerate() {
        assert_eq!(flat.labels()[position], IndexLabel::Utf8(joined.into()));
    }
}
16902
/// Dropping the middle of three levels leaves a two-level `MultiIndex`
/// that keeps the outer level names.
#[test]
fn multi_index_droplevel() {
    let three_levels = vec![
        vec![
            IndexLabel::Utf8("a".to_string()),
            IndexLabel::Int64(1),
            IndexLabel::Utf8("x".to_string()),
        ],
        vec![
            IndexLabel::Utf8("b".to_string()),
            IndexLabel::Int64(2),
            IndexLabel::Utf8("y".to_string()),
        ],
    ];
    let level_names = vec![Some("l0".into()), Some("l1".into()), Some("l2".into())];
    let mi = MultiIndex::from_tuples(three_levels)
        .unwrap()
        .set_names(level_names);

    // Drop middle level -> 2 levels remain -> MultiIndex
    match mi.droplevel(1).unwrap() {
        super::MultiIndexOrIndex::Multi(remaining) => {
            assert_eq!(remaining.nlevels(), 2);
            assert_eq!(remaining.names(), &[Some("l0".into()), Some("l2".into())]);
        }
        _ => panic!("expected MultiIndex after dropping from 3 levels"),
    }
}
16927
/// Dropping one of two levels collapses to a flat `Index` carrying the
/// surviving level's labels and name.
#[test]
fn multi_index_droplevel_to_index() {
    let two_levels = vec![
        vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)],
        vec![IndexLabel::Utf8("b".to_string()), IndexLabel::Int64(2)],
    ];
    let mi = MultiIndex::from_tuples(two_levels)
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("number".into())]);

    // Drop one level from 2 -> 1 level -> plain Index
    match mi.droplevel(0).unwrap() {
        super::MultiIndexOrIndex::Index(flat) => {
            assert_eq!(flat.labels(), &[IndexLabel::Int64(1), IndexLabel::Int64(2)]);
            assert_eq!(flat.name(), Some("number"));
        }
        _ => panic!("expected Index after dropping from 2 levels"),
    }
}
16948
/// Swapping levels 0 and 1 exchanges both the level names and the label
/// order inside each tuple.
#[test]
fn multi_index_swaplevel() {
    let letter = IndexLabel::Utf8("a".to_string());
    let number = IndexLabel::Int64(1);

    let mi = MultiIndex::from_tuples(vec![vec![letter.clone(), number.clone()]])
        .unwrap()
        .set_names(vec![Some("first".into()), Some("second".into())]);

    let swapped = mi.swaplevel(0, 1).unwrap();

    let names_after_swap = [Some("second".into()), Some("first".into())];
    assert_eq!(swapped.names(), &names_after_swap);

    let first_row = swapped.get_tuple(0).unwrap();
    assert_eq!(first_row, vec![&number, &letter]);
}
16965
/// An index built from no tuples has zero levels, zero rows, and reports
/// itself as empty.
#[test]
fn multi_index_empty() {
    let no_rows = MultiIndex::from_tuples(Vec::new()).unwrap();

    let level_count = no_rows.nlevels();
    assert_eq!(level_count, 0);
    let row_count = no_rows.len();
    assert_eq!(row_count, 0);
    assert!(no_rows.is_empty());
}
16973
/// Asking for the row just past the end yields `None` rather than a tuple.
#[test]
fn multi_index_get_tuple_out_of_bounds() {
    let single_row = vec![vec![IndexLabel::Utf8("a".to_string())]];
    let mi = MultiIndex::from_tuples(single_row).unwrap();

    let past_the_end = mi.get_tuple(1);
    assert!(past_the_end.is_none());
}
16979
/// A full-tuple lookup returns every row position holding that tuple,
/// including duplicates.
#[test]
fn multi_index_get_loc_tuple_exact_and_duplicates() {
    let text_level = |values: &[&str]| -> Vec<IndexLabel> {
        values
            .iter()
            .map(|value| IndexLabel::Utf8((*value).to_string()))
            .collect()
    };

    let mi = MultiIndex::from_arrays(vec![
        text_level(&["east", "east", "west", "east"]),
        text_level(&["A", "B", "A", "A"]),
    ])
    .unwrap();

    let key = [
        IndexLabel::Utf8("east".to_string()),
        IndexLabel::Utf8("A".to_string()),
    ];
    let positions = mi.get_loc_tuple(&key).unwrap();
    assert_eq!(positions, vec![0, 3]);
}
16996
/// A one-label prefix lookup returns the matching row positions plus a
/// flat `Index` of the remaining level, named after that level.
#[test]
fn multi_index_get_loc_level_prefix_returns_remaining_index() {
    let text_level = |values: &[&str]| -> Vec<IndexLabel> {
        values
            .iter()
            .map(|value| IndexLabel::Utf8((*value).to_string()))
            .collect()
    };

    let mi = MultiIndex::from_arrays(vec![
        text_level(&["east", "east", "west"]),
        text_level(&["A", "B", "A"]),
    ])
    .unwrap()
    .set_names(vec![Some("region".into()), Some("product".into())]);

    let prefix = [IndexLabel::Utf8("east".to_string())];
    let (positions, remaining) = mi.get_loc_level(&prefix).unwrap();
    assert_eq!(positions, vec![0, 1]);

    let remaining_is_product_index = match &remaining {
        Some(super::MultiIndexOrIndex::Index(index)) => {
            index.labels() == [IndexLabel::Utf8("A".into()), IndexLabel::Utf8("B".into())]
                && index.name() == Some("product")
        }
        _ => false,
    };
    assert!(remaining_is_product_index);
}
17018
/// `groupby` maps each distinct tuple to the row positions where it
/// occurs, so a repeated tuple collects several positions.
#[test]
fn multi_index_groupby_join_groups_duplicate_tuples_d89fe3() {
    let tuple = |letter: &str, number: i64| {
        vec![
            IndexLabel::Utf8(letter.to_string()),
            IndexLabel::Int64(number),
        ]
    };
    let mi = MultiIndex::from_tuples(vec![tuple("a", 1), tuple("b", 2), tuple("a", 1)]).unwrap();

    let groups = mi.groupby();

    let repeated_key = tuple("a", 1);
    assert_eq!(groups[&repeated_key], vec![0, 2]);

    let single_key = tuple("b", 2);
    assert_eq!(groups[&single_key], vec![1]);
}
17038
/// Exercises all four join modes: `left`/`right` return the respective
/// operand, `inner` keeps the shared tuple, `outer` keeps every tuple.
#[test]
fn multi_index_groupby_join_modes_d89fe3() {
    let tuple = |letter: &str, number: i64| {
        vec![
            IndexLabel::Utf8(letter.to_string()),
            IndexLabel::Int64(number),
        ]
    };
    let left = MultiIndex::from_tuples(vec![tuple("a", 1), tuple("b", 2), tuple("c", 3)]).unwrap();
    let right = MultiIndex::from_tuples(vec![tuple("b", 2), tuple("d", 4)]).unwrap();

    let joined_left = left.join(&right, "left").unwrap();
    assert_eq!(joined_left, left);

    let joined_right = left.join(&right, "right").unwrap();
    assert_eq!(joined_right, right);

    let joined_inner = left.join(&right, "inner").unwrap();
    assert_eq!(joined_inner.to_list(), vec![tuple("b", 2)]);

    let joined_outer = left.join(&right, "outer").unwrap();
    assert_eq!(
        joined_outer.to_list(),
        vec![tuple("a", 1), tuple("b", 2), tuple("c", 3), tuple("d", 4)]
    );
}
17069
/// `join` must reject an unknown mode string, and must reject `inner` /
/// `outer` joins between indexes whose level counts differ.
#[test]
fn multi_index_groupby_join_rejects_bad_mode_and_level_mismatch_d89fe3() {
    let left = MultiIndex::from_tuples(vec![vec!["a".into(), 1_i64.into()]]).unwrap();
    let right = MultiIndex::from_tuples(vec![vec!["a".into()]]).unwrap();

    // Same-shape operands isolate the mode check: against `right` the call
    // could fail on the level mismatch alone and mask a missing mode
    // validation.
    assert!(left.join(&left, "sideways").is_err());
    assert!(left.join(&right, "sideways").is_err());

    // Two levels vs. one level: set-style joins cannot line the tuples up.
    assert!(left.join(&right, "inner").is_err());
    assert!(left.join(&right, "outer").is_err());
}
17079
/// Full-tuple start/end keys are located by lexicographic tuple order and
/// returned as a `(start, stop)` position pair.
#[test]
fn multi_index_slice_locs_uses_lexicographic_bounds() {
    let regions: Vec<IndexLabel> = ["east", "east", "west", "west"]
        .into_iter()
        .map(|region| IndexLabel::Utf8(region.to_string()))
        .collect();
    let numbers: Vec<IndexLabel> = [1_i64, 2, 1, 2].into_iter().map(IndexLabel::Int64).collect();
    let mi = MultiIndex::from_arrays(vec![regions, numbers]).unwrap();

    let start_key = [IndexLabel::Utf8("east".to_string()), IndexLabel::Int64(2)];
    let end_key = [IndexLabel::Utf8("west".to_string()), IndexLabel::Int64(1)];

    let bounds = mi.slice_locs(Some(&start_key), Some(&end_key)).unwrap();
    assert_eq!(bounds, (1, 3));
}
17096
/// A one-label prefix key bounds the whole run of rows sharing that
/// prefix, both via `get_slice_bound` and via `slice_indexer`.
#[test]
fn multi_index_slice_bound_partial_prefixes_d89fe2() {
    let regions: Vec<IndexLabel> = ["east", "east", "west", "west"]
        .into_iter()
        .map(|region| IndexLabel::Utf8(region.to_string()))
        .collect();
    let numbers: Vec<IndexLabel> = [1_i64, 2, 1, 2].into_iter().map(IndexLabel::Int64).collect();
    let mi = MultiIndex::from_arrays(vec![regions, numbers]).unwrap();

    let east = [IndexLabel::Utf8("east".to_string())];
    let west = [IndexLabel::Utf8("west".to_string())];

    let east_left = mi.get_slice_bound(&east, "left").unwrap();
    assert_eq!(east_left, 0);
    let east_right = mi.get_slice_bound(&east, "right").unwrap();
    assert_eq!(east_right, 2);

    let from_west = mi.slice_indexer(Some(&west), None).unwrap();
    assert_eq!(from_west, (2, 4));
    let through_east = mi.slice_indexer(None, Some(&east)).unwrap();
    assert_eq!(through_east, (0, 2));
}
17113
/// A present full tuple gets distinct left/right bounds around its row; an
/// absent tuple gets the same insertion position for both sides.
#[test]
fn multi_index_slice_bound_full_tuple_and_missing_insert_d89fe2() {
    let regions: Vec<IndexLabel> = ["east", "east", "west", "west"]
        .into_iter()
        .map(|region| IndexLabel::Utf8(region.to_string()))
        .collect();
    let numbers: Vec<IndexLabel> = [1_i64, 2, 1, 2].into_iter().map(IndexLabel::Int64).collect();
    let mi = MultiIndex::from_arrays(vec![regions, numbers]).unwrap();

    let east_with = |number: i64| {
        [
            IndexLabel::Utf8("east".to_string()),
            IndexLabel::Int64(number),
        ]
    };

    // ("east", 2) exists at position 1.
    let exact = east_with(2);
    assert_eq!(mi.get_slice_bound(&exact, "left").unwrap(), 1);
    assert_eq!(mi.get_slice_bound(&exact, "right").unwrap(), 2);

    // ("east", 3) is absent.
    let missing_insert = east_with(3);
    assert_eq!(mi.get_slice_bound(&missing_insert, "left").unwrap(), 2);
    assert_eq!(mi.get_slice_bound(&missing_insert, "right").unwrap(), 2);
}
17130
/// A `side` value other than the ones used elsewhere ("left"/"right") is
/// reported as an error.
#[test]
fn multi_index_slice_bound_rejects_invalid_side_d89fe2() {
    let east = IndexLabel::Utf8("east".to_string());
    let mi = MultiIndex::from_tuples(vec![vec![east.clone()]]).unwrap();
    let key = [east];

    let outcome = mi.get_slice_bound(&key, "middle");
    assert!(outcome.is_err());
}
17138
/// `truncate` accepts one-label prefix bounds on either side, keeps the
/// level names, and yields an empty index when the lower bound lies past
/// every row.
#[test]
fn multi_index_truncate_uses_prefix_bounds_d89fe11() -> Result<(), super::IndexError> {
    let row = |letter: &str, number: i64| {
        vec![
            IndexLabel::Utf8(letter.to_string()),
            IndexLabel::Int64(number),
        ]
    };
    let prefix = |letter: &str| [IndexLabel::Utf8(letter.to_string())];

    let mi = MultiIndex::from_tuples(vec![row("a", 1), row("a", 3), row("b", 1), row("c", 1)])?
        .set_names(vec![Some("letter".into()), Some("number".into())]);

    // Both bounds supplied.
    let (lower, upper) = (prefix("a"), prefix("b"));
    let bounded = mi.truncate(Some(&lower), Some(&upper))?;
    assert_eq!(
        bounded.to_list(),
        vec![row("a", 1), row("a", 3), row("b", 1)]
    );
    assert_eq!(bounded.names(), mi.names());

    // Lower bound only.
    let from_b = prefix("b");
    let tail = mi.truncate(Some(&from_b), None)?;
    assert_eq!(tail.to_list(), vec![row("b", 1), row("c", 1)]);

    // Upper bound only, using a prefix ("aa") that matches no row.
    let through_aa = prefix("aa");
    let clipped = mi.truncate(None, Some(&through_aa))?;
    assert_eq!(clipped.to_list(), vec![row("a", 1), row("a", 3)]);

    // Lower bound beyond the last row.
    let from_d = prefix("d");
    let empty = mi.truncate(Some(&from_d), None)?;
    assert!(empty.is_empty());
    assert_eq!(empty.names(), mi.names());

    Ok(())
}
17187
/// `get_locs` resolves a one-label prefix to all matching rows, a full
/// tuple to its single row, and an empty key to no rows.
#[test]
fn multi_index_get_locs_prefix_and_exact_selectors_d89fe10() -> Result<(), super::IndexError> {
    let row = |letter: &str, number: i64| {
        vec![
            IndexLabel::Utf8(letter.to_string()),
            IndexLabel::Int64(number),
        ]
    };
    let mi = MultiIndex::from_tuples(vec![row("a", 1), row("a", 2), row("b", 1), row("b", 2)])?;

    let prefix_key = [IndexLabel::Utf8("a".to_string())];
    assert_eq!(mi.get_locs(&prefix_key)?, vec![0, 1]);

    let exact_key = [IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)];
    assert_eq!(mi.get_locs(&exact_key)?, vec![0]);

    let no_rows = Vec::<usize>::new();
    assert_eq!(mi.get_locs(&[])?, no_rows);

    Ok(())
}
17206
/// `get_locs` errors for a label that is not present and for a key with
/// more labels than the index has levels.
#[test]
fn multi_index_get_locs_rejects_missing_and_overlong_keys_d89fe10()
-> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)];
    let mi = MultiIndex::from_tuples(vec![only_row])?;

    let absent_key = [IndexLabel::Utf8("z".to_string())];
    assert!(mi.get_locs(&absent_key).is_err());

    let overlong_key = [
        IndexLabel::Utf8("a".to_string()),
        IndexLabel::Int64(1),
        IndexLabel::Utf8("extra".to_string()),
    ];
    assert!(mi.get_locs(&overlong_key).is_err());

    Ok(())
}
17224
/// With duplicate tuples in the source, every target row expands to all of
/// its source positions; an unmatched target row contributes `-1` and is
/// listed in the missing vector.
#[test]
fn multi_index_get_indexer_non_unique_expands_duplicate_matches() {
    let build = |pairs: &[(&str, i64)]| {
        let tuples = pairs
            .iter()
            .map(|&(letter, number)| {
                vec![
                    IndexLabel::Utf8(letter.to_string()),
                    IndexLabel::Int64(number),
                ]
            })
            .collect::<Vec<_>>();
        MultiIndex::from_tuples(tuples).unwrap()
    };

    let source = build(&[("a", 1), ("a", 2), ("b", 1), ("a", 1)]);
    let target = build(&[("a", 1), ("z", 9), ("a", 2), ("a", 1)]);

    let (indexer, missing) = source.get_indexer_non_unique(&target);

    // ("a", 1) sits at source rows 0 and 3; ("z", 9) is target row 1.
    assert_eq!(indexer, vec![0, 3, -1, 1, 0, 3]);
    assert_eq!(missing, vec![1]);
}
17246
/// Cross-checks `intersection` and `difference` against a reference built
/// from plain tuple hash sets (first-occurrence order of `self`, duplicates
/// removed) over partially overlapping, disjoint, and duplicate-heavy
/// inputs.
#[test]
fn multi_index_setop_packed_matches_reference_misetop() {
    use std::collections::HashSet;

    fn mk(spec: &[(&str, i64)]) -> MultiIndex {
        let tuples = spec
            .iter()
            .map(|&(s, i)| vec![IndexLabel::Utf8(s.to_string()), IndexLabel::Int64(i)])
            .collect::<Vec<_>>();
        MultiIndex::from_tuples(tuples).unwrap()
    }

    let cases: Vec<(Vec<(&str, i64)>, Vec<(&str, i64)>)> = vec![
        // Duplicate self rows with partial overlap.
        (
            vec![("a", 1), ("b", 2), ("a", 1), ("c", 3), ("b", 2)],
            vec![("b", 2), ("c", 3), ("z", 9)],
        ),
        // Disjoint operands.
        (vec![("a", 1), ("b", 2)], vec![("x", 7), ("y", 8)]),
        // Duplicate self rows fully covered on one tuple.
        (vec![("a", 1), ("a", 1), ("b", 2)], vec![("a", 1)]),
    ];

    for (sa, sb) in cases {
        let (a, b) = (mk(&sa), mk(&sb));
        let bset: HashSet<Vec<IndexLabel>> = b.to_list().into_iter().collect();

        // Reference intersection: tuples of `a` present in `b`, first hit only.
        let mut ref_inter: Vec<Vec<IndexLabel>> = Vec::new();
        let mut seen = HashSet::new();
        for t in a.to_list() {
            if bset.contains(&t) && seen.insert(t.clone()) {
                ref_inter.push(t);
            }
        }
        assert_eq!(a.intersection(&b).unwrap().to_list(), ref_inter, "inter {sa:?}");

        // Reference difference: tuples of `a` absent from `b`, first hit only.
        let mut ref_diff: Vec<Vec<IndexLabel>> = Vec::new();
        let mut seen_d = HashSet::new();
        for t in a.to_list() {
            if !bset.contains(&t) && seen_d.insert(t.clone()) {
                ref_diff.push(t);
            }
        }
        assert_eq!(a.difference(&b).unwrap().to_list(), ref_diff, "diff {sa:?}");
    }
}
17290
/// Cross-checks `duplicated` for every `DuplicateKeep` mode, plus `unique`
/// and `nunique`, against references computed from `Vec<IndexLabel>` keys
/// on 400 pseudo-random low-cardinality tuples.
#[test]
fn multi_index_duplicated_packed_matches_vec_reference_midedup() {
    use std::collections::{HashMap, HashSet};

    let n = 400usize;

    // Deterministic LCG stream; each draw advances the state once.
    let mut state: u64 = 0x9e37_79b9_7f4a_7c15;
    let mut draw = || {
        state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
        state >> 40
    };
    let (l0, l1): (Vec<IndexLabel>, Vec<IndexLabel>) = (0..n)
        .map(|_| {
            let group = draw() % 6;
            let number = draw() % 5;
            (
                IndexLabel::Utf8(format!("g{}", group)),
                IndexLabel::Int64(number as i64),
            )
        })
        .unzip();

    let mi = MultiIndex::from_arrays(vec![l0, l1]).unwrap();
    let rows = mi.to_list();

    for keep in [DuplicateKeep::First, DuplicateKeep::Last, DuplicateKeep::None] {
        let want: Vec<bool> = match keep {
            // Flag every occurrence after the first.
            DuplicateKeep::First => {
                let mut seen = HashSet::new();
                rows.iter().map(|row| !seen.insert(row.clone())).collect()
            }
            // Flag every occurrence before the last: scan backwards, then
            // restore row order.
            DuplicateKeep::Last => {
                let mut seen = HashSet::new();
                let mut flags: Vec<bool> = rows
                    .iter()
                    .rev()
                    .map(|row| !seen.insert(row.clone()))
                    .collect();
                flags.reverse();
                flags
            }
            // Flag every row whose tuple occurs more than once.
            DuplicateKeep::None => {
                let mut counts: HashMap<Vec<IndexLabel>, usize> = HashMap::new();
                for row in &rows {
                    *counts.entry(row.clone()).or_insert(0) += 1;
                }
                rows.iter().map(|row| counts[row] > 1).collect()
            }
        };
        assert_eq!(mi.duplicated(keep), want, "duplicated {keep:?}");
    }

    // Reference for unique/nunique: first occurrence of each tuple, in order.
    let mut first_seen = HashSet::new();
    let mut kept: Vec<Vec<IndexLabel>> = Vec::new();
    for row in &rows {
        if first_seen.insert(row.clone()) {
            kept.push(row.clone());
        }
    }
    assert_eq!(mi.unique().to_list(), kept);
    assert_eq!(mi.nunique(), kept.len());
}
17353
/// Cross-checks `argsort`, `sort_values`, `min`, and `max` against a
/// reference ordering: lexicographic tuple comparison with the original
/// position as tiebreak, on 600 pseudo-random tuples with many duplicates.
#[test]
fn multi_index_argsort_packed_matches_tuple_sort_misort() {
    let n = 600usize;

    // Deterministic LCG stream; each draw advances the state once.
    let mut state: u64 = 0x1234_5678_9abc_def1;
    let mut draw = || {
        state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
        state >> 33
    };

    let mut l0 = Vec::with_capacity(n);
    let mut l1 = Vec::with_capacity(n);
    for _ in 0..n {
        // Small moduli keep cardinality low so duplicate tuples occur.
        let a = draw() % 7;
        let b = draw() % 5;
        l0.push(IndexLabel::Utf8(format!("g{a}")));
        l1.push(IndexLabel::Int64(b as i64));
    }
    let mi = MultiIndex::from_arrays(vec![l0, l1]).unwrap();

    // Reference permutation: order positions by (tuple, position).
    let rows = mi.to_list();
    let mut want: Vec<usize> = (0..n).collect();
    want.sort_by(|&x, &y| (&rows[x], x).cmp(&(&rows[y], y)));

    assert_eq!(mi.argsort(), want, "argsort");

    let sorted_rows = mi.sort_values().to_list();
    assert_eq!(sorted_rows, mi.take_existing_positions(&want).to_list());

    // The extremes must be the two ends of the reference ordering.
    let smallest = rows[want[0]].clone();
    let largest = rows[want[n - 1]].clone();
    assert_eq!(mi.min(), Some(smallest));
    assert_eq!(mi.max(), Some(largest));
}
17384
/// Cross-checks `get_indexer_non_unique` (duplicate source) and
/// `get_indexer` (unique source) against references built from
/// `Vec<IndexLabel>`-keyed hash maps, with target rows absent from the
/// source.
#[test]
fn multi_index_get_indexer_packed_matches_vec_reference_mipack() {
    use std::collections::HashMap;

    fn mk(spec: &[(&str, i64)]) -> MultiIndex {
        let tuples = spec
            .iter()
            .map(|&(s, i)| vec![IndexLabel::Utf8(s.to_string()), IndexLabel::Int64(i)])
            .collect::<Vec<_>>();
        MultiIndex::from_tuples(tuples).unwrap()
    }

    let source = mk(&[("a", 1), ("a", 2), ("b", 1), ("a", 1), ("c", 5), ("b", 2)]);
    let target = mk(&[("b", 1), ("z", 9), ("a", 1), ("a", 2), ("c", 5), ("q", 0), ("b", 2)]);
    let src_rows = source.to_list();
    let tgt_rows = target.to_list();

    // Non-unique reference: every source position per tuple, in row order.
    let mut positions_by_key: HashMap<Vec<IndexLabel>, Vec<usize>> = HashMap::new();
    for (r, key) in src_rows.iter().enumerate() {
        positions_by_key.entry(key.clone()).or_default().push(r);
    }
    let mut ref_ix: Vec<isize> = Vec::new();
    let mut ref_miss: Vec<usize> = Vec::new();
    for (tr, key) in tgt_rows.iter().enumerate() {
        match positions_by_key.get(key) {
            Some(hits) => ref_ix.extend(hits.iter().map(|&p| p as isize)),
            None => {
                ref_ix.push(-1);
                ref_miss.push(tr);
            }
        }
    }
    let (ix, miss) = source.get_indexer_non_unique(&target);
    assert_eq!(ix, ref_ix, "non_unique indexer");
    assert_eq!(miss, ref_miss, "non_unique missing");

    // Unique reference: one position per tuple, -1 when absent.
    let usrc = mk(&[("a", 1), ("a", 2), ("b", 1), ("c", 5), ("b", 2)]);
    let mut first_position: HashMap<Vec<IndexLabel>, isize> = HashMap::new();
    for (r, key) in usrc.to_list().iter().enumerate() {
        first_position.entry(key.clone()).or_insert(r as isize);
    }
    let mut ref_u: Vec<isize> = Vec::with_capacity(tgt_rows.len());
    for key in &tgt_rows {
        ref_u.push(first_position.get(key).copied().unwrap_or(-1));
    }
    assert_eq!(usrc.get_indexer(&target).unwrap(), ref_u, "unique indexer");
}
17433
/// On a unique source, `get_indexer` and `get_indexer_for` both map each
/// target tuple to its source position, with `-1` for an absent tuple.
#[test]
fn multi_index_get_indexer_unique_maps_hits_and_missing_d89fe1() -> Result<(), super::IndexError>
{
    let row = |letter: &str, number: i64| {
        vec![
            IndexLabel::Utf8(letter.to_string()),
            IndexLabel::Int64(number),
        ]
    };
    let source = MultiIndex::from_tuples(vec![row("a", 1), row("b", 2), row("c", 3)])?;
    let target = MultiIndex::from_tuples(vec![row("b", 2), row("z", 9), row("a", 1)])?;

    let via_get_indexer = source.get_indexer(&target)?;
    assert_eq!(via_get_indexer, vec![1, -1, 0]);

    let via_get_indexer_for = source.get_indexer_for(&target)?;
    assert_eq!(via_get_indexer_for, vec![1, -1, 0]);

    Ok(())
}
17453
/// `get_indexer` refuses a source with duplicate tuples, using a specific
/// `InvalidArgument` message, while `get_indexer_for` still answers and
/// expands the duplicates.
#[test]
fn multi_index_get_indexer_rejects_duplicate_source_d89fe1() -> Result<(), super::IndexError> {
    let row = |letter: &str, number: i64| {
        vec![
            IndexLabel::Utf8(letter.to_string()),
            IndexLabel::Int64(number),
        ]
    };
    let source = MultiIndex::from_tuples(vec![row("a", 1), row("a", 1), row("b", 2)])?;
    let target = MultiIndex::from_tuples(vec![row("a", 1), row("b", 2)])?;

    // A successful lookup here is itself the failure being tested for.
    let err = match source.get_indexer(&target) {
        Err(err) => err,
        Ok(indexer) => {
            return Err(super::IndexError::InvalidArgument(format!(
                "duplicate source index unexpectedly returned {indexer:?}"
            )));
        }
    };
    let is_uniqueness_error = matches!(
        err,
        super::IndexError::InvalidArgument(message)
            if message == "get_indexer requires a uniquely valued MultiIndex"
    );
    assert!(is_uniqueness_error);

    let expanded = source.get_indexer_for(&target)?;
    assert_eq!(expanded, vec![0, 1, 2]);

    Ok(())
}
17483
/// When the target has a different number of levels than the source, both
/// indexer variants succeed and report every target row as `-1`.
#[test]
fn multi_index_get_indexer_level_mismatch_marks_missing_d89fe1() -> Result<(), super::IndexError>
{
    let two_level_rows = vec![
        vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)],
        vec![IndexLabel::Utf8("b".to_string()), IndexLabel::Int64(2)],
    ];
    let one_level_rows = vec![
        vec![IndexLabel::Utf8("a".to_string())],
        vec![IndexLabel::Utf8("b".to_string())],
    ];
    let source = MultiIndex::from_tuples(two_level_rows)?;
    let target = MultiIndex::from_tuples(one_level_rows)?;

    let via_get_indexer = source.get_indexer(&target)?;
    assert_eq!(via_get_indexer, vec![-1, -1]);

    let via_get_indexer_for = source.get_indexer_for(&target)?;
    assert_eq!(via_get_indexer_for, vec![-1, -1]);

    Ok(())
}
17498
/// `reindex` returns an index equal to the (named) target together with
/// the source position of each target row, `-1` where the row is absent.
#[test]
fn multi_index_reindex_maps_target_hits_and_missing_d89fe4() -> Result<(), super::IndexError> {
    let row = |letter: &str, number: i64| {
        vec![
            IndexLabel::Utf8(letter.to_string()),
            IndexLabel::Int64(number),
        ]
    };
    let source = MultiIndex::from_tuples(vec![row("a", 1), row("b", 2), row("c", 3)])?;
    let target = MultiIndex::from_tuples(vec![row("b", 2), row("z", 9), row("a", 1)])?
        .set_names(vec![Some("letter".into()), Some("number".into())]);

    let outcome = source.reindex(&target)?;
    let (reindexed, indexer) = outcome;

    assert_eq!(reindexed, target);
    assert_eq!(indexer, vec![1, -1, 0]);

    Ok(())
}
17519
/// `reindex` errors when the source holds the same tuple twice.
#[test]
fn multi_index_reindex_rejects_duplicate_source_d89fe4() -> Result<(), super::IndexError> {
    let repeated = vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)];

    let source = MultiIndex::from_tuples(vec![repeated.clone(), repeated.clone()])?;
    let target = MultiIndex::from_tuples(vec![repeated])?;

    let outcome = source.reindex(&target);
    assert!(outcome.is_err());

    Ok(())
}
17532
/// Reindexing onto a target with fewer levels succeeds, returns the target
/// unchanged, and marks its row as missing (`-1`).
#[test]
fn multi_index_reindex_level_mismatch_marks_missing_d89fe4() -> Result<(), super::IndexError> {
    let two_level_row = vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)];
    let one_level_row = vec![IndexLabel::Utf8("a".to_string())];

    let source = MultiIndex::from_tuples(vec![two_level_row])?;
    let target = MultiIndex::from_tuples(vec![one_level_row])?;

    let outcome = source.reindex(&target)?;
    let (reindexed, indexer) = outcome;

    assert_eq!(reindexed, target);
    assert_eq!(indexer, vec![-1]);

    Ok(())
}
17544
/// `rename` returns a copy with every level name replaced; the receiver's
/// names and the tuple contents are untouched.
#[test]
fn multi_index_rename_replaces_all_names_d89fe5() -> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)];
    let source = MultiIndex::from_tuples(vec![only_row])?
        .set_names(vec![Some("old0".into()), Some("old1".into())]);

    let replacement = vec![Some("new0".into()), Some("new1".into())];
    let renamed = source.rename(replacement)?;

    // The copy carries the new names...
    assert_eq!(renamed.names(), &[Some("new0".into()), Some("new1".into())]);
    // ...the receiver keeps the old ones...
    assert_eq!(source.names(), &[Some("old0".into()), Some("old1".into())]);
    // ...and the rows are the same on both.
    assert_eq!(renamed.to_list(), source.to_list());

    Ok(())
}
17558
/// `rename_level` returns a copy with only the addressed level renamed and
/// leaves the receiver's names as they were.
#[test]
fn multi_index_rename_level_replaces_one_name_d89fe5() -> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)];
    let source = MultiIndex::from_tuples(vec![only_row])?
        .set_names(vec![Some("old0".into()), Some("old1".into())]);

    let target_level = 1;
    let renamed = source.rename_level(Some("new1".into()), target_level)?;

    let names_after = renamed.names();
    assert_eq!(names_after, &[Some("old0".into()), Some("new1".into())]);
    let names_before = source.names();
    assert_eq!(names_before, &[Some("old0".into()), Some("old1".into())]);

    Ok(())
}
17571
/// Supplying one name for a two-level index fails with `LengthMismatch`
/// reporting expected 2, actual 1.
#[test]
fn multi_index_rename_rejects_wrong_name_count_d89fe5() -> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)];
    let source = MultiIndex::from_tuples(vec![only_row])?;

    let too_few_names = vec![Some("only".into())];
    let err = source.rename(too_few_names).unwrap_err();

    let reports_two_vs_one = matches!(
        err,
        super::IndexError::LengthMismatch {
            expected: 2,
            actual: 1,
            ..
        }
    );
    assert!(reports_two_vs_one);

    Ok(())
}
17589
/// Addressing level 2 of a two-level index fails with `OutOfBounds`
/// carrying position 2 and length 2.
#[test]
fn multi_index_rename_level_rejects_out_of_bounds_d89fe5() -> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)];
    let source = MultiIndex::from_tuples(vec![only_row])?;

    let past_last_level = 2;
    let err = source
        .rename_level(Some("missing".into()), past_last_level)
        .unwrap_err();

    let reports_position_and_length = matches!(
        err,
        super::IndexError::OutOfBounds {
            position: 2,
            length: 2
        }
    );
    assert!(reports_position_and_length);

    Ok(())
}
17606
/// `searchsorted` returns per-target insertion positions into a sorted
/// source; "left" and "right" differ exactly where the target tuple is
/// present in the source.
#[test]
fn multi_index_searchsorted_left_and_right_d89fe6() -> Result<(), super::IndexError> {
    let build = |pairs: &[(&str, i64)]| {
        let tuples = pairs
            .iter()
            .map(|&(letter, number)| {
                vec![
                    IndexLabel::Utf8(letter.to_string()),
                    IndexLabel::Int64(number),
                ]
            })
            .collect::<Vec<_>>();
        MultiIndex::from_tuples(tuples)
    };

    let source = build(&[("a", 1), ("a", 3), ("b", 2), ("b", 2)])?;
    let target = build(&[("a", 0), ("a", 1), ("a", 2), ("a", 3), ("b", 2), ("c", 0)])?;

    let left_positions = source.searchsorted(&target, "left")?;
    assert_eq!(left_positions, vec![0, 0, 1, 1, 2, 4]);

    let right_positions = source.searchsorted(&target, "right")?;
    assert_eq!(right_positions, vec![0, 1, 1, 2, 4, 4]);

    Ok(())
}
17635
/// Searching with a target that has no rows yields no positions.
#[test]
fn multi_index_searchsorted_empty_target_d89fe6() -> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::Utf8("a".to_string()), IndexLabel::Int64(1)];
    let source = MultiIndex::from_tuples(vec![only_row])?;
    let target = MultiIndex::from_tuples(Vec::new())?;

    let positions = source.searchsorted(&target, "left")?;
    assert_eq!(positions, Vec::<usize>::new());

    Ok(())
}
17645
17646    #[test]
17647    fn multi_index_searchsorted_rejects_invalid_side_d89fe6() -> Result<(), super::IndexError> {
17648        let source = MultiIndex::from_tuples(vec![vec!["a".into(), 1_i64.into()]])?;
17649        let target = MultiIndex::from_tuples(vec![vec!["a".into(), 1_i64.into()]])?;
17650
17651        let err = source.searchsorted(&target, "middle").unwrap_err();
17652
17653        assert!(matches!(
17654            err,
17655            super::IndexError::InvalidArgument(message)
17656                if message == "searchsorted: side must be 'left' or 'right', got \"middle\""
17657        ));
17658
17659        Ok(())
17660    }
17661
17662    #[test]
17663    fn multi_index_get_indexer_non_unique_level_mismatch_marks_all_missing() {
17664        let source = MultiIndex::from_tuples(vec![
17665            vec!["a".into(), 1_i64.into()],
17666            vec!["b".into(), 2_i64.into()],
17667        ])
17668        .unwrap();
17669        let target = MultiIndex::from_tuples(vec![vec!["a".into()], vec!["b".into()]]).unwrap();
17670
17671        let (indexer, missing) = source.get_indexer_non_unique(&target);
17672        assert_eq!(indexer, vec![-1, -1]);
17673        assert_eq!(missing, vec![0, 1]);
17674    }
17675
17676    #[test]
17677    fn multi_index_isin_tuple_membership() {
17678        let mi = MultiIndex::from_tuples(vec![
17679            vec!["a".into(), 1_i64.into()],
17680            vec!["b".into(), 2_i64.into()],
17681            vec!["a".into(), 3_i64.into()],
17682        ])
17683        .unwrap();
17684        let needles: Vec<Vec<IndexLabel>> = vec![
17685            vec!["a".into(), 1_i64.into()],
17686            vec!["b".into(), 2_i64.into()],
17687        ];
17688        assert_eq!(mi.isin(&needles), vec![true, true, false]);
17689    }
17690
17691    #[test]
17692    fn multi_index_isin_ignores_mismatched_tuple_length() {
17693        let mi = MultiIndex::from_tuples(vec![
17694            vec!["a".into(), 1_i64.into()],
17695            vec!["b".into(), 2_i64.into()],
17696        ])
17697        .unwrap();
17698        // Wrong-arity tuple contributes no matches.
17699        let needles: Vec<Vec<IndexLabel>> = vec![vec!["a".into()]];
17700        assert_eq!(mi.isin(&needles), vec![false, false]);
17701    }
17702
17703    #[test]
17704    fn multi_index_isin_empty_values_yields_all_false() {
17705        let mi = MultiIndex::from_tuples(vec![
17706            vec!["a".into(), 1_i64.into()],
17707            vec!["b".into(), 2_i64.into()],
17708        ])
17709        .unwrap();
17710        let needles: Vec<Vec<IndexLabel>> = Vec::new();
17711        assert_eq!(mi.isin(&needles), vec![false, false]);
17712    }
17713
17714    #[test]
17715    fn multi_index_isin_level_filters_by_level() {
17716        let mi = MultiIndex::from_tuples(vec![
17717            vec!["a".into(), 1_i64.into()],
17718            vec!["b".into(), 2_i64.into()],
17719            vec!["a".into(), 3_i64.into()],
17720        ])
17721        .unwrap();
17722        let level0 = mi.isin_level(&["a".into()], 0).unwrap();
17723        assert_eq!(level0, vec![true, false, true]);
17724        let level1 = mi.isin_level(&[2_i64.into(), 3_i64.into()], 1).unwrap();
17725        assert_eq!(level1, vec![false, true, true]);
17726    }
17727
17728    #[test]
17729    fn multi_index_isin_level_out_of_bounds_errors() {
17730        let mi = MultiIndex::from_tuples(vec![vec!["a".into(), 1_i64.into()]]).unwrap();
17731        let err = mi.isin_level(&["a".into()], 5).unwrap_err();
17732        assert!(matches!(err, crate::IndexError::OutOfBounds { .. }));
17733    }
17734
17735    #[test]
17736    fn multi_index_isin_empty_index_yields_empty() {
17737        let mi = MultiIndex::from_tuples(Vec::new()).unwrap();
17738        let needles: Vec<Vec<IndexLabel>> = vec![vec!["a".into(), 1_i64.into()]];
17739        assert_eq!(mi.isin(&needles), Vec::<bool>::new());
17740    }
17741
17742    #[test]
17743    fn multi_index_duplicated_keep_first_default() {
17744        let mi = MultiIndex::from_tuples(vec![
17745            vec!["a".into(), 1_i64.into()],
17746            vec!["b".into(), 2_i64.into()],
17747            vec!["a".into(), 1_i64.into()],
17748            vec!["c".into(), 3_i64.into()],
17749        ])
17750        .unwrap();
17751        let dup = mi.duplicated(DuplicateKeep::First);
17752        assert_eq!(dup, vec![false, false, true, false]);
17753    }
17754
17755    #[test]
17756    fn multi_index_duplicated_keep_last_marks_earlier_occurrences() {
17757        let mi = MultiIndex::from_tuples(vec![
17758            vec!["a".into(), 1_i64.into()],
17759            vec!["a".into(), 1_i64.into()],
17760            vec!["b".into(), 2_i64.into()],
17761        ])
17762        .unwrap();
17763        let dup = mi.duplicated(DuplicateKeep::Last);
17764        assert_eq!(dup, vec![true, false, false]);
17765    }
17766
17767    #[test]
17768    fn multi_index_duplicated_keep_none_marks_all_repeats() {
17769        let mi = MultiIndex::from_tuples(vec![
17770            vec!["a".into(), 1_i64.into()],
17771            vec!["b".into(), 2_i64.into()],
17772            vec!["a".into(), 1_i64.into()],
17773            vec!["c".into(), 3_i64.into()],
17774        ])
17775        .unwrap();
17776        let dup = mi.duplicated(DuplicateKeep::None);
17777        assert_eq!(dup, vec![true, false, true, false]);
17778    }
17779
17780    #[test]
17781    fn multi_index_is_unique_true_and_false() {
17782        let unique = MultiIndex::from_tuples(vec![
17783            vec!["a".into(), 1_i64.into()],
17784            vec!["b".into(), 2_i64.into()],
17785        ])
17786        .unwrap();
17787        assert!(unique.is_unique());
17788        assert!(!unique.has_duplicates());
17789
17790        let duped = MultiIndex::from_tuples(vec![
17791            vec!["a".into(), 1_i64.into()],
17792            vec!["a".into(), 1_i64.into()],
17793        ])
17794        .unwrap();
17795        assert!(!duped.is_unique());
17796        assert!(duped.has_duplicates());
17797    }
17798
17799    #[test]
17800    fn multi_index_duplicated_empty_yields_empty() {
17801        let mi = MultiIndex::from_tuples(Vec::new()).unwrap();
17802        assert_eq!(mi.duplicated(DuplicateKeep::First), Vec::<bool>::new());
17803        assert!(mi.is_unique());
17804    }
17805
17806    #[test]
17807    fn multi_index_all_any_reject_bool_reduction_d89fe7() -> Result<(), super::IndexError> {
17808        let non_empty = MultiIndex::from_tuples(vec![
17809            vec!["a".into(), 1_i64.into()],
17810            vec!["b".into(), 2_i64.into()],
17811        ])?;
17812        let empty = MultiIndex::from_arrays(vec![Vec::new(), Vec::new()])?;
17813
17814        let cases = [
17815            (
17816                non_empty.all().unwrap_err(),
17817                "cannot perform all with this index type: MultiIndex",
17818            ),
17819            (
17820                non_empty.any().unwrap_err(),
17821                "cannot perform any with this index type: MultiIndex",
17822            ),
17823            (
17824                empty.all().unwrap_err(),
17825                "cannot perform all with this index type: MultiIndex",
17826            ),
17827            (
17828                empty.any().unwrap_err(),
17829                "cannot perform any with this index type: MultiIndex",
17830            ),
17831        ];
17832
17833        for (err, expected) in cases {
17834            assert!(matches!(
17835                err,
17836                super::IndexError::InvalidArgument(message) if message == expected
17837            ));
17838        }
17839
17840        Ok(())
17841    }
17842
17843    #[test]
17844    fn multi_index_shift_rejects_temporal_shift_d89fe9() -> Result<(), super::IndexError> {
17845        let mi = MultiIndex::from_tuples(vec![
17846            vec!["a".into(), 1_i64.into()],
17847            vec!["b".into(), 2_i64.into()],
17848        ])?;
17849        let expected = "This method is only implemented for DatetimeIndex, PeriodIndex and TimedeltaIndex; Got type MultiIndex";
17850
17851        for err in [
17852            mi.shift(1, None).unwrap_err(),
17853            mi.shift(0, None).unwrap_err(),
17854            mi.shift(1, Some("D")).unwrap_err(),
17855        ] {
17856            assert!(matches!(
17857                err,
17858                super::IndexError::InvalidArgument(message) if message == expected
17859            ));
17860        }
17861
17862        Ok(())
17863    }
17864
17865    #[test]
17866    fn multi_index_str_rejects_string_accessor_d89fe12() -> Result<(), super::IndexError> {
17867        let mi = MultiIndex::from_tuples(vec![
17868            vec!["a".into(), 1_i64.into()],
17869            vec!["b".into(), 2_i64.into()],
17870        ])?;
17871
17872        let err = mi.r#str().unwrap_err();
17873
17874        assert!(matches!(
17875            err,
17876            super::IndexError::InvalidArgument(message)
17877                if message == "Can only use .str accessor with Index, not MultiIndex"
17878        ));
17879
17880        Ok(())
17881    }
17882
17883    #[test]
17884    fn multi_index_astype_object_clones_other_dtypes_reject_c2x17() -> Result<(), super::IndexError>
17885    {
17886        let mi = MultiIndex::from_tuples(vec![
17887            vec!["a".into(), 1_i64.into()],
17888            vec!["b".into(), 2_i64.into()],
17889        ])?;
17890
17891        for dtype in ["object", "O"] {
17892            let cloned = mi.astype(dtype)?;
17893            assert!(cloned.equals(&mi));
17894            assert_eq!(cloned.nlevels(), mi.nlevels());
17895            assert_eq!(cloned.len(), mi.len());
17896        }
17897
17898        let cat_err = mi.astype("category").unwrap_err();
17899        assert!(matches!(
17900            cat_err,
17901            super::IndexError::InvalidArgument(message)
17902                if message == "> 1 ndim Categorical are not supported at this time"
17903        ));
17904
17905        for dtype in ["int64", "float64", "datetime64[ns]"] {
17906            let err = mi.astype(dtype).unwrap_err();
17907            let expected = format!(
17908                "Setting a MultiIndex dtype to anything other than object is not supported; got {dtype}"
17909            );
17910            assert!(matches!(
17911                err,
17912                super::IndexError::InvalidArgument(message) if message == expected
17913            ));
17914        }
17915
17916        Ok(())
17917    }
17918
17919    #[test]
17920    fn multi_index_diff_rejects_tuple_subtraction_c2x17() -> Result<(), super::IndexError> {
17921        let mi = MultiIndex::from_tuples(vec![
17922            vec!["a".into(), 1_i64.into()],
17923            vec!["b".into(), 2_i64.into()],
17924            vec!["c".into(), 3_i64.into()],
17925        ])?;
17926        let expected = "cannot perform __sub__ with this index type: MultiIndex";
17927
17928        for periods in [-1_i64, 0, 1, 2] {
17929            let err = mi.diff(periods).unwrap_err();
17930            assert!(matches!(
17931                err,
17932                super::IndexError::InvalidArgument(message) if message == expected
17933            ));
17934        }
17935
17936        Ok(())
17937    }
17938
17939    #[test]
17940    fn multi_index_round_rejects_tuple_rint_c2x17() -> Result<(), super::IndexError> {
17941        let mi = MultiIndex::from_tuples(vec![
17942            vec!["a".into(), 1_i64.into()],
17943            vec!["b".into(), 2_i64.into()],
17944        ])?;
17945        let expected = "loop of ufunc does not support argument 0 of type tuple which has no callable rint method";
17946
17947        for decimals in [-1_i32, 0, 1, 4] {
17948            let err = mi.round(decimals).unwrap_err();
17949            assert!(matches!(
17950                err,
17951                super::IndexError::InvalidArgument(message) if message == expected
17952            ));
17953        }
17954
17955        Ok(())
17956    }
17957
17958    #[test]
17959    fn range_index_argmax_argmin_handles_step_direction_mrchb() {
17960        let asc = super::RangeIndex::new(0, 5, 1).unwrap();
17961        assert_eq!(asc.argmax().unwrap(), 4);
17962        assert_eq!(asc.argmin().unwrap(), 0);
17963
17964        let desc = super::RangeIndex::new(10, 0, -2).unwrap();
17965        assert_eq!(desc.argmax().unwrap(), 0);
17966        assert_eq!(desc.argmin().unwrap(), desc.len() - 1);
17967
17968        let big_step = super::RangeIndex::new(1, 100, 7).unwrap();
17969        assert_eq!(big_step.argmax().unwrap(), big_step.len() - 1);
17970        assert_eq!(big_step.argmin().unwrap(), 0);
17971    }
17972
17973    #[test]
17974    fn range_index_argmax_argmin_reject_empty_mrchb() {
17975        let empty = super::RangeIndex::new(5, 5, 1).unwrap();
17976        assert!(empty.is_empty());
17977        let max_err = empty.argmax().unwrap_err();
17978        assert!(matches!(
17979            max_err,
17980            super::IndexError::InvalidArgument(ref message)
17981                if message == "attempt to get argmax of an empty sequence"
17982        ));
17983        let min_err = empty.argmin().unwrap_err();
17984        assert!(matches!(
17985            min_err,
17986            super::IndexError::InvalidArgument(ref message)
17987                if message == "attempt to get argmin of an empty sequence"
17988        ));
17989    }
17990
17991    #[test]
17992    fn range_index_argsort_orientation_matches_step_sign_mrchb() {
17993        let asc = super::RangeIndex::new(0, 5, 1).unwrap();
17994        assert_eq!(asc.argsort(), vec![0, 1, 2, 3, 4]);
17995
17996        let desc = super::RangeIndex::new(10, 0, -2).unwrap();
17997        assert_eq!(desc.argsort(), vec![4, 3, 2, 1, 0]);
17998
17999        let empty = super::RangeIndex::new(0, 0, 1).unwrap();
18000        assert_eq!(empty.argsort(), Vec::<usize>::new());
18001    }
18002
18003    #[test]
18004    fn datetime_index_time_of_day_accessors_match_pandas_znejf() {
18005        // 2024-01-01T12:34:56.789012345Z
18006        // secs = 1704112496, subsec_nanos = 789_012_345
18007        // total nanos = 1_704_112_496_000_000_000 + 789_012_345
18008        //             = 1_704_112_496_789_012_345
18009        let total: i64 = 1_704_112_496 * 1_000_000_000 + 789_012_345;
18010        let dt = super::DatetimeIndex::new(vec![total, i64::MIN, 0]);
18011
18012        assert_eq!(dt.hour(), vec![Some(12), None, Some(0)]);
18013        assert_eq!(dt.minute(), vec![Some(34), None, Some(0)]);
18014        assert_eq!(dt.second(), vec![Some(56), None, Some(0)]);
18015        assert_eq!(dt.microsecond(), vec![Some(789_012), None, Some(0)]);
18016        assert_eq!(dt.nanosecond(), vec![Some(345), None, Some(0)]);
18017    }
18018
18019    #[test]
18020    fn datetime_index_time_of_day_indexers_match_pandas_bwzmn() -> Result<(), super::IndexError> {
18021        let hour = fp_types::Timedelta::NANOS_PER_HOUR;
18022        let minute = fp_types::Timedelta::NANOS_PER_MIN;
18023        let day = fp_types::Timedelta::NANOS_PER_DAY;
18024        let dt = super::DatetimeIndex::new(vec![
18025            9 * hour,
18026            12 * hour + 30 * minute,
18027            i64::MIN,
18028            23 * hour + 30 * minute,
18029            day + 30 * minute,
18030        ]);
18031
18032        assert_eq!(dt.indexer_at_time("12:30")?, vec![1]);
18033        assert_eq!(dt.indexer_at_time("12:30:00.000000000")?, vec![1]);
18034        assert_eq!(dt.indexer_at_time("00:30:00")?, vec![4]);
18035        assert!(dt.indexer_at_time("not-a-time").is_err());
18036
18037        assert_eq!(
18038            dt.indexer_between_time("08:00", "13:00", true, true)?,
18039            vec![0, 1]
18040        );
18041        assert_eq!(
18042            dt.indexer_between_time("09:00", "13:00", false, true)?,
18043            vec![1]
18044        );
18045        assert_eq!(
18046            dt.indexer_between_time("23:00", "01:00", true, true)?,
18047            vec![3, 4]
18048        );
18049        assert_eq!(
18050            dt.indexer_between_time("23:30", "00:30", false, false)?,
18051            Vec::<usize>::new()
18052        );
18053        assert!(
18054            dt.indexer_between_time("09:00", "not-a-time", true, true)
18055                .is_err()
18056        );
18057        Ok(())
18058    }
18059
18060    #[test]
18061    fn datetime_index_week_weekofyear_match_pandas_e8xhb() {
18062        const NS: i64 = 1_000_000_000;
18063        // 2024-01-01 (Monday) is in ISO week 1 of 2024.
18064        let jan_01 = 1_704_067_200_i64 * NS;
18065        // 2024-12-30 (Monday) is in ISO week 1 of 2025 (yes: pandas/ chrono
18066        // both report this as week 1).
18067        let dec_30 = 1_735_516_800_i64 * NS;
18068        let dt = super::DatetimeIndex::new(vec![jan_01, dec_30, i64::MIN]);
18069
18070        let weeks = dt.week();
18071        assert_eq!(weeks[0], Some(1));
18072        assert_eq!(weeks[1], Some(1));
18073        assert_eq!(weeks[2], None);
18074
18075        // weekofyear is an alias.
18076        assert_eq!(dt.weekofyear(), weeks);
18077        assert_eq!(
18078            dt.isocalendar(),
18079            vec![Some((2024, 1, 1)), Some((2025, 1, 1)), None]
18080        );
18081    }
18082
18083    #[test]
18084    fn datetime_index_day_of_x_and_quarter_match_pandas_k860x() {
18085        // 2024-01-15T00:00:00Z (a Monday).
18086        let mon: i64 = 1_705_276_800 * 1_000_000_000;
18087        // 2024-01-21T00:00:00Z (a Sunday).
18088        let sun: i64 = 1_705_795_200 * 1_000_000_000;
18089        // 2024-04-30T00:00:00Z (Apr -> 30 days; Q2).
18090        let apr30: i64 = 1_714_435_200 * 1_000_000_000;
18091        let dt = super::DatetimeIndex::new(vec![mon, sun, apr30, i64::MIN]);
18092
18093        // 2024 is a leap year. Jan 15 = ordinal 15. Jan 21 = ordinal 21.
18094        // Apr 30 = 31+29+31+30 = 121.
18095        assert_eq!(dt.dayofyear(), vec![Some(15), Some(21), Some(121), None]);
18096        assert_eq!(dt.day_of_year(), dt.dayofyear());
18097
18098        // Mon=0, Sun=6. Apr 30 2024 was a Tuesday → 1.
18099        assert_eq!(dt.dayofweek(), vec![Some(0), Some(6), Some(1), None]);
18100        assert_eq!(dt.day_of_week(), dt.dayofweek());
18101        assert_eq!(dt.weekday(), dt.dayofweek());
18102
18103        // Q1 / Q1 / Q2.
18104        assert_eq!(dt.quarter(), vec![Some(1), Some(1), Some(2), None]);
18105
18106        // 2024 is a leap year.
18107        assert_eq!(
18108            dt.is_leap_year(),
18109            vec![Some(true), Some(true), Some(true), None]
18110        );
18111
18112        // Jan -> 31, Apr -> 30.
18113        assert_eq!(dt.days_in_month(), vec![Some(31), Some(31), Some(30), None]);
18114        assert_eq!(dt.daysinmonth(), dt.days_in_month());
18115    }
18116
18117    #[test]
18118    fn datetime_index_boundary_accessors_match_pandas_qy7yd() {
18119        // 2024 is a leap year. Each entry is the 00:00:00Z second-of-epoch
18120        // multiplied by 1_000_000_000.
18121        const NS: i64 = 1_000_000_000;
18122        let jan_01 = 1_704_067_200_i64 * NS; // year/quarter/month start
18123        let jan_31 = 1_706_659_200_i64 * NS; // month end (Jan)
18124        let feb_29 = 1_709_164_800_i64 * NS; // leap-month end
18125        let mar_31 = 1_711_843_200_i64 * NS; // quarter/month end (Q1)
18126        let apr_01 = 1_711_929_600_i64 * NS; // quarter/month start (Q2)
18127        let dec_31 = 1_735_603_200_i64 * NS; // year/quarter/month end
18128        let nat = i64::MIN;
18129
18130        let dt =
18131            super::DatetimeIndex::new(vec![jan_01, jan_31, feb_29, mar_31, apr_01, dec_31, nat]);
18132
18133        // is_year_start: only Jan 1.
18134        assert_eq!(
18135            dt.is_year_start(),
18136            vec![
18137                Some(true),
18138                Some(false),
18139                Some(false),
18140                Some(false),
18141                Some(false),
18142                Some(false),
18143                None
18144            ]
18145        );
18146        // is_year_end: only Dec 31.
18147        assert_eq!(
18148            dt.is_year_end(),
18149            vec![
18150                Some(false),
18151                Some(false),
18152                Some(false),
18153                Some(false),
18154                Some(false),
18155                Some(true),
18156                None
18157            ]
18158        );
18159        // is_quarter_start: Jan 1, Apr 1.
18160        assert_eq!(
18161            dt.is_quarter_start(),
18162            vec![
18163                Some(true),
18164                Some(false),
18165                Some(false),
18166                Some(false),
18167                Some(true),
18168                Some(false),
18169                None
18170            ]
18171        );
18172        // is_quarter_end: Mar 31, Dec 31.
18173        assert_eq!(
18174            dt.is_quarter_end(),
18175            vec![
18176                Some(false),
18177                Some(false),
18178                Some(false),
18179                Some(true),
18180                Some(false),
18181                Some(true),
18182                None
18183            ]
18184        );
18185        // is_month_start: Jan 1, Apr 1.
18186        assert_eq!(
18187            dt.is_month_start(),
18188            vec![
18189                Some(true),
18190                Some(false),
18191                Some(false),
18192                Some(false),
18193                Some(true),
18194                Some(false),
18195                None
18196            ]
18197        );
18198        // is_month_end: Jan 31, Feb 29 (leap), Mar 31, Dec 31.
18199        assert_eq!(
18200            dt.is_month_end(),
18201            vec![
18202                Some(false),
18203                Some(true),
18204                Some(true),
18205                Some(true),
18206                Some(false),
18207                Some(true),
18208                None
18209            ]
18210        );
18211    }
18212
18213    #[test]
18214    fn index_variants_insert_match_pandas_veabb() -> Result<(), super::IndexError> {
18215        const NS: i64 = 1_000_000_000;
18216        let a = 1_704_067_200_i64 * NS;
18217        let b = 1_705_276_800_i64 * NS;
18218        let c = 1_706_140_800_i64 * NS;
18219        let dt = super::DatetimeIndex::new(vec![a, c]).set_name("ts");
18220
18221        // Middle insertion.
18222        let middle = dt.insert(1, b)?;
18223        assert_eq!(middle.values(), vec![Some(a), Some(b), Some(c)]);
18224        assert_eq!(middle.name(), Some("ts"));
18225
18226        // End insertion (loc == len()).
18227        let end = dt.insert(dt.len(), b)?;
18228        assert_eq!(end.values(), vec![Some(a), Some(c), Some(b)]);
18229
18230        // OOB.
18231        assert!(matches!(
18232            dt.insert(99, b).unwrap_err(),
18233            super::IndexError::OutOfBounds {
18234                position: 99,
18235                length: 2
18236            }
18237        ));
18238
18239        let td = super::TimedeltaIndex::new(vec![100_i64, 300]).set_name("d");
18240        let td_inserted = td.insert(1, 200)?;
18241        assert_eq!(td_inserted.values(), vec![Some(100), Some(200), Some(300)]);
18242        assert_eq!(td_inserted.name(), Some("d"));
18243
18244        use fp_types::{Period, PeriodFreq};
18245        let p1 = Period::new(10, PeriodFreq::Monthly);
18246        let p2 = Period::new(11, PeriodFreq::Monthly);
18247        let p3 = Period::new(12, PeriodFreq::Monthly);
18248        let pi = super::PeriodIndex::new(vec![p1, p3]).set_name("p");
18249        let pi_inserted = pi.insert(1, p2)?;
18250        assert_eq!(pi_inserted.values(), &[p1, p2, p3]);
18251
18252        let r = super::RangeIndex::new(0, 3, 1).unwrap();
18253        let r_inserted = r.insert(1, 99)?;
18254        let labels = int64_labels(&r_inserted);
18255        assert_eq!(labels, vec![0, 99, 1, 2]);
18256        Ok(())
18257    }
18258
18259    #[test]
18260    fn index_variants_format_match_pandas_n31q2() {
18261        const NS: i64 = 1_000_000_000;
18262        let dt = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS, i64::MIN]);
18263        let dt_fmt = dt.format();
18264        assert!(dt_fmt[0].starts_with("2024-01-01"));
18265        assert_eq!(dt_fmt[1], "NaT");
18266
18267        let td = super::TimedeltaIndex::new(vec![1_000_000_i64, fp_types::Timedelta::NAT]);
18268        let td_fmt = td.format();
18269        assert_eq!(td_fmt[0], "1000000");
18270        assert_eq!(td_fmt[1], "NaT");
18271
18272        use fp_types::{Period, PeriodFreq};
18273        let pi = super::PeriodIndex::new(vec![Period::new(10, PeriodFreq::Monthly)]);
18274        let pi_fmt = pi.format();
18275        assert!(!pi_fmt[0].is_empty());
18276
18277        let cat = super::CategoricalIndex::from_values(vec!["a".to_owned(), "b".to_owned()], false);
18278        assert_eq!(cat.format(), vec!["a".to_owned(), "b".to_owned()]);
18279    }
18280
18281    #[test]
18282    fn datetime_timedelta_fillna_isnull_match_pandas_az3t9() {
18283        const NS: i64 = 1_000_000_000;
18284        let unix = 1_704_067_200_i64 * NS;
18285        let dt = super::DatetimeIndex::new(vec![unix, i64::MIN, 0]).set_name("ts");
18286
18287        let filled = dt.fillna(unix);
18288        // NAT is replaced; existing values are preserved.
18289        assert_eq!(filled.values(), vec![Some(unix), Some(unix), Some(0)]);
18290        assert_eq!(filled.name(), Some("ts"));
18291
18292        let iso = dt.isnull();
18293        assert_eq!(iso, dt.isna());
18294        let nio = dt.notnull();
18295        assert_eq!(nio, dt.notna());
18296
18297        let nat = fp_types::Timedelta::NAT;
18298        let td = super::TimedeltaIndex::new(vec![100_i64, nat, 0]).set_name("d");
18299        let td_filled = td.fillna(99);
18300        assert_eq!(td_filled.values(), vec![Some(100), Some(99), Some(0)]);
18301        assert_eq!(td_filled.name(), Some("d"));
18302        assert_eq!(td.isnull(), td.isna());
18303        assert_eq!(td.notnull(), td.notna());
18304    }
18305
18306    #[test]
18307    fn datetime_index_date_and_time_accessors_match_pandas_66pll() {
18308        const NS: i64 = 1_000_000_000;
18309        // 2024-01-15T12:34:56.789012345Z (computed in br-teeck).
18310        let total: i64 = 1_705_322_096_i64 * NS + 789_012_345;
18311        let dt = super::DatetimeIndex::new(vec![total, i64::MIN, 0]);
18312
18313        let dates = dt.date();
18314        assert_eq!(
18315            dates[0],
18316            Some(chrono::NaiveDate::from_ymd_opt(2024, 1, 15).unwrap())
18317        );
18318        assert_eq!(dates[1], None);
18319        assert_eq!(
18320            dates[2],
18321            Some(chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap())
18322        );
18323
18324        let times = dt.time();
18325        assert_eq!(
18326            times[0],
18327            chrono::NaiveTime::from_hms_nano_opt(12, 34, 56, 789_012_345)
18328        );
18329        assert_eq!(times[1], None);
18330        assert_eq!(times[2], chrono::NaiveTime::from_hms_nano_opt(0, 0, 0, 0));
18331        assert_eq!(dt.timetz(), times);
18332    }
18333
18334    #[test]
18335    fn datetime_index_to_pydatetime_and_julian_match_pandas_dww6m() {
18336        const NS: i64 = 1_000_000_000;
18337        // 2024-01-01T00:00:00Z
18338        let unix = 1_704_067_200_i64;
18339        let total = unix * NS;
18340        let dt = super::DatetimeIndex::new(vec![total, i64::MIN]);
18341
18342        let pydt = dt.to_pydatetime();
18343        let first = pydt[0].expect("non-NAT label decodes");
18344        assert_eq!(first.timestamp(), unix);
18345        assert_eq!(pydt[1], None);
18346
18347        let julian = dt.to_julian_date();
18348        // JD for 2024-01-01T00:00:00Z = 2_460_310.5.
18349        let expected = (unix as f64) / 86_400.0 + 2_440_587.5;
18350        let observed = julian[0].expect("non-NAT label decodes");
18351        assert!((observed - expected).abs() < 1e-9);
18352        assert_eq!(julian[1], None);
18353    }
18354
18355    #[test]
18356    fn timedelta_index_to_pytimedelta_match_pandas_dww6m() {
18357        let one_day_nanos = fp_types::Timedelta::NANOS_PER_DAY;
18358        let td = super::TimedeltaIndex::new(vec![one_day_nanos, fp_types::Timedelta::NAT]);
18359        let durations = td.to_pytimedelta();
18360        let one_day = durations[0].expect("non-NAT label decodes");
18361        assert_eq!(one_day.num_seconds(), 86_400);
18362        assert_eq!(durations[1], None);
18363    }
18364
18365    #[test]
18366    fn datetime_index_tz_localize_tz_convert_match_pandas_qm31w() {
18367        const NS: i64 = 1_000_000_000;
18368        let dt = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS]).set_name("ts");
18369
18370        // UTC is a no-op clone.
18371        let utc = dt.tz_localize("UTC").expect("UTC localize");
18372        assert!(utc.equals(&dt));
18373        assert_eq!(utc.name(), Some("ts"));
18374
18375        // Other timezones reject.
18376        let err = dt.tz_localize("US/Eastern").unwrap_err();
18377        assert!(matches!(
18378            err,
18379            super::IndexError::InvalidArgument(ref message)
18380                if message.contains("tz_localize") && message.contains("UTC")
18381        ));
18382
18383        // tz_convert always rejects.
18384        let conv_err = dt.tz_convert("UTC").unwrap_err();
18385        assert!(matches!(
18386            conv_err,
18387            super::IndexError::InvalidArgument(ref message)
18388                if message.contains("tz_convert")
18389        ));
18390    }
18391
18392    #[test]
18393    fn datetime_timedelta_as_unit_match_pandas_70mbe() {
18394        let dt = super::DatetimeIndex::new(vec![]);
18395        assert!(dt.as_unit("ns").is_ok());
18396        let bad = dt.as_unit("us").unwrap_err();
18397        assert!(matches!(
18398            bad,
18399            super::IndexError::InvalidArgument(ref msg) if msg.contains("as_unit")
18400        ));
18401
18402        let td = super::TimedeltaIndex::new(vec![]);
18403        assert!(td.as_unit("ns").is_ok());
18404        assert!(td.as_unit("ms").is_err());
18405    }
18406
18407    #[test]
18408    fn datetime_timedelta_unit_resolution_match_pandas_c50rv() {
18409        let dt = super::DatetimeIndex::new(vec![]);
18410        assert_eq!(dt.unit(), "ns");
18411        assert_eq!(dt.resolution(), "nanosecond");
18412
18413        let td = super::TimedeltaIndex::new(vec![]);
18414        assert_eq!(td.unit(), "ns");
18415        assert_eq!(td.resolution(), "nanosecond");
18416    }
18417
18418    #[test]
18419    fn datetime_timedelta_tz_freq_accessors_return_none_ze7et() {
18420        const NS: i64 = 1_000_000_000;
18421        let dt = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS]);
18422        assert_eq!(dt.tz(), None);
18423        assert_eq!(dt.tzinfo(), None);
18424        assert_eq!(dt.freq(), None);
18425        assert_eq!(dt.freqstr(), None);
18426        assert_eq!(dt.inferred_freq(), None);
18427
18428        let td = super::TimedeltaIndex::new(vec![100_i64]);
18429        assert_eq!(td.freq(), None);
18430        assert_eq!(td.freqstr(), None);
18431        assert_eq!(td.inferred_freq(), None);
18432    }
18433
18434    #[test]
18435    fn period_index_freqstr_inferred_freq_match_pandas_ze7et() {
18436        use fp_types::{Period, PeriodFreq};
18437        let pi = super::PeriodIndex::new(vec![
18438            Period::new(10, PeriodFreq::Monthly),
18439            Period::new(11, PeriodFreq::Monthly),
18440        ]);
18441        let s = pi.freqstr().expect("homogeneous index has a freqstr");
18442        assert!(!s.is_empty());
18443        let inferred = pi.inferred_freq().expect("homogeneous freq is inferable");
18444        assert_eq!(inferred, s);
18445
18446        // Mixed-frequency index: inferred_freq returns None.
18447        let mixed = super::PeriodIndex::new(vec![
18448            Period::new(10, PeriodFreq::Monthly),
18449            Period::new(10, PeriodFreq::Annual),
18450        ]);
18451        assert_eq!(mixed.inferred_freq(), None);
18452
18453        // Empty index: freqstr is None.
18454        let empty = super::PeriodIndex::new(Vec::new());
18455        assert_eq!(empty.freqstr(), None);
18456        assert_eq!(empty.inferred_freq(), None);
18457    }
18458
18459    #[test]
18460    fn range_index_where_putmask_match_pandas_jw1kw() -> Result<(), super::IndexError> {
18461        let r = super::RangeIndex::new(0, 5, 1).unwrap().set_name("r");
18462
18463        let masked = r.r#where(&[true, false, true, false, true], 99)?;
18464        assert_eq!(int64_labels(&masked), vec![0, 99, 2, 99, 4]);
18465        assert_eq!(masked.name(), Some("r"));
18466
18467        let put = r.putmask(&[false, true, false, true, false], 99)?;
18468        assert_eq!(int64_labels(&put), vec![0, 99, 2, 99, 4]);
18469
18470        // Length mismatch.
18471        assert!(matches!(
18472            r.r#where(&[true, false], 0).unwrap_err(),
18473            super::IndexError::LengthMismatch { .. }
18474        ));
18475        assert!(matches!(
18476            r.putmask(&[true; 7], 0).unwrap_err(),
18477            super::IndexError::LengthMismatch { .. }
18478        ));
18479        Ok(())
18480    }
18481
18482    #[test]
18483    fn range_index_set_ops_match_pandas_tz40f() {
18484        let left = super::RangeIndex::new(0, 5, 1).unwrap().set_name("r");
18485        let right = super::RangeIndex::new(3, 8, 1).unwrap().set_name("r");
18486
18487        let inter = left.intersection(&right);
18488        assert_eq!(int64_labels(&inter), vec![3, 4]);
18489        assert_eq!(inter.name(), Some("r"));
18490
18491        let union = left.union(&right);
18492        assert_eq!(int64_labels(&union), vec![0, 1, 2, 3, 4, 5, 6, 7]);
18493
18494        let diff = left.difference(&right);
18495        assert_eq!(int64_labels(&diff), vec![0, 1, 2]);
18496
18497        let sym = left.symmetric_difference(&right);
18498        assert_eq!(int64_labels(&sym), vec![0, 1, 2, 5, 6, 7]);
18499
18500        // Mismatched names drop the name.
18501        let other_name = super::RangeIndex::new(3, 6, 1).unwrap().set_name("other");
18502        assert_eq!(left.union(&other_name).name(), None);
18503    }
18504
18505    #[test]
18506    fn period_range_slice_indexer_match_pandas_18kvv() -> Result<(), super::IndexError> {
18507        use fp_types::{Period, PeriodFreq};
18508        let pi = super::PeriodIndex::new(vec![
18509            Period::new(10, PeriodFreq::Monthly),
18510            Period::new(11, PeriodFreq::Monthly),
18511            Period::new(12, PeriodFreq::Monthly),
18512        ]);
18513        assert_eq!(
18514            pi.slice_indexer(
18515                Period::new(11, PeriodFreq::Monthly),
18516                Period::new(12, PeriodFreq::Monthly)
18517            )?,
18518            1..3
18519        );
18520
18521        let r = super::RangeIndex::new(0, 10, 2).unwrap();
18522        assert_eq!(r.slice_indexer(2, 6)?, 1..4);
18523        Ok(())
18524    }
18525
18526    #[test]
18527    fn period_range_slice_locs_match_pandas_fdga0() -> Result<(), super::IndexError> {
18528        use fp_types::{Period, PeriodFreq};
18529        let p1 = Period::new(10, PeriodFreq::Monthly);
18530        let p2 = Period::new(11, PeriodFreq::Monthly);
18531        let p3 = Period::new(12, PeriodFreq::Monthly);
18532        let p4 = Period::new(13, PeriodFreq::Monthly);
18533        let pi = super::PeriodIndex::new(vec![p1, p2, p3, p4]);
18534        assert_eq!(pi.slice_locs(p2, p3)?, (1, 3));
18535        assert_eq!(pi.slice_locs(p1, p4)?, (0, 4));
18536        // Non-monotonic rejects.
18537        let unsorted = super::PeriodIndex::new(vec![p3, p1, p2]);
18538        assert!(unsorted.slice_locs(p1, p3).is_err());
18539
18540        let r = super::RangeIndex::new(0, 10, 2).unwrap();
18541        // Values 0,2,4,6,8.
18542        assert_eq!(r.slice_locs(2, 6)?, (1, 4));
18543        assert_eq!(r.slice_locs(0, 8)?, (0, 5));
18544
18545        // Descending rejects.
18546        let desc = super::RangeIndex::new(10, 0, -2).unwrap();
18547        assert!(desc.slice_locs(2, 6).is_err());
18548        Ok(())
18549    }
18550
18551    #[test]
18552    fn typed_index_variants_rename_alias_match_pandas_i8t6n() {
18553        let dt = super::DatetimeIndex::new(vec![]);
18554        assert_eq!(dt.rename("ts").name(), Some("ts"));
18555
18556        let td = super::TimedeltaIndex::new(vec![]);
18557        assert_eq!(td.rename("d").name(), Some("d"));
18558
18559        use fp_types::PeriodFreq;
18560        let pi = super::PeriodIndex::new(vec![]);
18561        assert_eq!(pi.rename("p").name(), Some("p"));
18562        let _ = PeriodFreq::Monthly; // suppress unused-import warning when no other test in scope
18563
18564        let r = super::RangeIndex::new(0, 0, 1).unwrap();
18565        assert_eq!(r.rename("r").name(), Some("r"));
18566
18567        let cat = super::CategoricalIndex::from_values(vec!["a".to_owned()], false);
18568        assert_eq!(cat.rename("c").name(), Some("c"));
18569    }
18570
18571    #[test]
18572    fn typed_index_variants_reindex_match_pandas_qm3nq() {
18573        const NS: i64 = 1_000_000_000;
18574        let a = 1_704_067_200_i64 * NS;
18575        let b = 1_705_276_800_i64 * NS;
18576        let dt = super::DatetimeIndex::new(vec![a, b]);
18577        let target = super::DatetimeIndex::new(vec![b, a, 0]);
18578        let (out, indexer) = dt.reindex(&target);
18579        assert_eq!(out.values(), target.values());
18580        assert_eq!(indexer, vec![1, 0, -1]);
18581
18582        let td = super::TimedeltaIndex::new(vec![100_i64, 200]);
18583        let td_target = super::TimedeltaIndex::new(vec![200_i64, 999]);
18584        let (_, td_indexer) = td.reindex(&td_target);
18585        assert_eq!(td_indexer, vec![1, -1]);
18586
18587        use fp_types::{Period, PeriodFreq};
18588        let p1 = Period::new(10, PeriodFreq::Monthly);
18589        let p2 = Period::new(11, PeriodFreq::Monthly);
18590        let pi = super::PeriodIndex::new(vec![p1, p2]);
18591        let pi_target = super::PeriodIndex::new(vec![p2, Period::new(99, PeriodFreq::Monthly)]);
18592        let (_, pi_indexer) = pi.reindex(&pi_target);
18593        assert_eq!(pi_indexer, vec![1, -1]);
18594
18595        let r = super::RangeIndex::new(0, 5, 1).unwrap();
18596        let r_target = super::RangeIndex::new(2, 6, 1).unwrap();
18597        let (_, r_indexer) = r.reindex(&r_target);
18598        assert_eq!(r_indexer, vec![2, 3, 4, -1]);
18599    }
18600
18601    #[test]
18602    fn period_range_categorical_get_indexer_non_unique_match_pandas_z9sna()
18603    -> Result<(), super::IndexError> {
18604        use fp_types::{Period, PeriodFreq};
18605        let p1 = Period::new(10, PeriodFreq::Monthly);
18606        let p2 = Period::new(11, PeriodFreq::Monthly);
18607        // PeriodIndex with duplicate p1.
18608        let pi = super::PeriodIndex::new(vec![p1, p2, p1]);
18609        let (positions, missing) =
18610            pi.get_indexer_non_unique(&[p1, Period::new(99, PeriodFreq::Monthly)]);
18611        assert_eq!(positions, vec![0, 2, -1]);
18612        assert_eq!(missing, vec![1]);
18613
18614        // RangeIndex (always unique).
18615        let r = super::RangeIndex::new(0, 5, 1).unwrap();
18616        let (positions, missing) = r.get_indexer_non_unique(&[2, 99]);
18617        assert_eq!(positions, vec![2, -1]);
18618        assert_eq!(missing, vec![1]);
18619
18620        // CategoricalIndex with duplicate "a".
18621        let cat = super::CategoricalIndex::from_values(
18622            vec!["a".to_owned(), "b".to_owned(), "a".to_owned()],
18623            false,
18624        );
18625        let (positions, missing) = cat.get_indexer_non_unique(&["a".to_owned(), "z".to_owned()]);
18626        assert_eq!(positions, vec![0, 2, -1]);
18627        assert_eq!(missing, vec![1]);
18628
18629        // Categorical get_indexer also works.
18630        let mapped = cat.get_indexer(&["b".to_owned(), "z".to_owned()]);
18631        assert_eq!(mapped, vec![1, -1]);
18632        // get_indexer_for is an alias.
18633        assert_eq!(
18634            cat.get_indexer_for(&["a".to_owned()]),
18635            cat.get_indexer(&["a".to_owned()])
18636        );
18637        Ok(())
18638    }
18639
18640    #[test]
18641    fn typed_index_variants_get_indexer_for_aliases_match_pandas_lf1jy()
18642    -> Result<(), super::IndexError> {
18643        const NS: i64 = 1_000_000_000;
18644        let dt = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS]);
18645        assert_eq!(
18646            dt.get_indexer_for(&[1_704_067_200_i64 * NS, 0]),
18647            dt.get_indexer(&[1_704_067_200_i64 * NS, 0])
18648        );
18649
18650        let td = super::TimedeltaIndex::new(vec![100_i64, 200]);
18651        assert_eq!(td.get_indexer_for(&[200, 999]), td.get_indexer(&[200, 999]));
18652
18653        use fp_types::{Period, PeriodFreq};
18654        let pi = super::PeriodIndex::new(vec![Period::new(10, PeriodFreq::Monthly)]);
18655        let target = vec![Period::new(10, PeriodFreq::Monthly)];
18656        assert_eq!(pi.get_indexer_for(&target), pi.get_indexer(&target));
18657
18658        let r = super::RangeIndex::new(0, 5, 1).unwrap();
18659        assert_eq!(r.get_indexer_for(&[2, 99]), r.get_indexer(&[2, 99]));
18660        Ok(())
18661    }
18662
18663    #[test]
18664    fn period_range_get_loc_get_indexer_match_pandas_e7psu() -> Result<(), super::IndexError> {
18665        use fp_types::{Period, PeriodFreq};
18666        let p1 = Period::new(10, PeriodFreq::Monthly);
18667        let p2 = Period::new(11, PeriodFreq::Monthly);
18668        let p3 = Period::new(12, PeriodFreq::Monthly);
18669        let pi = super::PeriodIndex::new(vec![p1, p2, p3]);
18670        assert_eq!(pi.get_loc(p2)?, 1);
18671        assert!(pi.get_loc(Period::new(99, PeriodFreq::Monthly)).is_err());
18672        assert_eq!(
18673            pi.get_indexer(&[p3, p1, Period::new(99, PeriodFreq::Monthly)]),
18674            vec![2, 0, -1]
18675        );
18676
18677        // RangeIndex with step=2: 0, 2, 4, 6, 8.
18678        let r = super::RangeIndex::new(0, 10, 2).unwrap();
18679        assert_eq!(r.get_loc(0)?, 0);
18680        assert_eq!(r.get_loc(8)?, 4);
18681        assert!(r.get_loc(7).is_err()); // not in step
18682        assert!(r.get_loc(99).is_err()); // out of range
18683        assert_eq!(r.get_indexer(&[4, 7, 0, 99]), vec![2, -1, 0, -1]);
18684
18685        // Negative step: 10, 8, 6, 4, 2.
18686        let desc = super::RangeIndex::new(10, 0, -2).unwrap();
18687        assert_eq!(desc.get_loc(10)?, 0);
18688        assert_eq!(desc.get_loc(2)?, 4);
18689        assert!(desc.get_loc(7).is_err());
18690        Ok(())
18691    }
18692
18693    #[test]
18694    fn period_index_where_putmask_match_pandas_so9oh() -> Result<(), super::IndexError> {
18695        use fp_types::{Period, PeriodFreq};
18696        let p1 = Period::new(10, PeriodFreq::Monthly);
18697        let p2 = Period::new(11, PeriodFreq::Monthly);
18698        let p3 = Period::new(12, PeriodFreq::Monthly);
18699        let pi = super::PeriodIndex::new(vec![p1, p2, p3]).set_name("p");
18700
18701        // where: keep position 0 and 2.
18702        let masked = pi.r#where(&[true, false, true], p1)?;
18703        assert_eq!(masked.values(), &[p1, p1, p3]);
18704        assert_eq!(masked.name(), Some("p"));
18705
18706        // putmask: replace masked positions.
18707        let put = pi.putmask(&[false, true, false], p1)?;
18708        assert_eq!(put.values(), &[p1, p1, p3]);
18709
18710        // Length mismatch.
18711        let bad_len = pi.r#where(&[true, false], p1).unwrap_err();
18712        assert!(matches!(bad_len, super::IndexError::LengthMismatch { .. }));
18713
18714        // Mismatched freq replacement rejects.
18715        let mismatch = Period::new(10, PeriodFreq::Annual);
18716        assert!(pi.r#where(&[true, false, true], mismatch).is_err());
18717        assert!(pi.putmask(&[false, true, false], mismatch).is_err());
18718        Ok(())
18719    }
18720
18721    #[test]
18722    fn categorical_index_where_putmask_match_pandas_so9oh() -> Result<(), super::IndexError> {
18723        let cat = super::CategoricalIndex::with_categories(
18724            vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
18725            vec![
18726                "a".to_owned(),
18727                "b".to_owned(),
18728                "c".to_owned(),
18729                "d".to_owned(),
18730            ],
18731            false,
18732        )?;
18733
18734        let masked = cat.r#where(&[true, false, true], "d")?;
18735        assert_eq!(
18736            masked.labels(),
18737            vec!["a".to_owned(), "d".to_owned(), "c".to_owned()].as_slice()
18738        );
18739
18740        let put = cat.putmask(&[false, true, true], "d")?;
18741        assert_eq!(
18742            put.labels(),
18743            vec!["a".to_owned(), "d".to_owned(), "d".to_owned()].as_slice()
18744        );
18745
18746        // Replacement that's not a category rejects.
18747        assert!(cat.r#where(&[true, false, true], "zzz").is_err());
18748
18749        // Length mismatch.
18750        assert!(cat.putmask(&[true; 5], "a").is_err());
18751        Ok(())
18752    }
18753
18754    #[test]
18755    fn period_index_set_ops_match_pandas_8042v() -> Result<(), super::IndexError> {
18756        use fp_types::{Period, PeriodFreq};
18757        let p1 = Period::new(10, PeriodFreq::Monthly);
18758        let p2 = Period::new(11, PeriodFreq::Monthly);
18759        let p3 = Period::new(12, PeriodFreq::Monthly);
18760        let p4 = Period::new(13, PeriodFreq::Monthly);
18761        let left = super::PeriodIndex::new(vec![p1, p2, p3]).set_name("p");
18762        let right = super::PeriodIndex::new(vec![p2, p3, p4]).set_name("p");
18763
18764        assert_eq!(left.intersection(&right)?.values(), &[p2, p3]);
18765        assert_eq!(left.union(&right)?.values(), &[p1, p2, p3, p4]);
18766        assert_eq!(left.difference(&right)?.values(), &[p1]);
18767        assert_eq!(left.symmetric_difference(&right)?.values(), &[p1, p4]);
18768
18769        // Mismatched freq rejects.
18770        let mismatch = super::PeriodIndex::new(vec![Period::new(10, PeriodFreq::Annual)]);
18771        assert!(left.intersection(&mismatch).is_err());
18772        assert!(left.union(&mismatch).is_err());
18773        assert!(left.difference(&mismatch).is_err());
18774        assert!(left.symmetric_difference(&mismatch).is_err());
18775
18776        // Mismatched names drop the name.
18777        let other_name = super::PeriodIndex::new(vec![p2]).set_name("other");
18778        assert_eq!(left.union(&other_name)?.name(), None);
18779        Ok(())
18780    }
18781
18782    #[test]
18783    fn period_categorical_sort_values_match_pandas_482qd() -> Result<(), super::IndexError> {
18784        use fp_types::{Period, PeriodFreq};
18785        let p1 = Period::new(10, PeriodFreq::Monthly);
18786        let p2 = Period::new(11, PeriodFreq::Monthly);
18787        let p3 = Period::new(12, PeriodFreq::Monthly);
18788        let pi = super::PeriodIndex::new(vec![p3, p1, p2]).set_name("p");
18789        let sorted = pi.sort_values()?;
18790        let sorted_alias = pi.sort()?;
18791        assert_eq!(sorted.values(), &[p1, p2, p3]);
18792        assert_eq!(sorted_alias.values(), sorted.values());
18793        assert_eq!(sorted.name(), Some("p"));
18794        assert_eq!(sorted_alias.name(), Some("p"));
18795
18796        let mixed = super::PeriodIndex::new(vec![
18797            Period::new(10, PeriodFreq::Monthly),
18798            Period::new(10, PeriodFreq::Annual),
18799        ]);
18800        assert!(mixed.sort_values().is_err());
18801        assert!(mixed.sort().is_err());
18802
18803        // CategoricalIndex with ordered=true uses category position.
18804        let cat = super::CategoricalIndex::with_categories(
18805            vec![
18806                "b".to_owned(),
18807                "a".to_owned(),
18808                "c".to_owned(),
18809                "a".to_owned(),
18810            ],
18811            vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
18812            true,
18813        )?;
18814        let cat_sorted = cat.sort_values();
18815        let cat_sorted_alias = cat.sort();
18816        assert_eq!(
18817            cat_sorted.labels(),
18818            vec![
18819                "a".to_owned(),
18820                "a".to_owned(),
18821                "b".to_owned(),
18822                "c".to_owned()
18823            ]
18824            .as_slice()
18825        );
18826        assert_eq!(cat_sorted_alias.labels(), cat_sorted.labels());
18827
18828        Ok(())
18829    }
18830
18831    #[test]
18832    fn categorical_sort_values_by_category_code_not_lexicographic() {
18833        // Non-lexicographic category order: codes b=0, a=1, c=2. pandas
18834        // sort_values orders by code -> b, a, a, c (NOT lexicographic a,a,b,c).
18835        // The old `to_index().argsort()` text sort returned a,a,b,c, which
18836        // diverges from pandas; the existing 482qd test used lexicographic
18837        // categories so it could not catch this.
18838        let cat = super::CategoricalIndex::with_categories(
18839            vec![
18840                "a".to_owned(),
18841                "c".to_owned(),
18842                "b".to_owned(),
18843                "a".to_owned(),
18844            ],
18845            vec!["b".to_owned(), "a".to_owned(), "c".to_owned()],
18846            true,
18847        )
18848        .unwrap();
18849        // labels [a,c,b,a] -> codes [1,2,0,1]; stable argsort by code:
18850        // code0 -> pos2 (b); code1 -> pos0 (a), pos3 (a); code2 -> pos1 (c).
18851        assert_eq!(cat.argsort(), vec![2, 0, 3, 1]);
18852        assert_eq!(
18853            cat.sort_values().labels(),
18854            [
18855                "b".to_owned(),
18856                "a".to_owned(),
18857                "a".to_owned(),
18858                "c".to_owned()
18859            ]
18860            .as_slice()
18861        );
18862
18863        // Unordered categoricals also sort by category code in pandas.
18864        let cat_u = super::CategoricalIndex::with_categories(
18865            vec!["a".to_owned(), "b".to_owned()],
18866            vec!["b".to_owned(), "a".to_owned()],
18867            false,
18868        )
18869        .unwrap();
18870        // codes a=1, b=0 -> sorted by code: b, a.
18871        assert_eq!(
18872            cat_u.sort_values().labels(),
18873            ["b".to_owned(), "a".to_owned()].as_slice()
18874        );
18875    }
18876
18877    #[test]
18878    fn period_index_from_ordinals_match_pandas_baenb() {
18879        use fp_types::PeriodFreq;
18880        let pi = super::PeriodIndex::from_ordinals(&[10, 11, 12], PeriodFreq::Monthly);
18881        assert_eq!(pi.values().len(), 3);
18882        assert_eq!(pi.values()[0].ordinal, 10);
18883        assert_eq!(pi.values()[2].ordinal, 12);
18884        assert_eq!(pi.asi8(), vec![10, 11, 12]);
18885        for period in pi.values() {
18886            assert_eq!(period.freq, PeriodFreq::Monthly);
18887        }
18888
18889        let empty = super::PeriodIndex::from_ordinals(&[], PeriodFreq::Annual);
18890        assert!(empty.is_empty());
18891        assert!(empty.asi8().is_empty());
18892    }
18893
18894    #[test]
18895    fn period_index_astype_datetime_and_int_match_pandas() -> Result<(), super::IndexError> {
18896        use fp_types::PeriodFreq;
18897
18898        let pi = super::PeriodIndex::from_ordinals(&[600, 601], PeriodFreq::Monthly).set_name("p");
18899
18900        let as_int = pi.astype("int64")?;
18901        assert_eq!(
18902            as_int.labels(),
18903            &[IndexLabel::Int64(600), IndexLabel::Int64(601)]
18904        );
18905        assert_eq!(as_int.name(), Some("p"));
18906
18907        let as_datetime = pi.astype("datetime64[ns]")?;
18908        assert_eq!(
18909            as_datetime.labels(),
18910            &[
18911                IndexLabel::Datetime64(1_577_836_800_000_000_000),
18912                IndexLabel::Datetime64(1_580_515_200_000_000_000),
18913            ]
18914        );
18915        assert_eq!(as_datetime.name(), Some("p"));
18916
18917        Ok(())
18918    }
18919
18920    #[test]
18921    fn period_index_missing_value_accessors_are_all_present() {
18922        use fp_types::PeriodFreq;
18923        let pi = super::PeriodIndex::from_ordinals(&[10, 11, 12], PeriodFreq::Monthly)
18924            .set_name("periods");
18925        assert!(!pi.hasnans());
18926        assert_eq!(pi.isna(), vec![false, false, false]);
18927        assert_eq!(pi.isnull(), pi.isna());
18928        assert_eq!(pi.notna(), vec![true, true, true]);
18929        assert_eq!(pi.notnull(), pi.notna());
18930        let dropped = pi.dropna();
18931        assert_eq!(dropped.values(), pi.values());
18932        assert_eq!(dropped.name(), Some("periods"));
18933    }
18934
18935    #[test]
18936    fn period_index_mean_median_match_pandas_3rsrc() -> Result<(), super::IndexError> {
18937        use fp_types::{Period, PeriodFreq};
18938        let p1 = Period::new(10, PeriodFreq::Monthly);
18939        let p2 = Period::new(20, PeriodFreq::Monthly);
18940        let p3 = Period::new(30, PeriodFreq::Monthly);
18941        let pi = super::PeriodIndex::new(vec![p1, p2, p3]);
18942        assert_eq!(pi.mean()?.unwrap().ordinal, 20);
18943        assert_eq!(pi.median()?.unwrap().ordinal, 20);
18944
18945        let empty = super::PeriodIndex::new(Vec::new());
18946        assert_eq!(empty.mean()?, None);
18947        assert_eq!(empty.median()?, None);
18948
18949        let mixed = super::PeriodIndex::new(vec![p1, Period::new(10, PeriodFreq::Annual)]);
18950        assert!(mixed.mean().is_err());
18951        assert!(mixed.median().is_err());
18952        Ok(())
18953    }
18954
18955    #[test]
18956    fn period_index_argmax_argmin_argsort_match_pandas_qg8u5() -> Result<(), super::IndexError> {
18957        use fp_types::{Period, PeriodFreq};
18958        let p1 = Period::new(10, PeriodFreq::Monthly);
18959        let p2 = Period::new(11, PeriodFreq::Monthly);
18960        let p3 = Period::new(12, PeriodFreq::Monthly);
18961        let pi = super::PeriodIndex::new(vec![p2, p3, p1]);
18962
18963        assert_eq!(pi.argmax()?, 1);
18964        assert_eq!(pi.argmin()?, 2);
18965        assert_eq!(pi.argsort()?, vec![2, 0, 1]);
18966
18967        let empty = super::PeriodIndex::new(Vec::new());
18968        assert!(empty.argmax().is_err());
18969        assert!(empty.argmin().is_err());
18970        assert!(empty.argsort()?.is_empty());
18971
18972        let mixed = super::PeriodIndex::new(vec![p1, Period::new(10, PeriodFreq::Annual)]);
18973        assert!(mixed.argmax().is_err());
18974        assert!(mixed.argsort().is_err());
18975        Ok(())
18976    }
18977
18978    #[test]
18979    fn period_index_shift_match_pandas_pnaui() -> Result<(), super::IndexError> {
18980        use fp_types::{Period, PeriodFreq};
18981        let p1 = Period::new(10, PeriodFreq::Monthly);
18982        let p2 = Period::new(11, PeriodFreq::Monthly);
18983        let pi = super::PeriodIndex::new(vec![p1, p2]).set_name("p");
18984
18985        let shifted = pi.shift(2)?;
18986        assert_eq!(shifted.values()[0].ordinal, 12);
18987        assert_eq!(shifted.values()[1].ordinal, 13);
18988        assert_eq!(shifted.name(), Some("p"));
18989
18990        // Negative shift.
18991        let back = pi.shift(-1)?;
18992        assert_eq!(back.values()[0].ordinal, 9);
18993
18994        // Mixed-freq rejects.
18995        let mixed = super::PeriodIndex::new(vec![p1, Period::new(10, PeriodFreq::Annual)]);
18996        assert!(mixed.shift(1).is_err());
18997        Ok(())
18998    }
18999
19000    #[test]
19001    fn period_index_is_full_match_pandas_7i32m() {
19002        use fp_types::{Period, PeriodFreq};
19003        let p1 = Period::new(10, PeriodFreq::Monthly);
19004        let p2 = Period::new(11, PeriodFreq::Monthly);
19005        let p3 = Period::new(12, PeriodFreq::Monthly);
19006        let p5 = Period::new(14, PeriodFreq::Monthly);
19007
19008        // Contiguous.
19009        let full = super::PeriodIndex::new(vec![p1, p2, p3]);
19010        assert!(full.is_full());
19011
19012        // Out-of-order but contiguous (sort first).
19013        let unsorted = super::PeriodIndex::new(vec![p3, p1, p2]);
19014        assert!(unsorted.is_full());
19015
19016        // Gap.
19017        let gap = super::PeriodIndex::new(vec![p1, p2, p5]);
19018        assert!(!gap.is_full());
19019
19020        // Empty / single-element.
19021        assert!(super::PeriodIndex::new(Vec::new()).is_full());
19022        assert!(super::PeriodIndex::new(vec![p1]).is_full());
19023
19024        // Mixed-frequency.
19025        let mixed = super::PeriodIndex::new(vec![p1, Period::new(10, PeriodFreq::Annual)]);
19026        assert!(!mixed.is_full());
19027    }
19028
19029    #[test]
19030    fn period_index_min_max_match_pandas_fwlv4() -> Result<(), super::IndexError> {
19031        use fp_types::{Period, PeriodFreq};
19032        let p1 = Period::new(10, PeriodFreq::Monthly);
19033        let p2 = Period::new(11, PeriodFreq::Monthly);
19034        let p3 = Period::new(12, PeriodFreq::Monthly);
19035        let pi = super::PeriodIndex::new(vec![p3, p1, p2]);
19036        assert_eq!(pi.min()?, Some(p1));
19037        assert_eq!(pi.max()?, Some(p3));
19038
19039        let empty = super::PeriodIndex::new(Vec::new());
19040        assert_eq!(empty.min()?, None);
19041        assert_eq!(empty.max()?, None);
19042
19043        // Mixed freq rejects.
19044        let mixed = super::PeriodIndex::new(vec![
19045            Period::new(10, PeriodFreq::Monthly),
19046            Period::new(10, PeriodFreq::Annual),
19047        ]);
19048        assert!(mixed.min().is_err());
19049        assert!(mixed.max().is_err());
19050        Ok(())
19051    }
19052
19053    #[test]
19054    fn range_index_sort_values_closed_form_mhcge() {
19055        let asc = super::RangeIndex::new(0, 5, 1).unwrap();
19056        assert!(asc.sort_values().equals(&asc));
19057        assert!(asc.sort().equals(&asc));
19058
19059        let desc = super::RangeIndex::new(10, 0, -2).unwrap();
19060        // Original values 10, 8, 6, 4, 2 → sorted ascending 2, 4, 6, 8, 10.
19061        let sorted = desc.sort_values();
19062        let sorted_alias = desc.sort();
19063        assert_eq!(sorted.values(), vec![2, 4, 6, 8, 10]);
19064        assert_eq!(sorted_alias.values(), sorted.values());
19065
19066        let empty = super::RangeIndex::new(0, 0, 1).unwrap();
19067        assert!(empty.sort_values().is_empty());
19068        assert!(empty.sort().is_empty());
19069
19070        let zero_step = super::RangeIndex::new(0, 5, 1).unwrap();
19071        assert!(zero_step.sort_values().equals(&zero_step));
19072        assert!(zero_step.sort().equals(&zero_step));
19073    }
19074
19075    #[test]
19076    fn range_index_std_var_median_closed_form_tkc0m() {
19077        let r = super::RangeIndex::new(1, 11, 1).unwrap();
19078        // 1..=10: median = 5.5; var = sum((x - 5.5)^2)/9; std = sqrt(var).
19079        assert_eq!(r.median(), Some(5.5));
19080        let var = r.var().unwrap();
19081        // Expected variance for 1..=10 is 9.166666...
19082        assert!((var - 9.1666666666).abs() < 1e-6);
19083        let std_val = r.std().unwrap();
19084        assert!((std_val - var.sqrt()).abs() < 1e-12);
19085
19086        // Single element: var/std None; median = the value.
19087        let one = super::RangeIndex::new(5, 6, 1).unwrap();
19088        assert_eq!(one.median(), Some(5.0));
19089        assert_eq!(one.var(), None);
19090        assert_eq!(one.std(), None);
19091
19092        // Empty: all None.
19093        let empty = super::RangeIndex::new(0, 0, 1).unwrap();
19094        assert_eq!(empty.median(), None);
19095        assert_eq!(empty.var(), None);
19096    }
19097
19098    #[test]
19099    fn range_index_prod_match_pandas_8yxw8() {
19100        // 1..=5 prod = 120.
19101        let r = super::RangeIndex::new(1, 6, 1).unwrap();
19102        assert_eq!(r.prod(), 120);
19103
19104        // Empty prod = 1.
19105        let empty = super::RangeIndex::new(0, 0, 1).unwrap();
19106        assert_eq!(empty.prod(), 1);
19107
19108        // Includes zero → prod = 0.
19109        let with_zero = super::RangeIndex::new(0, 5, 1).unwrap();
19110        assert_eq!(with_zero.prod(), 0);
19111    }
19112
19113    #[test]
19114    fn range_index_min_max_sum_mean_closed_form_fwlv4() {
19115        let asc = super::RangeIndex::new(1, 11, 1).unwrap();
19116        // Values 1..=10
19117        assert_eq!(asc.min(), Some(1));
19118        assert_eq!(asc.max(), Some(10));
19119        assert_eq!(asc.sum(), 55);
19120        assert_eq!(asc.mean(), Some(5.5));
19121
19122        let desc = super::RangeIndex::new(10, 0, -2).unwrap();
19123        // Values 10, 8, 6, 4, 2 — sum=30, mean=6, min=2, max=10
19124        assert_eq!(desc.min(), Some(2));
19125        assert_eq!(desc.max(), Some(10));
19126        assert_eq!(desc.sum(), 30);
19127        assert_eq!(desc.mean(), Some(6.0));
19128
19129        let empty = super::RangeIndex::new(0, 0, 1).unwrap();
19130        assert_eq!(empty.min(), None);
19131        assert_eq!(empty.max(), None);
19132        assert_eq!(empty.sum(), 0);
19133        assert_eq!(empty.mean(), None);
19134    }
19135
19136    #[test]
19137    fn datetime_index_where_putmask_match_pandas_nwqty() -> Result<(), super::IndexError> {
19138        const NS: i64 = 1_000_000_000;
19139        let a = 1_704_067_200_i64 * NS;
19140        let b = 1_705_276_800_i64 * NS;
19141        let c = 1_706_140_800_i64 * NS;
19142        let dt = super::DatetimeIndex::new(vec![a, b, c]).set_name("ts");
19143
19144        // where: keep position 0 and 2; replace position 1 with i64::MIN (NAT).
19145        let masked = dt.r#where(&[true, false, true], i64::MIN)?;
19146        assert_eq!(masked.values(), vec![Some(a), None, Some(c)]);
19147        assert_eq!(masked.name(), Some("ts"));
19148
19149        // putmask: replace positions where mask=true with c.
19150        let put = dt.putmask(&[true, false, false], c)?;
19151        assert_eq!(put.values(), vec![Some(c), Some(b), Some(c)]);
19152
19153        // Length mismatch errors.
19154        let bad_cond = dt.r#where(&[true, false], i64::MIN).unwrap_err();
19155        assert!(matches!(
19156            bad_cond,
19157            super::IndexError::LengthMismatch {
19158                expected: 3,
19159                actual: 2,
19160                ..
19161            }
19162        ));
19163        let bad_mask = dt.putmask(&[true; 5], c).unwrap_err();
19164        assert!(matches!(
19165            bad_mask,
19166            super::IndexError::LengthMismatch {
19167                expected: 3,
19168                actual: 5,
19169                ..
19170            }
19171        ));
19172        Ok(())
19173    }
19174
19175    #[test]
19176    fn timedelta_index_where_putmask_match_pandas_nwqty() -> Result<(), super::IndexError> {
19177        let nat = fp_types::Timedelta::NAT;
19178        let td = super::TimedeltaIndex::new(vec![100_i64, 200, 300]).set_name("d");
19179
19180        let masked = td.r#where(&[false, true, false], nat)?;
19181        assert_eq!(masked.values(), vec![None, Some(200), None]);
19182        assert_eq!(masked.name(), Some("d"));
19183
19184        let put = td.putmask(&[false, true, true], 999)?;
19185        assert_eq!(put.values(), vec![Some(100), Some(999), Some(999)]);
19186
19187        let bad = td.r#where(&[true, false], nat).unwrap_err();
19188        assert!(matches!(
19189            bad,
19190            super::IndexError::LengthMismatch {
19191                expected: 3,
19192                actual: 2,
19193                ..
19194            }
19195        ));
19196        Ok(())
19197    }
19198
19199    #[test]
19200    fn index_variants_searchsorted_match_pandas_tam73() -> Result<(), super::IndexError> {
19201        const NS: i64 = 1_000_000_000;
19202        let a = 1_704_067_200_i64 * NS;
19203        let b = 1_705_276_800_i64 * NS;
19204        let c = 1_706_140_800_i64 * NS;
19205        let dt = super::DatetimeIndex::new(vec![a, b, c]);
19206
19207        assert_eq!(dt.searchsorted(a, "left")?, 0);
19208        assert_eq!(dt.searchsorted(a, "right")?, 1);
19209        assert_eq!(dt.searchsorted(c, "right")?, 3);
19210        // Mid-range insertion (between a and b).
19211        let mid = a + 1;
19212        assert_eq!(dt.searchsorted(mid, "left")?, 1);
19213
19214        // Bad side.
19215        assert!(dt.searchsorted(a, "middle").is_err());
19216
19217        let td = super::TimedeltaIndex::new(vec![100_i64, 200, 300]);
19218        assert_eq!(td.searchsorted(150, "left")?, 1);
19219        assert_eq!(td.searchsorted(200, "right")?, 2);
19220
19221        use fp_types::{Period, PeriodFreq};
19222        let p1 = Period::new(10, PeriodFreq::Monthly);
19223        let p2 = Period::new(11, PeriodFreq::Monthly);
19224        let p3 = Period::new(12, PeriodFreq::Monthly);
19225        let pi = super::PeriodIndex::new(vec![p1, p2, p3]);
19226        assert_eq!(pi.searchsorted(p2, "left")?, 1);
19227        assert_eq!(pi.searchsorted(p3, "right")?, 3);
19228        // Mismatched freq rejects.
19229        let mismatch = Period::new(10, PeriodFreq::Annual);
19230        assert!(pi.searchsorted(mismatch, "left").is_err());
19231
19232        let r = super::RangeIndex::new(0, 10, 2).unwrap();
19233        // Range values: [0, 2, 4, 6, 8].
19234        assert_eq!(r.searchsorted(4, "left")?, 2);
19235        assert_eq!(r.searchsorted(4, "right")?, 3);
19236        assert_eq!(r.searchsorted(7, "left")?, 4);
19237
19238        // Descending range rejects.
19239        let desc = super::RangeIndex::new(10, 0, -2).unwrap();
19240        assert!(desc.searchsorted(4, "left").is_err());
19241        Ok(())
19242    }
19243
19244    #[test]
19245    fn datetime_timedelta_get_indexer_non_unique_match_pandas_sm32a() {
19246        const NS: i64 = 1_000_000_000;
19247        let a = 1_704_067_200_i64 * NS;
19248        let b = 1_705_276_800_i64 * NS;
19249        // 4-element index with a duplicated `a`.
19250        let dt = super::DatetimeIndex::new(vec![a, b, a, b]);
19251        let (positions, missing) = dt.get_indexer_non_unique(&[a, b + 99]);
19252        // a matches positions 0 and 2; b+99 is missing.
19253        assert_eq!(positions, vec![0, 2, -1]);
19254        assert_eq!(missing, vec![1]);
19255
19256        let td = super::TimedeltaIndex::new(vec![100_i64, 200, 100]);
19257        let (positions, missing) = td.get_indexer_non_unique(&[100, 999]);
19258        assert_eq!(positions, vec![0, 2, -1]);
19259        assert_eq!(missing, vec![1]);
19260    }
19261
19262    #[test]
19263    fn datetime_timedelta_get_loc_get_indexer_match_pandas_6x9de() -> Result<(), super::IndexError>
19264    {
19265        const NS: i64 = 1_000_000_000;
19266        let a = 1_704_067_200_i64 * NS;
19267        let b = 1_705_276_800_i64 * NS;
19268        let c = 1_706_140_800_i64 * NS;
19269        let dt = super::DatetimeIndex::new(vec![a, b, c]);
19270
19271        // get_loc finds first position.
19272        assert_eq!(dt.get_loc(b)?, 1);
19273        let missing_err = dt.get_loc(b + 1).unwrap_err();
19274        assert!(matches!(
19275            missing_err,
19276            super::IndexError::InvalidArgument(ref msg) if msg.contains("get_loc")
19277        ));
19278
19279        // get_indexer maps each target.
19280        let mapped = dt.get_indexer(&[c, a, b + 999]);
19281        assert_eq!(mapped, vec![2, 0, -1]);
19282
19283        // TimedeltaIndex spot check.
19284        let td = super::TimedeltaIndex::new(vec![100_i64, 200, 300]);
19285        assert_eq!(td.get_loc(200)?, 1);
19286        assert_eq!(td.get_indexer(&[300, 999, 100]), vec![2, -1, 0]);
19287        Ok(())
19288    }
19289
19290    #[test]
19291    fn datetime_timedelta_slice_indexer_match_pandas_95eqf() -> Result<(), super::IndexError> {
19292        const NS: i64 = 1_000_000_000;
19293        let a = 1_704_067_200_i64 * NS;
19294        let b = 1_705_276_800_i64 * NS;
19295        let c = 1_706_140_800_i64 * NS;
19296        let dt = super::DatetimeIndex::new(vec![a, b, c]);
19297        assert_eq!(dt.slice_indexer(b, c)?, 1..3);
19298        assert_eq!(dt.slice_indexer(a, c)?, 0..3);
19299
19300        let td = super::TimedeltaIndex::new(vec![100_i64, 200, 300]);
19301        assert_eq!(td.slice_indexer(150, 250)?, 1..2);
19302        Ok(())
19303    }
19304
19305    #[test]
19306    fn datetime_timedelta_get_slice_bound_match_pandas_x7r04() -> Result<(), super::IndexError> {
19307        const NS: i64 = 1_000_000_000;
19308        let a = 1_704_067_200_i64 * NS;
19309        let b = 1_705_276_800_i64 * NS;
19310        let c = 1_706_140_800_i64 * NS;
19311        let dt = super::DatetimeIndex::new(vec![a, b, c]);
19312        assert_eq!(dt.get_slice_bound(b, "left")?, 1);
19313        assert_eq!(dt.get_slice_bound(b, "right")?, 2);
19314        assert!(dt.get_slice_bound(b, "middle").is_err());
19315
19316        let td = super::TimedeltaIndex::new(vec![100_i64, 200, 300]);
19317        assert_eq!(td.get_slice_bound(150, "left")?, 1);
19318        assert_eq!(td.get_slice_bound(200, "right")?, 2);
19319        Ok(())
19320    }
19321
19322    #[test]
19323    fn datetime_timedelta_slice_locs_match_pandas_mxedz() -> Result<(), super::IndexError> {
19324        const NS: i64 = 1_000_000_000;
19325        let a = 1_704_067_200_i64 * NS;
19326        let b = 1_705_276_800_i64 * NS;
19327        let c = 1_706_140_800_i64 * NS;
19328        let d = 1_707_350_400_i64 * NS;
19329        let dt = super::DatetimeIndex::new(vec![a, b, c, d]);
19330
19331        // Slice [b, c]: positions [1, 3) (right is exclusive in slice_locs).
19332        assert_eq!(dt.slice_locs(b, c)?, (1, 3));
19333        // Slice [a, d]: full range.
19334        assert_eq!(dt.slice_locs(a, d)?, (0, 4));
19335        // Slice past the end: empty range.
19336        assert_eq!(dt.slice_locs(d + 1, d + 2)?, (4, 4));
19337
19338        // Non-monotonic rejects.
19339        let unsorted = super::DatetimeIndex::new(vec![c, a, b, d]);
19340        assert!(unsorted.slice_locs(a, c).is_err());
19341
19342        // TimedeltaIndex spot check.
19343        let td = super::TimedeltaIndex::new(vec![100_i64, 200, 300]);
19344        assert_eq!(td.slice_locs(150, 250)?, (1, 2));
19345
19346        Ok(())
19347    }
19348
19349    #[test]
19350    fn index_variants_to_flat_index_match_pandas_wcpw5() {
19351        const NS: i64 = 1_000_000_000;
19352        let dt = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS]).set_name("ts");
19353        let dt_flat = dt.to_flat_index();
19354        assert_eq!(dt_flat.len(), 1);
19355        assert_eq!(dt_flat.name(), Some("ts"));
19356        assert!(matches!(
19357            dt_flat.labels()[0],
19358            super::IndexLabel::Datetime64(_)
19359        ));
19360        assert_eq!(dt.to_frame(), dt_flat.to_frame());
19361        assert_eq!(dt.to_series(), dt_flat.to_series());
19362
19363        let td = super::TimedeltaIndex::new(vec![100_i64]).set_name("d");
19364        let td_flat = td.to_flat_index();
19365        assert_eq!(td_flat.len(), 1);
19366        assert_eq!(td_flat.name(), Some("d"));
19367        assert_eq!(td.to_frame(), td_flat.to_frame());
19368        assert_eq!(td.to_series(), td_flat.to_series());
19369
19370        use fp_types::{Period, PeriodFreq};
19371        let pi = super::PeriodIndex::new(vec![Period::new(10, PeriodFreq::Monthly)]).set_name("p");
19372        let pi_flat = pi.to_flat_index();
19373        assert_eq!(pi_flat.len(), 1);
19374        assert!(matches!(pi_flat.labels()[0], super::IndexLabel::Utf8(_)));
19375        assert_eq!(pi.to_frame(), pi_flat.to_frame());
19376        assert_eq!(pi.to_series(), pi_flat.to_series());
19377
19378        let r = super::RangeIndex::new(0, 3, 1).unwrap().set_name("r");
19379        let r_flat = r.to_flat_index();
19380        assert_eq!(r_flat.len(), 3);
19381        assert_eq!(r_flat.name(), Some("r"));
19382        assert_eq!(r.to_frame(), r_flat.to_frame());
19383        assert_eq!(r.to_series(), r_flat.to_series());
19384
19385        let cat = super::CategoricalIndex::from_values(vec!["a".to_owned(), "b".to_owned()], false);
19386        let cat_flat = cat.to_flat_index();
19387        assert_eq!(cat_flat.len(), 2);
19388        assert_eq!(cat.to_frame(), cat_flat.to_frame());
19389        assert_eq!(cat.to_series(), cat_flat.to_series());
19390    }
19391
19392    #[test]
19393    fn index_variants_all_any_forward_flat_truthiness_ejwyw() {
19394        const NS: i64 = 1_000_000_000;
19395
19396        let dt = super::DatetimeIndex::new(vec![0, NS]);
19397        let dt_flat = dt.to_flat_index();
19398        assert_eq!(dt.any(), dt_flat.any());
19399        assert_eq!(dt.all(), dt_flat.all());
19400        assert!(dt.any());
19401        assert!(!dt.all());
19402
19403        let td = super::TimedeltaIndex::new(vec![0, 5]);
19404        let td_flat = td.to_flat_index();
19405        assert_eq!(td.any(), td_flat.any());
19406        assert_eq!(td.all(), td_flat.all());
19407        assert!(td.any());
19408        assert!(!td.all());
19409
19410        use fp_types::{Period, PeriodFreq};
19411        let pi = super::PeriodIndex::new(vec![
19412            Period::new(1, PeriodFreq::Monthly),
19413            Period::new(2, PeriodFreq::Monthly),
19414        ]);
19415        let pi_flat = pi.to_flat_index();
19416        assert_eq!(pi.any(), pi_flat.any());
19417        assert_eq!(pi.all(), pi_flat.all());
19418        assert!(pi.any());
19419        assert!(pi.all());
19420
19421        let range = super::RangeIndex::new(0, 3, 1).unwrap();
19422        let range_flat = range.to_flat_index();
19423        assert_eq!(range.any(), range_flat.any());
19424        assert_eq!(range.all(), range_flat.all());
19425        assert!(range.any());
19426        assert!(!range.all());
19427
19428        let empty_range = super::RangeIndex::new(0, 0, 1).unwrap();
19429        assert!(!empty_range.any());
19430        assert!(empty_range.all());
19431
19432        let cat = super::CategoricalIndex::from_values(vec![String::new(), "x".to_owned()], false);
19433        let cat_flat = cat.to_flat_index();
19434        assert_eq!(cat.any(), cat_flat.any());
19435        assert_eq!(cat.all(), cat_flat.all());
19436        assert!(cat.any());
19437        assert!(!cat.all());
19438    }
19439
19440    #[test]
19441    fn index_variants_get_level_values_forward_flat_xf0zn() -> Result<(), super::IndexError> {
19442        const NS: i64 = 1_000_000_000;
19443
19444        let dt = super::DatetimeIndex::new(vec![NS, 2 * NS]).set_name("ts");
19445        assert_eq!(dt.get_level_values(0)?, dt.to_flat_index());
19446
19447        let td = super::TimedeltaIndex::new(vec![5, 10]).set_name("delta");
19448        assert_eq!(td.get_level_values(0)?, td.to_flat_index());
19449
19450        use fp_types::{Period, PeriodFreq};
19451        let pi =
19452            super::PeriodIndex::new(vec![Period::new(1, PeriodFreq::Monthly)]).set_name("period");
19453        assert_eq!(pi.get_level_values(0)?, pi.to_flat_index());
19454
19455        let range = super::RangeIndex::new(1, 4, 1)?.set_name("row");
19456        assert_eq!(range.get_level_values(0)?, range.to_flat_index());
19457
19458        let cat =
19459            super::CategoricalIndex::from_values(vec!["a".to_owned()], false).set_name("category");
19460        assert_eq!(cat.get_level_values(0)?, cat.to_flat_index());
19461
19462        assert!(matches!(
19463            cat.get_level_values(1),
19464            Err(super::IndexError::OutOfBounds {
19465                position: 1,
19466                length: 1
19467            })
19468        ));
19469
19470        Ok(())
19471    }
19472
19473    #[test]
19474    fn index_variants_droplevel_forward_flat_errors_t8vpw() -> Result<(), super::IndexError> {
19475        const NS: i64 = 1_000_000_000;
19476
19477        let dt = super::DatetimeIndex::new(vec![NS, 2 * NS]).set_name("ts");
19478        assert!(matches!(
19479            dt.droplevel(0),
19480            Err(super::IndexError::InvalidArgument(message))
19481                if message == "cannot remove the only level from a flat Index"
19482        ));
19483
19484        let td = super::TimedeltaIndex::new(vec![5, 10]).set_name("delta");
19485        assert!(matches!(
19486            td.droplevel(0),
19487            Err(super::IndexError::InvalidArgument(message))
19488                if message == "cannot remove the only level from a flat Index"
19489        ));
19490
19491        use fp_types::{Period, PeriodFreq};
19492        let pi =
19493            super::PeriodIndex::new(vec![Period::new(1, PeriodFreq::Monthly)]).set_name("period");
19494        assert!(matches!(
19495            pi.droplevel(0),
19496            Err(super::IndexError::InvalidArgument(message))
19497                if message == "cannot remove the only level from a flat Index"
19498        ));
19499
19500        let range = super::RangeIndex::new(1, 4, 1)?.set_name("row");
19501        assert!(matches!(
19502            range.droplevel(0),
19503            Err(super::IndexError::InvalidArgument(message))
19504                if message == "cannot remove the only level from a flat Index"
19505        ));
19506
19507        let cat =
19508            super::CategoricalIndex::from_values(vec!["a".to_owned()], false).set_name("category");
19509        assert!(matches!(
19510            cat.droplevel(1),
19511            Err(super::IndexError::OutOfBounds {
19512                position: 1,
19513                length: 1
19514            })
19515        ));
19516
19517        Ok(())
19518    }
19519
19520    #[test]
19521    fn index_variants_groupby_forward_flat_buckets_vypi3() {
19522        const NS: i64 = 1_000_000_000;
19523
19524        let dt = super::DatetimeIndex::new(vec![NS, 2 * NS, NS]);
19525        assert_eq!(dt.groupby(), dt.to_flat_index().groupby());
19526
19527        let td = super::TimedeltaIndex::new(vec![5, 10, 5]);
19528        assert_eq!(td.groupby(), td.to_flat_index().groupby());
19529
19530        use fp_types::{Period, PeriodFreq};
19531        let pi = super::PeriodIndex::new(vec![
19532            Period::new(1, PeriodFreq::Monthly),
19533            Period::new(2, PeriodFreq::Monthly),
19534            Period::new(1, PeriodFreq::Monthly),
19535        ]);
19536        assert_eq!(pi.groupby(), pi.to_flat_index().groupby());
19537
19538        let range = super::RangeIndex::new(2, 8, 2).unwrap();
19539        assert_eq!(range.groupby(), range.to_flat_index().groupby());
19540
19541        let cat = super::CategoricalIndex::from_values(
19542            vec!["a".to_owned(), "b".to_owned(), "a".to_owned()],
19543            false,
19544        );
19545        assert_eq!(cat.groupby(), cat.to_flat_index().groupby());
19546        assert_eq!(
19547            cat.groupby()
19548                .get(&super::IndexLabel::Utf8("a".to_owned()))
19549                .cloned(),
19550            Some(vec![0, 2])
19551        );
19552    }
19553
19554    #[test]
19555    fn index_variants_map_forward_flat_and_preserve_name_vxlfs() {
19556        const NS: i64 = 1_000_000_000;
19557
19558        let dt = super::DatetimeIndex::new(vec![NS, 2 * NS]).set_name("ts");
19559        let dt_mapped = dt.map(|label| match label {
19560            super::IndexLabel::Datetime64(nanos) => super::IndexLabel::Int64(*nanos / NS),
19561            other => other.clone(),
19562        });
19563        assert_eq!(
19564            dt_mapped.labels(),
19565            &[super::IndexLabel::Int64(1), super::IndexLabel::Int64(2)]
19566        );
19567        assert_eq!(dt_mapped.name(), Some("ts"));
19568
19569        let td = super::TimedeltaIndex::new(vec![5, 10]).set_name("delta");
19570        assert_eq!(
19571            td.map(|label| match label {
19572                super::IndexLabel::Timedelta64(nanos) => super::IndexLabel::Int64(*nanos * 2),
19573                other => other.clone(),
19574            }),
19575            td.to_flat_index().map(|label| match label {
19576                super::IndexLabel::Timedelta64(nanos) => super::IndexLabel::Int64(*nanos * 2),
19577                other => other.clone(),
19578            })
19579        );
19580
19581        use fp_types::{Period, PeriodFreq};
19582        let pi = super::PeriodIndex::new(vec![Period::new(1, PeriodFreq::Monthly)]);
19583        assert_eq!(
19584            pi.map(|label| super::IndexLabel::Utf8(format!("p:{label}"))),
19585            pi.to_flat_index()
19586                .map(|label| super::IndexLabel::Utf8(format!("p:{label}")))
19587        );
19588
19589        let range = super::RangeIndex::new(1, 4, 1).unwrap();
19590        assert_eq!(
19591            range.map(|label| match label {
19592                super::IndexLabel::Int64(v) => super::IndexLabel::Int64(*v + 10),
19593                other => other.clone(),
19594            }),
19595            range.to_flat_index().map(|label| match label {
19596                super::IndexLabel::Int64(v) => super::IndexLabel::Int64(*v + 10),
19597                other => other.clone(),
19598            })
19599        );
19600
19601        let cat = super::CategoricalIndex::from_values(vec!["a".to_owned()], false);
19602        assert_eq!(
19603            cat.map(|label| super::IndexLabel::Utf8(label.to_string().to_uppercase())),
19604            cat.to_flat_index()
19605                .map(|label| super::IndexLabel::Utf8(label.to_string().to_uppercase()))
19606        );
19607    }
19608
19609    #[test]
19610    fn index_variants_astype_forward_flat_and_preserve_name_o5pyg() {
19611        const NS: i64 = 1_000_000_000;
19612
19613        let dt = super::DatetimeIndex::new(vec![NS, 2 * NS]).set_name("ts");
19614        assert_eq!(
19615            dt.astype("int64").unwrap(),
19616            dt.to_flat_index().astype("int64").unwrap()
19617        );
19618        assert_eq!(dt.astype("int64").unwrap().name(), Some("ts"));
19619        assert!(dt.astype("float64").is_err());
19620
19621        let td = super::TimedeltaIndex::new(vec![5, 10]).set_name("delta");
19622        assert_eq!(
19623            td.astype("string").unwrap(),
19624            td.to_flat_index().astype("string").unwrap()
19625        );
19626        assert_eq!(td.astype("string").unwrap().name(), Some("delta"));
19627
19628        use fp_types::{Period, PeriodFreq};
19629        let pi = super::PeriodIndex::new(vec![Period::new(1, PeriodFreq::Monthly)]);
19630        assert_eq!(
19631            pi.astype("object").unwrap(),
19632            pi.to_flat_index().astype("object").unwrap()
19633        );
19634
19635        let range = super::RangeIndex::new(1, 4, 1).unwrap().set_name("r");
19636        assert_eq!(
19637            range.astype("str").unwrap(),
19638            range.to_flat_index().astype("str").unwrap()
19639        );
19640        assert_eq!(range.astype("str").unwrap().name(), Some("r"));
19641
19642        let cat = super::CategoricalIndex::from_values(vec!["7".to_owned()], false);
19643        assert_eq!(
19644            cat.astype("int").unwrap(),
19645            cat.to_flat_index().astype("int").unwrap()
19646        );
19647        assert!(cat.astype("datetime64[ns]").is_err());
19648    }
19649
19650    #[test]
19651    fn index_variants_asof_forward_flat_and_mask_locs_955dj() {
19652        const NS: i64 = 1_000_000_000;
19653
19654        let dt = super::DatetimeIndex::new(vec![NS, 3 * NS, 5 * NS]);
19655        let dt_key = super::IndexLabel::Datetime64(4 * NS);
19656        assert_eq!(dt.asof(&dt_key), dt.to_flat_index().asof(&dt_key));
19657        assert_eq!(dt.asof(&super::IndexLabel::Datetime64(0)), None);
19658
19659        let td = super::TimedeltaIndex::new(vec![10, 20, 30]);
19660        let where_td = super::Index::new(vec![
19661            super::IndexLabel::Timedelta64(5),
19662            super::IndexLabel::Timedelta64(20),
19663            super::IndexLabel::Timedelta64(25),
19664        ]);
19665        let mask = [false, true, true];
19666        assert_eq!(
19667            td.asof_locs(&where_td, Some(&mask)),
19668            td.to_flat_index().asof_locs(&where_td, Some(&mask))
19669        );
19670        assert_eq!(
19671            td.asof_locs(&where_td, Some(&mask)),
19672            vec![None, Some(1), Some(1)]
19673        );
19674
19675        use fp_types::{Period, PeriodFreq};
19676        let pi = super::PeriodIndex::new(vec![
19677            Period::new(1, PeriodFreq::Monthly),
19678            Period::new(2, PeriodFreq::Monthly),
19679        ]);
19680        let period_key = pi.to_flat_index().labels()[1].clone();
19681        assert_eq!(pi.asof(&period_key), pi.to_flat_index().asof(&period_key));
19682
19683        let range = super::RangeIndex::new(2, 8, 2).unwrap();
19684        let range_key = super::IndexLabel::Int64(5);
19685        assert_eq!(
19686            range.asof(&range_key),
19687            range.to_flat_index().asof(&range_key)
19688        );
19689
19690        let cat = super::CategoricalIndex::from_values(
19691            vec!["a".to_owned(), "c".to_owned(), "e".to_owned()],
19692            false,
19693        );
19694        let cat_key = super::IndexLabel::Utf8("d".to_owned());
19695        assert_eq!(cat.asof(&cat_key), cat.to_flat_index().asof(&cat_key));
19696    }
19697
19698    #[test]
19699    fn index_variants_drop_join_sortlevel_forward_flat_gr6kj() {
19700        const NS: i64 = 1_000_000_000;
19701
19702        let dt = super::DatetimeIndex::new(vec![NS, 3 * NS, 5 * NS]).set_name("ts");
19703        let drop_dt = [super::IndexLabel::Datetime64(3 * NS)];
19704        assert_eq!(dt.drop(&drop_dt), dt.to_flat_index().drop(&drop_dt));
19705        assert_eq!(dt.drop(&drop_dt).name(), Some("ts"));
19706
19707        let td = super::TimedeltaIndex::new(vec![30, 10, 20]);
19708        let (td_sorted, td_order) = td.sortlevel();
19709        let (flat_td_sorted, flat_td_order) = td.to_flat_index().sortlevel();
19710        assert_eq!(td_sorted, flat_td_sorted);
19711        assert_eq!(td_order, flat_td_order);
19712
19713        use fp_types::{Period, PeriodFreq};
19714        let pi = super::PeriodIndex::new(vec![
19715            Period::new(2, PeriodFreq::Monthly),
19716            Period::new(1, PeriodFreq::Monthly),
19717        ]);
19718        assert_eq!(
19719            pi.join(&pi.to_flat_index(), "outer").unwrap(),
19720            pi.to_flat_index()
19721        );
19722
19723        let range = super::RangeIndex::new(2, 8, 2).unwrap();
19724        let other = super::Index::new(vec![
19725            super::IndexLabel::Int64(4),
19726            super::IndexLabel::Int64(6),
19727            super::IndexLabel::Int64(9),
19728        ]);
19729        assert_eq!(
19730            range.join(&other, "inner").unwrap(),
19731            range.to_flat_index().join(&other, "inner").unwrap()
19732        );
19733        assert!(range.join(&other, "sideways").is_err());
19734
19735        let cat = super::CategoricalIndex::from_values(
19736            vec!["b".to_owned(), "a".to_owned(), "b".to_owned()],
19737            false,
19738        );
19739        let (cat_sorted, cat_order) = cat.sortlevel();
19740        let (flat_cat_sorted, flat_cat_order) = cat.to_flat_index().sortlevel();
19741        assert_eq!(cat_sorted, flat_cat_sorted);
19742        assert_eq!(cat_order, flat_cat_order);
19743        let drop_cat = [super::IndexLabel::Utf8("b".to_owned())];
19744        assert_eq!(cat.drop(&drop_cat), cat.to_flat_index().drop(&drop_cat));
19745    }
19746
19747    #[test]
19748    fn index_variants_temporal_rounding_forwarders_dznxu() {
19749        let hour = fp_types::Timedelta::NANOS_PER_HOUR;
19750        let minute = fp_types::Timedelta::NANOS_PER_MIN;
19751        let nat = fp_types::Timedelta::NAT;
19752        let dt =
19753            super::DatetimeIndex::new(vec![hour / 2, hour + 31 * minute, i64::MIN]).set_name("ts");
19754
19755        let dt_floor = dt.floor("h").unwrap();
19756        assert_eq!(dt_floor.asi8(), vec![0, hour, i64::MIN]);
19757        assert_eq!(dt_floor.name(), Some("ts"));
19758
19759        let dt_ceil = dt.ceil("h").unwrap();
19760        assert_eq!(dt_ceil.asi8(), vec![hour, 2 * hour, i64::MIN]);
19761
19762        let dt_round = dt.round("h").unwrap();
19763        assert_eq!(dt_round.asi8(), vec![0, 2 * hour, i64::MIN]);
19764
19765        let dt_snap = dt.snap("h").unwrap();
19766        assert_eq!(dt_snap.asi8(), dt.asi8());
19767        assert!(dt.floor("not-a-frequency").is_err());
19768        assert!(dt.snap("not-a-frequency").is_err());
19769
19770        let td = super::TimedeltaIndex::new(vec![hour / 2, hour + 31 * minute, nat]).set_name("d");
19771        assert_eq!(td.floor("h").unwrap().asi8(), vec![0, hour, nat]);
19772        assert_eq!(td.ceil("h").unwrap().asi8(), vec![hour, 2 * hour, nat]);
19773        assert_eq!(td.round("h").unwrap().asi8(), vec![0, 2 * hour, nat]);
19774        assert_eq!(td.round("h").unwrap().name(), Some("d"));
19775        assert!(td.ceil("not-a-frequency").is_err());
19776
19777        use fp_types::{Period, PeriodFreq};
19778        let periods = super::PeriodIndex::new(vec![
19779            Period::new(10, PeriodFreq::Monthly),
19780            Period::new(11, PeriodFreq::Monthly),
19781        ])
19782        .set_name("p");
19783        let rounded_periods = periods.round("not-a-frequency");
19784        assert_eq!(rounded_periods.values(), periods.values());
19785        assert_eq!(rounded_periods.name(), Some("p"));
19786    }
19787
19788    #[test]
19789    fn index_variants_diff_forwarders_lqs0a() {
19790        let day = fp_types::Timedelta::NANOS_PER_DAY;
19791        let nat = fp_types::Timedelta::NAT;
19792
19793        let dt = super::DatetimeIndex::new(vec![day, 3 * day, i64::MIN, 10 * day]).set_name("ts");
19794        assert_eq!(dt.diff(1).asi8(), vec![nat, 2 * day, nat, nat]);
19795        assert_eq!(dt.diff(-1).asi8(), vec![-2 * day, nat, nat, nat]);
19796        assert_eq!(dt.diff(0).asi8(), vec![0, 0, nat, 0]);
19797        assert_eq!(dt.diff(1).name(), Some("ts"));
19798
19799        let td = super::TimedeltaIndex::new(vec![day, 4 * day, nat, 9 * day]).set_name("delta");
19800        assert_eq!(td.diff(2).asi8(), vec![nat, nat, nat, 5 * day]);
19801        assert_eq!(td.diff(-1).asi8(), vec![-3 * day, nat, nat, nat]);
19802        assert_eq!(td.diff(0).asi8(), vec![0, 0, nat, 0]);
19803        assert_eq!(td.diff(1).name(), Some("delta"));
19804
19805        use fp_types::{Period, PeriodFreq};
19806        let periods = super::PeriodIndex::new(vec![
19807            Period::new(10, PeriodFreq::Monthly),
19808            Period::new(12, PeriodFreq::Monthly),
19809            Period::new(13, PeriodFreq::Quarterly),
19810            Period::new(15, PeriodFreq::Quarterly),
19811        ]);
19812        assert_eq!(periods.diff(1), vec![None, Some(2), None, Some(2)]);
19813        assert_eq!(periods.diff(-1), vec![Some(-2), None, Some(-2), None]);
19814        assert_eq!(periods.diff(0), vec![Some(0), Some(0), Some(0), Some(0)]);
19815
19816        let range = super::RangeIndex::new(2, 10, 2).unwrap().set_name("r");
19817        assert_eq!(range.diff(1), vec![None, Some(2), Some(2), Some(2)]);
19818        assert_eq!(range.diff(-2), vec![Some(-4), Some(-4), None, None]);
19819        assert_eq!(range.diff(0), vec![Some(0), Some(0), Some(0), Some(0)]);
19820        assert_eq!(range.name(), Some("r"));
19821
19822        let cat = super::CategoricalIndex::from_values(vec!["a".to_owned(), "b".to_owned()], false);
19823        let err = cat.diff(1).unwrap_err();
19824        assert!(matches!(
19825            err,
19826            super::IndexError::InvalidArgument(message)
19827                if message.contains("Categorical has no 'diff' method")
19828        ));
19829    }
19830
19831    #[test]
19832    fn datetime_index_to_period_matches_pandas_ordinals_002sq()
19833    -> Result<(), Box<dyn std::error::Error>> {
19834        fn ns(value: &str) -> Result<i64, super::DateRangeError> {
19835            super::parse_datetime_to_nanos(value)
19836        }
19837
19838        use fp_types::{Period, PeriodFreq};
19839
19840        let dt = super::DatetimeIndex::new(vec![
19841            ns("1969-12-31 23:59:59")?,
19842            ns("1970-01-01 00:00:00")?,
19843            ns("2024-02-29 12:34:56")?,
19844        ])
19845        .set_name("ts");
19846
19847        assert_eq!(
19848            dt.to_period("Y")?.values(),
19849            &[
19850                Period::new(-1, PeriodFreq::Annual),
19851                Period::new(0, PeriodFreq::Annual),
19852                Period::new(54, PeriodFreq::Annual),
19853            ]
19854        );
19855        assert_eq!(
19856            dt.to_period("Q")?.values(),
19857            &[
19858                Period::new(-1, PeriodFreq::Quarterly),
19859                Period::new(0, PeriodFreq::Quarterly),
19860                Period::new(216, PeriodFreq::Quarterly),
19861            ]
19862        );
19863        assert_eq!(
19864            dt.to_period("M")?.values(),
19865            &[
19866                Period::new(-1, PeriodFreq::Monthly),
19867                Period::new(0, PeriodFreq::Monthly),
19868                Period::new(649, PeriodFreq::Monthly),
19869            ]
19870        );
19871        assert_eq!(
19872            dt.to_period("D")?.values(),
19873            &[
19874                Period::new(-1, PeriodFreq::Daily),
19875                Period::new(0, PeriodFreq::Daily),
19876                Period::new(19_782, PeriodFreq::Daily),
19877            ]
19878        );
19879        assert_eq!(
19880            dt.to_period("W")?.values(),
19881            &[
19882                Period::new(1, PeriodFreq::Weekly),
19883                Period::new(1, PeriodFreq::Weekly),
19884                Period::new(2_827, PeriodFreq::Weekly),
19885            ]
19886        );
19887        assert_eq!(
19888            dt.to_period("B")?.values(),
19889            &[
19890                Period::new(-1, PeriodFreq::Business),
19891                Period::new(0, PeriodFreq::Business),
19892                Period::new(14_130, PeriodFreq::Business),
19893            ]
19894        );
19895        assert_eq!(
19896            dt.to_period("H")?.values(),
19897            &[
19898                Period::new(-1, PeriodFreq::Hourly),
19899                Period::new(0, PeriodFreq::Hourly),
19900                Period::new(474_780, PeriodFreq::Hourly),
19901            ]
19902        );
19903        let minutely = dt.to_period("min")?;
19904        assert_eq!(
19905            minutely.values(),
19906            &[
19907                Period::new(-1, PeriodFreq::Minutely),
19908                Period::new(0, PeriodFreq::Minutely),
19909                Period::new(28_486_834, PeriodFreq::Minutely),
19910            ]
19911        );
19912        assert_eq!(minutely.name(), Some("ts"));
19913        assert_eq!(
19914            dt.to_period("S")?.values(),
19915            &[
19916                Period::new(-1, PeriodFreq::Secondly),
19917                Period::new(0, PeriodFreq::Secondly),
19918                Period::new(1_709_210_096, PeriodFreq::Secondly),
19919            ]
19920        );
19921
19922        assert!(matches!(
19923            super::DatetimeIndex::new(vec![i64::MIN]).to_period("M"),
19924            Err(super::IndexError::InvalidArgument(message))
19925                if message.contains("invalid or NaT datetime nanos")
19926        ));
19927        assert!(matches!(
19928            dt.to_period("fortnight"),
19929            Err(super::IndexError::InvalidArgument(message))
19930                if message.contains("unsupported frequency")
19931        ));
19932
19933        Ok(())
19934    }
19935
19936    #[test]
19937    fn period_index_asfreq_boundary_conversion_h1zia() -> Result<(), super::IndexError> {
19938        use fp_types::{Period, PeriodFreq};
19939
19940        let annual = super::PeriodIndex::new(vec![
19941            Period::new(0, PeriodFreq::Annual),
19942            Period::new(1, PeriodFreq::Annual),
19943        ])
19944        .set_name("p");
19945        assert_eq!(
19946            annual.asfreq("M")?.values(),
19947            &[
19948                Period::new(11, PeriodFreq::Monthly),
19949                Period::new(23, PeriodFreq::Monthly),
19950            ]
19951        );
19952        let annual_start = annual.asfreq_with_how("M", "start")?;
19953        assert_eq!(
19954            annual_start.values(),
19955            &[
19956                Period::new(0, PeriodFreq::Monthly),
19957                Period::new(12, PeriodFreq::Monthly),
19958            ]
19959        );
19960        assert_eq!(annual_start.name(), Some("p"));
19961
19962        let quarterly = super::PeriodIndex::new(vec![
19963            Period::new(0, PeriodFreq::Quarterly),
19964            Period::new(1, PeriodFreq::Quarterly),
19965        ]);
19966        assert_eq!(
19967            quarterly.asfreq("D")?.values(),
19968            &[
19969                Period::new(89, PeriodFreq::Daily),
19970                Period::new(180, PeriodFreq::Daily),
19971            ]
19972        );
19973        assert_eq!(
19974            quarterly.asfreq_with_how("D", "s")?.values(),
19975            &[
19976                Period::new(0, PeriodFreq::Daily),
19977                Period::new(90, PeriodFreq::Daily),
19978            ]
19979        );
19980
19981        let monthly = super::PeriodIndex::new(vec![
19982            Period::new(0, PeriodFreq::Monthly),
19983            Period::new(1, PeriodFreq::Monthly),
19984        ]);
19985        assert_eq!(
19986            monthly.asfreq("S")?.values(),
19987            &[
19988                Period::new(2_678_399, PeriodFreq::Secondly),
19989                Period::new(5_097_599, PeriodFreq::Secondly),
19990            ]
19991        );
19992        assert_eq!(
19993            monthly.asfreq_with_how("S", "begin")?.values(),
19994            &[
19995                Period::new(0, PeriodFreq::Secondly),
19996                Period::new(2_678_400, PeriodFreq::Secondly),
19997            ]
19998        );
19999        assert_eq!(
20000            monthly.asfreq("B")?.values(),
20001            &[
20002                Period::new(21, PeriodFreq::Business),
20003                Period::new(41, PeriodFreq::Business),
20004            ]
20005        );
20006        assert_eq!(
20007            monthly.asfreq_with_how("W", "start")?.values(),
20008            &[
20009                Period::new(1, PeriodFreq::Weekly),
20010                Period::new(5, PeriodFreq::Weekly),
20011            ]
20012        );
20013        assert!(matches!(
20014            monthly.asfreq_with_how("D", "middle"),
20015            Err(super::IndexError::InvalidArgument(message))
20016                if message.contains("asfreq how must be 'start' or 'end'")
20017        ));
20018        assert!(matches!(
20019            monthly.asfreq("fortnight"),
20020            Err(super::IndexError::InvalidArgument(message))
20021                if message.contains("unsupported frequency")
20022        ));
20023
20024        Ok(())
20025    }
20026
20027    #[test]
20028    fn period_index_timestamp_boundaries_d44wh() -> Result<(), Box<dyn std::error::Error>> {
20029        fn ns(value: &str) -> Result<i64, super::DateRangeError> {
20030            super::parse_datetime_to_nanos(value)
20031        }
20032
20033        use fp_types::{Period, PeriodFreq};
20034
20035        let monthly = super::PeriodIndex::new(vec![
20036            Period::new(0, PeriodFreq::Monthly),
20037            Period::new(1, PeriodFreq::Monthly),
20038        ])
20039        .set_name("period");
20040        assert_eq!(
20041            monthly.start_time()?.asi8(),
20042            vec![ns("1970-01-01 00:00:00")?, ns("1970-02-01 00:00:00")?]
20043        );
20044        assert_eq!(
20045            monthly.end_time()?.asi8(),
20046            vec![
20047                ns("1970-02-01 00:00:00")? - 1,
20048                ns("1970-03-01 00:00:00")? - 1
20049            ]
20050        );
20051        assert_eq!(
20052            monthly.to_timestamp("start")?.asi8(),
20053            monthly.start_time()?.asi8()
20054        );
20055        assert_eq!(
20056            monthly.to_timestamp("end")?.asi8(),
20057            monthly.end_time()?.asi8()
20058        );
20059        assert_eq!(monthly.to_timestamp("")?.name(), Some("period"));
20060        assert_eq!(monthly.qyear()?, vec![1970, 1970]);
20061        assert!(matches!(
20062            monthly.to_timestamp("middle"),
20063            Err(super::IndexError::InvalidArgument(message))
20064                if message.contains("to_timestamp how must be 'start' or 'end'")
20065        ));
20066
20067        let quarterly = super::PeriodIndex::new(vec![
20068            Period::new(-1, PeriodFreq::Quarterly),
20069            Period::new(0, PeriodFreq::Quarterly),
20070        ]);
20071        assert_eq!(
20072            quarterly.start_time()?.asi8(),
20073            vec![ns("1969-10-01 00:00:00")?, ns("1970-01-01 00:00:00")?]
20074        );
20075        assert_eq!(
20076            quarterly.end_time()?.asi8(),
20077            vec![
20078                ns("1970-01-01 00:00:00")? - 1,
20079                ns("1970-04-01 00:00:00")? - 1
20080            ]
20081        );
20082        assert_eq!(quarterly.qyear()?, vec![1969, 1970]);
20083
20084        let mixed_freq = super::PeriodIndex::new(vec![
20085            Period::new(1, PeriodFreq::Weekly),
20086            Period::new(2, PeriodFreq::Business),
20087            Period::new(1, PeriodFreq::Hourly),
20088        ]);
20089        assert_eq!(
20090            mixed_freq.start_time()?.asi8(),
20091            vec![
20092                ns("1969-12-29 00:00:00")?,
20093                ns("1970-01-05 00:00:00")?,
20094                fp_types::Timedelta::NANOS_PER_HOUR
20095            ]
20096        );
20097        assert_eq!(
20098            mixed_freq.end_time()?.asi8(),
20099            vec![
20100                ns("1970-01-05 00:00:00")? - 1,
20101                ns("1970-01-06 00:00:00")? - 1,
20102                2 * fp_types::Timedelta::NANOS_PER_HOUR - 1
20103            ]
20104        );
20105        assert_eq!(mixed_freq.qyear()?, vec![1970, 1970, 1970]);
20106
20107        Ok(())
20108    }
20109
20110    #[test]
20111    fn index_variants_view_transpose_ravel_nlevels_infer_objects_match_pandas_d0ph1() {
20112        const NS: i64 = 1_000_000_000;
20113        let dt = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS, i64::MIN]).set_name("ts");
20114        assert!(dt.view().equals(&dt));
20115        assert!(dt.transpose().equals(&dt));
20116        assert!(dt.T().identical(&dt));
20117        assert_eq!(dt.ravel(), dt.values());
20118        assert_eq!(dt.nlevels(), 1);
20119        assert!(dt.infer_objects().equals(&dt));
20120
20121        let td = super::TimedeltaIndex::new(vec![100_i64, fp_types::Timedelta::NAT]).set_name("d");
20122        assert!(td.view().equals(&td));
20123        assert!(td.T().identical(&td));
20124        assert_eq!(td.ravel(), td.values());
20125        assert_eq!(td.nlevels(), 1);
20126
20127        use fp_types::{Period, PeriodFreq};
20128        let pi = super::PeriodIndex::new(vec![
20129            Period::new(10, PeriodFreq::Monthly),
20130            Period::new(11, PeriodFreq::Monthly),
20131        ]);
20132        assert_eq!(pi.view().values(), pi.values());
20133        assert!(pi.T().identical(&pi));
20134        assert_eq!(pi.ravel(), pi.values().to_vec());
20135        assert_eq!(pi.nlevels(), 1);
20136
20137        let r = super::RangeIndex::new(0, 5, 1).unwrap();
20138        assert!(r.view().equals(&r));
20139        assert!(r.T().identical(&r));
20140        assert_eq!(r.ravel(), r.values());
20141        assert_eq!(r.nlevels(), 1);
20142
20143        let cat = super::CategoricalIndex::from_values(vec!["a".to_owned(), "b".to_owned()], false);
20144        assert_eq!(cat.view().labels(), cat.labels());
20145        assert!(cat.T().identical(&cat));
20146        assert_eq!(cat.ravel(), cat.labels().to_vec());
20147        assert_eq!(cat.nlevels(), 1);
20148    }
20149
20150    #[test]
20151    fn datetime_index_set_ops_match_pandas_ik8if() {
20152        const NS: i64 = 1_000_000_000;
20153        let a = 1_704_067_200_i64 * NS;
20154        let b = 1_705_276_800_i64 * NS;
20155        let c = 1_706_140_800_i64 * NS;
20156        let d = 1_707_350_400_i64 * NS;
20157        let left = super::DatetimeIndex::new(vec![a, b, c]).set_name("ts");
20158        let right = super::DatetimeIndex::new(vec![b, c, d]).set_name("ts");
20159
20160        // intersection: b, c (in self order).
20161        let inter = left.intersection(&right);
20162        assert_eq!(inter.values(), vec![Some(b), Some(c)]);
20163        assert_eq!(inter.name(), Some("ts"));
20164
20165        // union: a, b, c then d.
20166        let union = left.union(&right);
20167        assert_eq!(union.values(), vec![Some(a), Some(b), Some(c), Some(d)]);
20168
20169        // difference: a (only in self).
20170        let diff = left.difference(&right);
20171        assert_eq!(diff.values(), vec![Some(a)]);
20172
20173        // symmetric_difference: a (self-only) then d (other-only).
20174        let sym = left.symmetric_difference(&right);
20175        assert_eq!(sym.values(), vec![Some(a), Some(d)]);
20176
20177        // Mismatched names drop the name.
20178        let mismatched = super::DatetimeIndex::new(vec![b]).set_name("other");
20179        assert_eq!(left.intersection(&mismatched).name(), None);
20180        assert_eq!(left.union(&mismatched).name(), None);
20181    }
20182
20183    #[test]
20184    fn timedelta_index_set_ops_match_pandas_ik8if() {
20185        let left = super::TimedeltaIndex::new(vec![100_i64, 200, 300]).set_name("d");
20186        let right = super::TimedeltaIndex::new(vec![200_i64, 300, 400]).set_name("d");
20187
20188        let inter = left.intersection(&right);
20189        assert_eq!(inter.values(), vec![Some(200), Some(300)]);
20190        assert_eq!(inter.name(), Some("d"));
20191
20192        let union = left.union(&right);
20193        assert_eq!(
20194            union.values(),
20195            vec![Some(100), Some(200), Some(300), Some(400)]
20196        );
20197
20198        let diff = left.difference(&right);
20199        assert_eq!(diff.values(), vec![Some(100)]);
20200
20201        let sym = left.symmetric_difference(&right);
20202        assert_eq!(sym.values(), vec![Some(100), Some(400)]);
20203    }
20204
20205    #[test]
20206    fn timedelta_index_sum_match_pandas_qi04e() {
20207        let nat = fp_types::Timedelta::NAT;
20208        let td = super::TimedeltaIndex::new(vec![10_i64, 20, 30, nat]);
20209        assert_eq!(td.sum(), Some(60));
20210
20211        let only_nat = super::TimedeltaIndex::new(vec![nat, nat]);
20212        assert_eq!(only_nat.sum(), Some(0));
20213
20214        let empty = super::TimedeltaIndex::new(vec![]);
20215        assert_eq!(empty.sum(), Some(0));
20216    }
20217
20218    #[test]
20219    fn datetime_timedelta_var_match_pandas_pw5sn() {
20220        // [10, 20, 30] sample variance with ddof=1: ((100 + 0 + 100)/2) = 100.
20221        let td = super::TimedeltaIndex::new(vec![10_i64, 20, 30]);
20222        assert!((td.var().unwrap() - 100.0).abs() < 1e-9);
20223
20224        // Single element: not enough data.
20225        let one = super::TimedeltaIndex::new(vec![5_i64]);
20226        assert_eq!(one.var(), None);
20227
20228        // DatetimeIndex spot check.
20229        const NS: i64 = 1_000_000_000;
20230        let dt = super::DatetimeIndex::new(vec![10 * NS, 20 * NS, 30 * NS]);
20231        assert!(dt.var().is_some());
20232    }
20233
20234    #[test]
20235    fn datetime_timedelta_std_match_pandas_3hb3t() {
20236        // [10, 20, 30] sample std with ddof=1: sqrt((100 + 0 + 100) / 2) = 10.
20237        let td = super::TimedeltaIndex::new(vec![10_i64, 20, 30]);
20238        assert_eq!(td.std(), Some(10));
20239
20240        // [10, 30] sample std: sqrt(((10-20)^2 + (30-20)^2) / 1) = sqrt(200).
20241        let td2 = super::TimedeltaIndex::new(vec![10_i64, 30]);
20242        let expected = 200f64.sqrt() as i64;
20243        assert_eq!(td2.std(), Some(expected));
20244
20245        // Single element / NAT-only: not enough data.
20246        let one = super::TimedeltaIndex::new(vec![5_i64]);
20247        assert_eq!(one.std(), None);
20248        let nat = super::TimedeltaIndex::new(vec![fp_types::Timedelta::NAT]);
20249        assert_eq!(nat.std(), None);
20250
20251        // DatetimeIndex spot check.
20252        const NS: i64 = 1_000_000_000;
20253        let dt = super::DatetimeIndex::new(vec![10 * NS, 20 * NS, 30 * NS]);
20254        assert!(dt.std().is_some());
20255    }
20256
20257    #[test]
20258    fn datetime_timedelta_shift_match_pandas_1y3sx() {
20259        const NS: i64 = 1_000_000_000;
20260        let day_ns = 86_400 * NS;
20261        let dt = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS, i64::MIN]).set_name("ts");
20262
20263        // Shift by 2 days.
20264        let shifted = dt.shift(2, day_ns);
20265        assert_eq!(
20266            shifted.values()[0],
20267            Some(1_704_067_200_i64 * NS + 2 * day_ns)
20268        );
20269        assert_eq!(shifted.values()[1], None);
20270        assert_eq!(shifted.name(), Some("ts"));
20271
20272        // Negative shift.
20273        let back = dt.shift(-1, day_ns);
20274        assert_eq!(back.values()[0], Some(1_704_067_200_i64 * NS - day_ns));
20275
20276        // TimedeltaIndex spot check.
20277        let td = super::TimedeltaIndex::new(vec![100_i64, fp_types::Timedelta::NAT]);
20278        let shifted_td = td.shift(3, 50);
20279        assert_eq!(shifted_td.values()[0], Some(250));
20280        assert_eq!(shifted_td.values()[1], None);
20281    }
20282
20283    #[test]
20284    fn datetime_timedelta_mean_median_match_pandas_wp0gr() {
20285        const NS: i64 = 1_000_000_000;
20286        let a = 1_000_000_000_i64 * NS;
20287        let b = 2_000_000_000_i64 * NS;
20288        let c = 3_000_000_000_i64 * NS;
20289        let dt = super::DatetimeIndex::new(vec![a, b, c, i64::MIN]);
20290        // Mean: (a + b + c) / 3 = b (the middle, since arithmetic).
20291        assert_eq!(dt.mean(), Some(b));
20292        // Median: middle of three sorted values = b.
20293        assert_eq!(dt.median(), Some(b));
20294
20295        // Even-length set: median is average of two middles.
20296        let dt_even = super::DatetimeIndex::new(vec![a, b]);
20297        let total = i128::from(a) + i128::from(b);
20298        let expected = i64::try_from(total / 2).unwrap();
20299        assert_eq!(dt_even.median(), Some(expected));
20300
20301        // All-NAT.
20302        let nat = super::DatetimeIndex::new(vec![i64::MIN; 3]);
20303        assert_eq!(nat.mean(), None);
20304        assert_eq!(nat.median(), None);
20305
20306        // Timedelta spot check.
20307        let td = super::TimedeltaIndex::new(vec![10_i64, 20, 30]);
20308        assert_eq!(td.mean(), Some(20));
20309        assert_eq!(td.median(), Some(20));
20310    }
20311
20312    #[test]
20313    fn datetime_index_min_max_sort_values_match_pandas_kastf() {
20314        const NS: i64 = 1_000_000_000;
20315        let a = 1_704_067_200_i64 * NS;
20316        let b = 1_705_276_800_i64 * NS;
20317        let c = 1_706_140_800_i64 * NS;
20318        let dt = super::DatetimeIndex::new(vec![b, c, i64::MIN, a]).set_name("ts");
20319
20320        assert_eq!(dt.min(), Some(a));
20321        assert_eq!(dt.max(), Some(c));
20322
20323        let sorted = dt.sort_values();
20324        let sorted_alias = dt.sort();
20325        // NAT sorts first (na_position='first' default).
20326        assert_eq!(sorted.values(), vec![None, Some(a), Some(b), Some(c)]);
20327        assert_eq!(sorted_alias.values(), sorted.values());
20328        assert_eq!(sorted.name(), Some("ts"));
20329        assert_eq!(sorted_alias.name(), Some("ts"));
20330
20331        let all_nat = super::DatetimeIndex::new(vec![i64::MIN, i64::MIN]);
20332        assert_eq!(all_nat.min(), None);
20333        assert_eq!(all_nat.max(), None);
20334
20335        let empty = super::DatetimeIndex::new(vec![]);
20336        assert_eq!(empty.min(), None);
20337        assert_eq!(empty.max(), None);
20338        assert!(empty.sort_values().is_empty());
20339        assert!(empty.sort().is_empty());
20340    }
20341
20342    #[test]
20343    fn timedelta_index_min_max_sort_values_match_pandas_kastf() {
20344        let nat = fp_types::Timedelta::NAT;
20345        let td = super::TimedeltaIndex::new(vec![300_i64, nat, 100, 200]).set_name("d");
20346
20347        assert_eq!(td.min(), Some(100));
20348        assert_eq!(td.max(), Some(300));
20349
20350        let sorted = td.sort_values();
20351        let sorted_alias = td.sort();
20352        assert_eq!(sorted.values(), vec![None, Some(100), Some(200), Some(300)]);
20353        assert_eq!(sorted_alias.values(), sorted.values());
20354        assert_eq!(sorted.name(), Some("d"));
20355        assert_eq!(sorted_alias.name(), Some("d"));
20356
20357        let all_nat = super::TimedeltaIndex::new(vec![nat, nat]);
20358        assert_eq!(all_nat.min(), None);
20359        assert_eq!(all_nat.max(), None);
20360
20361        let empty = super::TimedeltaIndex::new(vec![]);
20362        assert_eq!(empty.min(), None);
20363        assert_eq!(empty.max(), None);
20364        assert!(empty.sort().is_empty());
20365    }
20366
20367    #[test]
20368    fn datetime_index_append_delete_match_pandas_834v9() -> Result<(), super::IndexError> {
20369        const NS: i64 = 1_000_000_000;
20370        let a = 1_704_067_200_i64 * NS;
20371        let b = 1_705_276_800_i64 * NS;
20372        let c = 1_706_140_800_i64 * NS;
20373        let left = super::DatetimeIndex::new(vec![a, b]).set_name("ts");
20374        let right = super::DatetimeIndex::new(vec![c]).set_name("ts");
20375
20376        let merged = left.append(&right);
20377        assert_eq!(merged.values(), vec![Some(a), Some(b), Some(c)]);
20378        assert_eq!(merged.name(), Some("ts"));
20379
20380        let mismatched = super::DatetimeIndex::new(vec![c]).set_name("other");
20381        assert_eq!(left.append(&mismatched).name(), None);
20382
20383        let trimmed = left.append(&right).delete(1)?;
20384        assert_eq!(trimmed.values(), vec![Some(a), Some(c)]);
20385        assert_eq!(trimmed.name(), Some("ts"));
20386
20387        let oob = left.delete(5).unwrap_err();
20388        assert!(matches!(
20389            oob,
20390            super::IndexError::OutOfBounds {
20391                position: 5,
20392                length: 2
20393            }
20394        ));
20395        Ok(())
20396    }
20397
20398    #[test]
20399    fn timedelta_index_append_delete_match_pandas_834v9() -> Result<(), super::IndexError> {
20400        let left = super::TimedeltaIndex::new(vec![1_i64, 2]).set_name("d");
20401        let right = super::TimedeltaIndex::new(vec![3_i64]).set_name("d");
20402        let merged = left.append(&right);
20403        assert_eq!(merged.values(), vec![Some(1), Some(2), Some(3)]);
20404        assert_eq!(merged.name(), Some("d"));
20405
20406        let trimmed = merged.delete(0)?;
20407        assert_eq!(trimmed.values(), vec![Some(2), Some(3)]);
20408
20409        assert!(matches!(
20410            left.delete(7).unwrap_err(),
20411            super::IndexError::OutOfBounds {
20412                position: 7,
20413                length: 2
20414            }
20415        ));
20416        Ok(())
20417    }
20418
20419    #[test]
20420    fn period_index_append_delete_match_pandas_834v9() -> Result<(), super::IndexError> {
20421        use fp_types::{Period, PeriodFreq};
20422        let p1 = Period::new(10, PeriodFreq::Monthly);
20423        let p2 = Period::new(11, PeriodFreq::Monthly);
20424        let p3 = Period::new(12, PeriodFreq::Monthly);
20425        let left = super::PeriodIndex::new(vec![p1, p2]).set_name("p");
20426        let right = super::PeriodIndex::new(vec![p3]).set_name("p");
20427
20428        let merged = left.append(&right);
20429        assert_eq!(merged.values(), &[p1, p2, p3]);
20430        assert_eq!(merged.name(), Some("p"));
20431
20432        let mismatched = super::PeriodIndex::new(vec![p3]).set_name("other");
20433        assert_eq!(left.append(&mismatched).name(), None);
20434
20435        let trimmed = merged.delete(1)?;
20436        assert_eq!(trimmed.values(), &[p1, p3]);
20437
20438        assert!(matches!(
20439            left.delete(5).unwrap_err(),
20440            super::IndexError::OutOfBounds {
20441                position: 5,
20442                length: 2
20443            }
20444        ));
20445        Ok(())
20446    }
20447
20448    #[test]
20449    fn range_index_append_delete_match_pandas_834v9() -> Result<(), super::IndexError> {
20450        let left = super::RangeIndex::new(0, 3, 1).unwrap();
20451        let right = super::RangeIndex::new(10, 12, 1).unwrap();
20452        let merged = left.append(&right);
20453        let merged_labels = int64_labels(&merged);
20454        assert_eq!(merged_labels, vec![0, 1, 2, 10, 11]);
20455
20456        let trimmed = left.delete(1)?;
20457        let trimmed_labels = int64_labels(&trimmed);
20458        assert_eq!(trimmed_labels, vec![0, 2]);
20459
20460        assert!(matches!(
20461            left.delete(99).unwrap_err(),
20462            super::IndexError::OutOfBounds {
20463                position: 99,
20464                length: 3
20465            }
20466        ));
20467        Ok(())
20468    }
20469
20470    #[test]
20471    fn datetime_index_take_repeat_isin_match_pandas_bbgg3() -> Result<(), super::IndexError> {
20472        const NS: i64 = 1_000_000_000;
20473        let a = 1_704_067_200_i64 * NS;
20474        let b = 1_705_276_800_i64 * NS;
20475        let c = 1_706_140_800_i64 * NS;
20476        let dt = super::DatetimeIndex::new(vec![a, b, c]).set_name("ts");
20477
20478        let taken = dt.take(&[2, 0, 0])?;
20479        assert_eq!(taken.values(), vec![Some(c), Some(a), Some(a)]);
20480        assert_eq!(taken.name(), Some("ts"));
20481
20482        let oob = dt.take(&[3]).unwrap_err();
20483        assert!(matches!(
20484            oob,
20485            super::IndexError::OutOfBounds {
20486                position: 3,
20487                length: 3
20488            }
20489        ));
20490
20491        let repeated = dt.repeat(2);
20492        assert_eq!(
20493            repeated.values(),
20494            vec![Some(a), Some(a), Some(b), Some(b), Some(c), Some(c)]
20495        );
20496        assert_eq!(repeated.name(), Some("ts"));
20497
20498        let mask = dt.isin(&[a, c]);
20499        assert_eq!(mask, vec![true, false, true]);
20500
20501        let nat_idx = super::DatetimeIndex::new(vec![i64::MIN, a]);
20502        assert_eq!(nat_idx.isin(&[i64::MIN]), vec![true, false]);
20503        Ok(())
20504    }
20505
20506    #[test]
20507    fn timedelta_index_take_repeat_isin_match_pandas_bbgg3() -> Result<(), super::IndexError> {
20508        let td = super::TimedeltaIndex::new(vec![100_i64, 200, 300]).set_name("d");
20509        let taken = td.take(&[2, 0])?;
20510        assert_eq!(taken.values(), vec![Some(300), Some(100)]);
20511        assert_eq!(taken.name(), Some("d"));
20512
20513        assert!(matches!(
20514            td.take(&[7]).unwrap_err(),
20515            super::IndexError::OutOfBounds {
20516                position: 7,
20517                length: 3
20518            }
20519        ));
20520
20521        let repeated = td.repeat(2);
20522        assert_eq!(
20523            repeated.values(),
20524            vec![
20525                Some(100),
20526                Some(100),
20527                Some(200),
20528                Some(200),
20529                Some(300),
20530                Some(300)
20531            ]
20532        );
20533
20534        let mask = td.isin(&[200, 999]);
20535        assert_eq!(mask, vec![false, true, false]);
20536        Ok(())
20537    }
20538
20539    #[test]
20540    fn period_index_take_repeat_isin_match_pandas_bbgg3() -> Result<(), super::IndexError> {
20541        use fp_types::{Period, PeriodFreq};
20542        let p1 = Period::new(10, PeriodFreq::Monthly);
20543        let p2 = Period::new(11, PeriodFreq::Monthly);
20544        let p3 = Period::new(12, PeriodFreq::Monthly);
20545        let pi = super::PeriodIndex::new(vec![p1, p2, p3]).set_name("pp");
20546
20547        let taken = pi.take(&[2, 1])?;
20548        assert_eq!(taken.values(), &[p3, p2]);
20549        assert_eq!(taken.name(), Some("pp"));
20550
20551        assert!(matches!(
20552            pi.take(&[5]).unwrap_err(),
20553            super::IndexError::OutOfBounds {
20554                position: 5,
20555                length: 3
20556            }
20557        ));
20558
20559        let repeated = pi.repeat(2);
20560        assert_eq!(repeated.values(), &[p1, p1, p2, p2, p3, p3]);
20561
20562        let mask = pi.isin(&[p1, p3]);
20563        assert_eq!(mask, vec![true, false, true]);
20564        Ok(())
20565    }
20566
20567    #[test]
20568    fn range_index_take_repeat_isin_match_pandas_bbgg3() -> Result<(), super::IndexError> {
20569        let r = super::RangeIndex::new(0, 5, 1).unwrap();
20570        let taken = r.take(&[2, 4, 0])?;
20571        let labels = int64_labels(&taken);
20572        assert_eq!(labels, vec![2, 4, 0]);
20573
20574        assert!(matches!(
20575            r.take(&[10]).unwrap_err(),
20576            super::IndexError::OutOfBounds {
20577                position: 10,
20578                length: 5
20579            }
20580        ));
20581
20582        let repeated = r.repeat(2);
20583        let repeat_labels = int64_labels(&repeated);
20584        assert_eq!(repeat_labels, vec![0, 0, 1, 1, 2, 2, 3, 3, 4, 4]);
20585
20586        let mask = r.isin(&[1, 3, 99]);
20587        assert_eq!(mask, vec![false, true, false, true, false]);
20588        Ok(())
20589    }
20590
20591    #[test]
20592    fn period_index_forwarder_methods_match_pandas_zke9k() {
20593        use fp_types::{Period, PeriodFreq};
20594        let p1 = Period::new(10, PeriodFreq::Monthly);
20595        let p2 = Period::new(11, PeriodFreq::Monthly);
20596        let p3 = Period::new(12, PeriodFreq::Monthly);
20597        let pi = super::PeriodIndex::new(vec![p1, p2, p1, p3, p2, p1]).set_name("p");
20598
20599        let unique = pi.unique();
20600        assert_eq!(unique.values(), &[p1, p2, p3]);
20601        assert_eq!(unique.name(), Some("p"));
20602
20603        let dup_first = pi.duplicated(super::DuplicateKeep::First);
20604        assert_eq!(dup_first, vec![false, false, true, false, true, true]);
20605
20606        let dup_last = pi.duplicated(super::DuplicateKeep::Last);
20607        assert_eq!(dup_last, vec![true, true, true, false, false, false]);
20608
20609        let dup_none = pi.duplicated(super::DuplicateKeep::None);
20610        // None marks every position whose value occurs >1 time.
20611        assert_eq!(dup_none, vec![true, true, true, false, true, true]);
20612
20613        let dropped = pi.drop_duplicates();
20614        assert_eq!(dropped.values(), &[p1, p2, p3]);
20615
20616        let counts = pi.value_counts();
20617        let total: usize = counts.iter().map(|(_, n)| n).sum();
20618        assert_eq!(total, pi.len());
20619        // First entry is the most frequent (p1 with 3 occurrences).
20620        assert_eq!(counts[0].1, 3);
20621        let p1_count = counts
20622            .iter()
20623            .find_map(|(period, n)| (*period == p1).then_some(*n))
20624            .expect("p1 should be counted");
20625        assert_eq!(p1_count, 3);
20626
20627        let (codes, factor_uniques) = pi.factorize();
20628        assert_eq!(codes, vec![0, 1, 0, 2, 1, 0]);
20629        assert_eq!(factor_uniques.values(), &[p1, p2, p3]);
20630    }
20631
20632    #[test]
20633    fn period_index_unique_handles_empty_zke9k() {
20634        let pi = super::PeriodIndex::new(Vec::new());
20635        assert!(pi.unique().is_empty());
20636        assert!(pi.drop_duplicates().is_empty());
20637        assert!(pi.value_counts().is_empty());
20638        let (codes, uniques) = pi.factorize();
20639        assert!(codes.is_empty());
20640        assert!(uniques.is_empty());
20641    }
20642
20643    #[test]
20644    fn categorical_index_missingness_methods_are_closed_form_c0knj() {
20645        let cat = super::CategoricalIndex::from_values(
20646            vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20647            false,
20648        );
20649        assert_eq!(cat.isnull(), vec![false, false, false]);
20650        assert_eq!(cat.notnull(), vec![true, true, true]);
20651        assert!(!cat.hasnans());
20652        let dropped = cat.dropna();
20653        assert_eq!(dropped.labels(), cat.labels());
20654        let filled = cat.fillna("z");
20655        assert_eq!(filled.labels(), cat.labels());
20656
20657        let empty = super::CategoricalIndex::from_values(Vec::<String>::new(), false);
20658        assert_eq!(empty.isnull(), Vec::<bool>::new());
20659        assert!(!empty.hasnans());
20660    }
20661
20662    #[test]
20663    fn categorical_index_append_delete_insert_repeat_match_pandas_tns52()
20664    -> Result<(), super::IndexError> {
20665        let cat = super::CategoricalIndex::with_categories(
20666            vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20667            vec![
20668                "a".to_owned(),
20669                "b".to_owned(),
20670                "c".to_owned(),
20671                "d".to_owned(),
20672            ],
20673            false,
20674        )?
20675        .set_name("level");
20676
20677        // append: merge categories.
20678        let other = super::CategoricalIndex::with_categories(
20679            vec!["d".to_owned()],
20680            vec!["d".to_owned(), "e".to_owned()],
20681            false,
20682        )?
20683        .set_name("level");
20684        let merged = cat.append(&other);
20685        assert_eq!(
20686            merged.labels(),
20687            vec![
20688                "a".to_owned(),
20689                "b".to_owned(),
20690                "c".to_owned(),
20691                "d".to_owned()
20692            ]
20693            .as_slice()
20694        );
20695        assert_eq!(merged.name(), Some("level"));
20696        assert!(merged.categories().contains(&"e".to_owned()));
20697
20698        // delete OOB.
20699        assert!(matches!(
20700            cat.delete(99).unwrap_err(),
20701            super::IndexError::OutOfBounds {
20702                position: 99,
20703                length: 3
20704            }
20705        ));
20706        let trimmed = cat.delete(0)?;
20707        assert_eq!(
20708            trimmed.labels(),
20709            vec!["b".to_owned(), "c".to_owned()].as_slice()
20710        );
20711
20712        // insert.
20713        let inserted = cat.insert(1, "d")?;
20714        assert_eq!(
20715            inserted.labels(),
20716            vec![
20717                "a".to_owned(),
20718                "d".to_owned(),
20719                "b".to_owned(),
20720                "c".to_owned()
20721            ]
20722            .as_slice()
20723        );
20724        assert!(cat.insert(1, "zzz").is_err());
20725
20726        // repeat.
20727        let repeated = cat.repeat(2);
20728        assert_eq!(repeated.labels().len(), 6);
20729        assert_eq!(repeated.labels()[0], "a");
20730        assert_eq!(repeated.labels()[1], "a");
20731        assert_eq!(repeated.labels()[2], "b");
20732        Ok(())
20733    }
20734
20735    #[test]
20736    fn categorical_index_slice_locs_indexer_match_pandas_y93vb() -> Result<(), super::IndexError> {
20737        let cat = super::CategoricalIndex::with_categories(
20738            vec![
20739                "a".to_owned(),
20740                "b".to_owned(),
20741                "c".to_owned(),
20742                "d".to_owned(),
20743            ],
20744            vec![
20745                "a".to_owned(),
20746                "b".to_owned(),
20747                "c".to_owned(),
20748                "d".to_owned(),
20749            ],
20750            true,
20751        )?;
20752        assert_eq!(cat.slice_locs("b", "c")?, (1, 3));
20753        assert_eq!(cat.slice_indexer("b", "c")?, 1..3);
20754        assert_eq!(cat.slice_locs("a", "d")?, (0, 4));
20755
20756        // Non-monotonic rejects.
20757        let unsorted = super::CategoricalIndex::from_values(
20758            vec!["c".to_owned(), "a".to_owned(), "b".to_owned()],
20759            false,
20760        );
20761        assert!(unsorted.slice_locs("a", "c").is_err());
20762        Ok(())
20763    }
20764
20765    #[test]
20766    fn categorical_index_searchsorted_set_ops_match_pandas_cmvs7() -> Result<(), super::IndexError>
20767    {
20768        let cat = super::CategoricalIndex::with_categories(
20769            vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20770            vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20771            true,
20772        )?;
20773
20774        // searchsorted on the sorted utf8 index.
20775        assert_eq!(cat.searchsorted("b", "left")?, 1);
20776        assert_eq!(cat.searchsorted("c", "right")?, 3);
20777        assert!(cat.searchsorted("b", "middle").is_err());
20778
20779        let other = super::CategoricalIndex::from_values(
20780            vec!["b".to_owned(), "c".to_owned(), "d".to_owned()],
20781            false,
20782        );
20783        assert_eq!(
20784            cat.intersection(&other).labels(),
20785            vec!["b".to_owned(), "c".to_owned()].as_slice()
20786        );
20787        assert_eq!(
20788            cat.union(&other).labels(),
20789            vec![
20790                "a".to_owned(),
20791                "b".to_owned(),
20792                "c".to_owned(),
20793                "d".to_owned(),
20794            ]
20795            .as_slice()
20796        );
20797        assert_eq!(
20798            cat.difference(&other).labels(),
20799            vec!["a".to_owned()].as_slice()
20800        );
20801        // symmetric_difference: a (only in cat) + d (only in other).
20802        assert_eq!(
20803            cat.symmetric_difference(&other).labels(),
20804            vec!["a".to_owned(), "d".to_owned()].as_slice()
20805        );
20806        Ok(())
20807    }
20808
20809    #[test]
20810    fn categorical_index_argmax_argmin_match_pandas_d46wi() -> Result<(), super::IndexError> {
20811        let cat = super::CategoricalIndex::with_categories(
20812            vec!["b".to_owned(), "a".to_owned(), "c".to_owned()],
20813            vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20814            true,
20815        )?;
20816        assert_eq!(cat.argmax()?, 2);
20817        assert_eq!(cat.argmin()?, 1);
20818
20819        let empty = super::CategoricalIndex::from_values(Vec::<String>::new(), false);
20820        assert!(empty.argmax().is_err());
20821        assert!(empty.argmin().is_err());
20822        Ok(())
20823    }
20824
20825    #[test]
20826    fn categorical_index_forwarders_match_pandas_e2p82() -> Result<(), super::IndexError> {
20827        let cat = super::CategoricalIndex::with_categories(
20828            vec![
20829                "b".to_owned(),
20830                "a".to_owned(),
20831                "c".to_owned(),
20832                "a".to_owned(),
20833            ],
20834            vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20835            true,
20836        )?;
20837
20838        // argsort: positions sorted by lexicographic label.
20839        let positions = cat.argsort();
20840        let labels: Vec<&str> = positions
20841            .iter()
20842            .map(|&p| cat.labels()[p].as_str())
20843            .collect();
20844        for w in labels.windows(2) {
20845            assert!(w[0] <= w[1]);
20846        }
20847
20848        // take swaps positions.
20849        let taken = cat.take(&[2, 0, 0])?;
20850        assert_eq!(
20851            taken.labels(),
20852            vec!["c".to_owned(), "b".to_owned(), "b".to_owned()].as_slice()
20853        );
20854        assert!(matches!(
20855            cat.take(&[7]).unwrap_err(),
20856            super::IndexError::OutOfBounds {
20857                position: 7,
20858                length: 4
20859            }
20860        ));
20861
20862        // isin membership.
20863        assert_eq!(
20864            cat.isin(&["a".to_owned(), "z".to_owned()]),
20865            vec![false, true, false, true]
20866        );
20867
20868        // get_loc finds first; missing rejects.
20869        assert_eq!(cat.get_loc("c")?, 2);
20870        assert!(cat.get_loc("zzz").is_err());
20871
20872        // min/max with ordered=true uses category order.
20873        assert_eq!(cat.min(), Some("a"));
20874        assert_eq!(cat.max(), Some("c"));
20875
20876        // Empty.
20877        let empty = super::CategoricalIndex::from_values(Vec::<String>::new(), false);
20878        assert_eq!(empty.min(), None);
20879        assert_eq!(empty.max(), None);
20880        assert!(empty.argsort().is_empty());
20881        Ok(())
20882    }
20883
20884    #[test]
20885    fn categorical_index_category_management_match_pandas_zy2vd() -> Result<(), super::IndexError> {
20886        let cat = super::CategoricalIndex::with_categories(
20887            vec!["a".to_owned(), "b".to_owned()],
20888            vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20889            false,
20890        )?;
20891
20892        // as_ordered / as_unordered.
20893        assert!(cat.as_ordered().ordered());
20894        assert!(!cat.as_ordered().as_unordered().ordered());
20895
20896        // add_categories: appends "d".
20897        let added = cat.add_categories(vec!["d".to_owned()])?;
20898        assert_eq!(
20899            added.categories(),
20900            vec![
20901                "a".to_owned(),
20902                "b".to_owned(),
20903                "c".to_owned(),
20904                "d".to_owned()
20905            ]
20906            .as_slice()
20907        );
20908        // Adding a duplicate rejects.
20909        assert!(cat.add_categories(vec!["a".to_owned()]).is_err());
20910
20911        // remove_categories drops "c" (unused).
20912        let pruned = cat.remove_categories(&["c".to_owned()])?;
20913        assert_eq!(
20914            pruned.categories(),
20915            vec!["a".to_owned(), "b".to_owned()].as_slice()
20916        );
20917        // Trying to remove a category that's still in use rejects.
20918        assert!(cat.remove_categories(&["a".to_owned()]).is_err());
20919        // Removing a missing category rejects.
20920        assert!(cat.remove_categories(&["zzz".to_owned()]).is_err());
20921
20922        // remove_unused_categories trims "c" automatically.
20923        let trimmed = cat.remove_unused_categories();
20924        assert_eq!(
20925            trimmed.categories(),
20926            vec!["a".to_owned(), "b".to_owned()].as_slice()
20927        );
20928
20929        // set_categories: extending to {a, b, c, d}.
20930        let extended = cat.set_categories(vec![
20931            "a".to_owned(),
20932            "b".to_owned(),
20933            "c".to_owned(),
20934            "d".to_owned(),
20935        ])?;
20936        assert_eq!(extended.categories().len(), 4);
20937        // set_categories must include every current label.
20938        assert!(
20939            cat.set_categories(vec!["b".to_owned(), "c".to_owned()])
20940                .is_err()
20941        );
20942
20943        // rename_categories: a→A, b→B, c→C.
20944        let renamed =
20945            cat.rename_categories(vec!["A".to_owned(), "B".to_owned(), "C".to_owned()])?;
20946        assert_eq!(
20947            renamed.labels(),
20948            vec!["A".to_owned(), "B".to_owned()].as_slice()
20949        );
20950        assert_eq!(
20951            renamed.categories(),
20952            vec!["A".to_owned(), "B".to_owned(), "C".to_owned()].as_slice()
20953        );
20954        // Wrong length rejects.
20955        assert!(cat.rename_categories(vec!["X".to_owned()]).is_err());
20956
20957        // reorder_categories: permutation passes; ordered flag flips.
20958        let reordered =
20959            cat.reorder_categories(vec!["c".to_owned(), "b".to_owned(), "a".to_owned()], true)?;
20960        assert!(reordered.ordered());
20961        assert_eq!(
20962            reordered.categories(),
20963            vec!["c".to_owned(), "b".to_owned(), "a".to_owned()].as_slice()
20964        );
20965        // Non-permutation rejects.
20966        assert!(
20967            cat.reorder_categories(vec!["a".to_owned(), "b".to_owned(), "x".to_owned()], false)
20968                .is_err()
20969        );
20970        // Duplicate-bearing input rejects.
20971        assert!(
20972            cat.reorder_categories(vec!["a".to_owned(), "a".to_owned(), "b".to_owned()], false)
20973                .is_err()
20974        );
20975
20976        Ok(())
20977    }
20978
20979    #[test]
20980    fn categorical_index_forwarder_methods_match_pandas_i1q1c() {
20981        let labels = vec![
20982            "low".to_owned(),
20983            "high".to_owned(),
20984            "low".to_owned(),
20985            "med".to_owned(),
20986            "high".to_owned(),
20987            "low".to_owned(),
20988        ];
20989        let categorical =
20990            super::CategoricalIndex::from_values(labels.clone(), false).set_name("level");
20991
20992        // unique: first-seen low, high, med.
20993        let unique = categorical.unique();
20994        assert_eq!(
20995            unique.labels(),
20996            vec!["low".to_owned(), "high".to_owned(), "med".to_owned()].as_slice()
20997        );
20998        assert_eq!(unique.name(), Some("level"));
20999
21000        // duplicated keep=First: positions 2, 4, 5 are duplicates of earlier.
21001        let dup_first = categorical.duplicated(super::DuplicateKeep::First);
21002        assert_eq!(dup_first, vec![false, false, true, false, true, true]);
21003
21004        // drop_duplicates returns same as unique.
21005        let dropped = categorical.drop_duplicates();
21006        assert_eq!(dropped.labels(), unique.labels());
21007
21008        // value_counts: low=3, high=2, med=1; total = 6 = len.
21009        let counts = categorical.value_counts();
21010        let total: usize = counts.iter().map(|(_, n)| n).sum();
21011        assert_eq!(total, categorical.len());
21012        let low_count = counts
21013            .iter()
21014            .find_map(|(label, n)| (label == "low").then_some(*n))
21015            .expect("low should be counted");
21016        assert_eq!(low_count, 3);
21017        // First entry is the most frequent (descending sort).
21018        assert_eq!(counts[0].1, 3);
21019
21020        // factorize: codes encode first-seen positions; uniques == unique().
21021        let (codes, factor_uniques) = categorical.factorize();
21022        assert_eq!(codes.len(), categorical.len());
21023        assert_eq!(codes, vec![0, 1, 0, 2, 1, 0]);
21024        assert_eq!(factor_uniques.labels(), unique.labels());
21025    }
21026
21027    #[test]
21028    fn categorical_index_unique_preserves_categories_and_ordered_i1q1c() {
21029        let labels = vec!["a".to_owned(), "b".to_owned(), "a".to_owned()];
21030        let categories = vec!["a".to_owned(), "b".to_owned(), "c".to_owned()];
21031        let cat = super::CategoricalIndex::with_categories(labels, categories.clone(), true)
21032            .expect("with_categories");
21033        let unique = cat.unique();
21034        assert_eq!(unique.categories(), categories.as_slice());
21035        assert!(unique.ordered());
21036    }
21037
21038    #[test]
21039    fn timedelta_index_forwarder_methods_match_index_vq4pf() -> Result<(), super::IndexError> {
21040        let a: i64 = 1_000;
21041        let b: i64 = 2_000;
21042        let c: i64 = 3_000;
21043        let nat = fp_types::Timedelta::NAT;
21044
21045        // a, c, b, a, NAT, c (duplicates + NAT)
21046        let td = super::TimedeltaIndex::new(vec![a, c, b, a, nat, c]);
21047
21048        assert_eq!(td.argmax()?, 1);
21049        assert_eq!(td.argmin()?, 0);
21050
21051        let positions = td.argsort();
21052        assert_eq!(positions.len(), td.len());
21053
21054        let unique = td.unique()?;
21055        assert_eq!(unique.values(), vec![Some(a), Some(c), Some(b), None]);
21056
21057        let (codes, uniques) = td.factorize()?;
21058        assert_eq!(codes.len(), td.len());
21059        assert_eq!(uniques.values(), vec![Some(a), Some(c), Some(b)]);
21060        assert_eq!(codes[4], -1);
21061
21062        let counts = td.value_counts();
21063        let total: usize = counts.iter().map(|(_, n)| n).sum();
21064        assert_eq!(total, 5); // NAT dropped
21065
21066        let dup_first = td.duplicated(super::DuplicateKeep::First);
21067        assert_eq!(dup_first, vec![false, false, false, true, false, true]);
21068
21069        let deduped = td.drop_duplicates()?;
21070        assert_eq!(deduped.values(), vec![Some(a), Some(c), Some(b), None]);
21071
21072        let dropped = td.dropna();
21073        assert_eq!(
21074            dropped.values(),
21075            vec![Some(a), Some(c), Some(b), Some(a), Some(c)]
21076        );
21077        Ok(())
21078    }
21079
21080    #[test]
21081    fn timedelta_index_argmax_argmin_reject_empty_vq4pf() {
21082        let empty = super::TimedeltaIndex::new(vec![]);
21083        let err_max = empty.argmax().unwrap_err();
21084        assert!(matches!(
21085            err_max,
21086            super::IndexError::InvalidArgument(ref message)
21087                if message == "attempt to get argmax of an empty sequence"
21088        ));
21089        let err_min = empty.argmin().unwrap_err();
21090        assert!(matches!(
21091            err_min,
21092            super::IndexError::InvalidArgument(ref message)
21093                if message == "attempt to get argmin of an empty sequence"
21094        ));
21095
21096        let only_nat =
21097            super::TimedeltaIndex::new(vec![fp_types::Timedelta::NAT, fp_types::Timedelta::NAT]);
21098        assert!(only_nat.argmax().is_err());
21099        assert!(only_nat.argmin().is_err());
21100    }
21101
21102    #[test]
21103    fn timedelta_index_dropna_preserves_name_vq4pf() {
21104        let td =
21105            super::TimedeltaIndex::new(vec![fp_types::Timedelta::NAT, 0_i64]).set_name("delta");
21106        let dropped = td.dropna();
21107        assert_eq!(dropped.values(), vec![Some(0)]);
21108        assert_eq!(dropped.name(), Some("delta"));
21109    }
21110
21111    #[test]
21112    fn datetime_index_forwarder_methods_match_index_z9guv() -> Result<(), super::IndexError> {
21113        const NS: i64 = 1_000_000_000;
21114        let a = 1_704_067_200_i64 * NS;
21115        let b = 1_705_276_800_i64 * NS;
21116        let c = 1_706_140_800_i64 * NS;
21117
21118        // a, c, b, a, NAT, c (duplicates + NAT to exercise every branch).
21119        let dt = super::DatetimeIndex::new(vec![a, c, b, a, i64::MIN, c]);
21120
21121        // argmax / argmin skip NAT to match pandas skipna=True default.
21122        assert_eq!(dt.argmax()?, 1); // c at position 1 is first-seen max
21123        assert_eq!(dt.argmin()?, 0); // a at position 0 is first-seen min
21124
21125        // argsort returns positions in ascending label order (NAT sorts lowest
21126        // because i64::MIN < every datetime). Stable on ties.
21127        let positions = dt.argsort();
21128        assert_eq!(positions.len(), dt.len());
21129        let sorted_labels: Vec<&super::IndexLabel> = positions
21130            .iter()
21131            .map(|&p| &dt.as_index().labels()[p])
21132            .collect();
21133        for w in sorted_labels.windows(2) {
21134            assert!(w[0].cmp(w[1]).is_le());
21135        }
21136
21137        let unique = dt.unique()?;
21138        // First-seen order including NAT: a, c, b, NAT.
21139        assert_eq!(unique.values(), vec![Some(a), Some(c), Some(b), None]);
21140
21141        let (codes, uniques) = dt.factorize()?;
21142        assert_eq!(codes.len(), dt.len());
21143        // factorize skips NAT in the uniques and emits -1 codes for NAT inputs
21144        // (matches pandas).
21145        assert_eq!(uniques.values(), vec![Some(a), Some(c), Some(b)]);
21146        // Position 4 is the NAT input.
21147        assert_eq!(codes[4], -1);
21148
21149        let counts = dt.value_counts();
21150        // value_counts drops NAT by default (matches pandas dropna=True), so
21151        // total = 5: a:2, c:2, b:1.
21152        let total_count: usize = counts.iter().map(|(_, n)| n).sum();
21153        assert_eq!(total_count, 5);
21154        let a_count = counts
21155            .iter()
21156            .find_map(|(label, n)| match label {
21157                super::IndexLabel::Datetime64(nanos) if *nanos == a => Some(*n),
21158                _ => None,
21159            })
21160            .expect("a should be counted");
21161        assert_eq!(a_count, 2);
21162
21163        let dup_first = dt.duplicated(super::DuplicateKeep::First);
21164        // Positions 3 (second a) and 5 (second c) are duplicates.
21165        assert_eq!(dup_first, vec![false, false, false, true, false, true]);
21166
21167        let deduped = dt.drop_duplicates()?;
21168        // drop_duplicates preserves the NAT entry — only literal duplicates go.
21169        assert_eq!(deduped.values(), vec![Some(a), Some(c), Some(b), None]);
21170
21171        let dropped = dt.dropna();
21172        assert_eq!(
21173            dropped.values(),
21174            vec![Some(a), Some(c), Some(b), Some(a), Some(c)]
21175        );
21176        Ok(())
21177    }
21178
21179    #[test]
21180    fn datetime_index_argmax_argmin_reject_empty_z9guv() {
21181        let empty = super::DatetimeIndex::new(vec![]);
21182        let err_max = empty.argmax().unwrap_err();
21183        assert!(matches!(
21184            err_max,
21185            super::IndexError::InvalidArgument(ref message)
21186                if message == "attempt to get argmax of an empty sequence"
21187        ));
21188        let err_min = empty.argmin().unwrap_err();
21189        assert!(matches!(
21190            err_min,
21191            super::IndexError::InvalidArgument(ref message)
21192                if message == "attempt to get argmin of an empty sequence"
21193        ));
21194        assert!(empty.argsort().is_empty());
21195        assert!(empty.dropna().is_empty());
21196    }
21197
21198    #[test]
21199    fn datetime_index_dropna_preserves_name_z9guv() {
21200        let dt = super::DatetimeIndex::new(vec![i64::MIN, 0_i64, i64::MIN]).set_name("ts");
21201        let dropped = dt.dropna();
21202        assert_eq!(dropped.values(), vec![Some(0)]);
21203        assert_eq!(dropped.name(), Some("ts"));
21204    }
21205
21206    #[test]
21207    fn datetime_index_asi8_round_trips_nanos_teeck() {
21208        const NS: i64 = 1_000_000_000;
21209        let total: i64 = 1_704_067_200_i64 * NS + 123;
21210        let dt = super::DatetimeIndex::new(vec![total, i64::MIN, 0]);
21211        assert_eq!(dt.asi8(), vec![total, i64::MIN, 0]);
21212
21213        let empty = super::DatetimeIndex::new(vec![]);
21214        assert!(empty.asi8().is_empty());
21215    }
21216
21217    #[test]
21218    fn datetime_index_strftime_formats_each_label_teeck() {
21219        const NS: i64 = 1_000_000_000;
21220        // 2024-01-15T12:34:56.789Z:
21221        //   2024-01-01 00:00:00Z = 1704067200 sec.
21222        //   + 14 * 86400 = 1209600  -> 2024-01-15 00:00:00 = 1705276800
21223        //   + 12*3600 + 34*60 + 56  -> 1705322096
21224        //   * 1e9 + 789_000_000 ns
21225        let with_ms: i64 = 1_705_322_096_i64 * NS + 789_000_000;
21226        let dt = super::DatetimeIndex::new(vec![with_ms, i64::MIN]);
21227        let formatted = dt.strftime("%Y-%m-%dT%H:%M:%S%.3f");
21228        assert_eq!(
21229            formatted,
21230            vec![Some("2024-01-15T12:34:56.789".to_owned()), None]
21231        );
21232    }
21233
21234    #[test]
21235    fn timedelta_index_asi8_microseconds_nanoseconds_match_pandas_teeck() -> Result<(), &'static str>
21236    {
21237        // 1 day + 2:34:56.789012345
21238        let one_day = fp_types::Timedelta::NANOS_PER_DAY;
21239        let extra = 2 * fp_types::Timedelta::NANOS_PER_HOUR
21240            + 34 * fp_types::Timedelta::NANOS_PER_MIN
21241            + 56 * fp_types::Timedelta::NANOS_PER_SEC
21242            + 789_012_345;
21243        let total = one_day + extra;
21244        let td = super::TimedeltaIndex::new(vec![total, fp_types::Timedelta::NAT, 0, -1]);
21245
21246        assert_eq!(td.asi8(), vec![total, fp_types::Timedelta::NAT, 0, -1]);
21247        // microseconds: 789_012_345 % 1_000_000_000 / 1_000 == 789_012
21248        assert_eq!(
21249            td.microseconds(),
21250            vec![Some(789_012), None, Some(0), Some(999_999)]
21251        );
21252        // nanoseconds: 789_012_345 % 1_000 == 345
21253        assert_eq!(td.nanoseconds(), vec![Some(345), None, Some(0), Some(999)]);
21254
21255        let components = td.components();
21256        let positive = components
21257            .first()
21258            .copied()
21259            .flatten()
21260            .ok_or("positive components")?;
21261        assert_eq!(positive.days, 1);
21262        assert_eq!(positive.hours, 2);
21263        assert_eq!(positive.minutes, 34);
21264        assert_eq!(positive.seconds, 56);
21265        assert_eq!(positive.milliseconds, 789);
21266        assert_eq!(positive.microseconds, 12);
21267        assert_eq!(positive.nanoseconds, 345);
21268
21269        assert_eq!(
21270            components.get(1).copied().flatten().map(|row| row.days),
21271            None
21272        );
21273
21274        let zero = components
21275            .get(2)
21276            .copied()
21277            .flatten()
21278            .ok_or("zero components")?;
21279        assert_eq!(zero.days, 0);
21280        assert_eq!(zero.hours, 0);
21281        assert_eq!(zero.minutes, 0);
21282        assert_eq!(zero.seconds, 0);
21283        assert_eq!(zero.milliseconds, 0);
21284        assert_eq!(zero.microseconds, 0);
21285        assert_eq!(zero.nanoseconds, 0);
21286
21287        let negative = components
21288            .get(3)
21289            .copied()
21290            .flatten()
21291            .ok_or("negative components")?;
21292        assert_eq!(negative.days, -1);
21293        assert_eq!(negative.hours, 23);
21294        assert_eq!(negative.minutes, 59);
21295        assert_eq!(negative.seconds, 59);
21296        assert_eq!(negative.milliseconds, 999);
21297        assert_eq!(negative.microseconds, 999);
21298        assert_eq!(negative.nanoseconds, 999);
21299
21300        Ok(())
21301    }
21302
21303    #[test]
21304    fn datetime_index_month_name_and_day_name_match_pandas_fqkiu() {
21305        // 2024-01-15 (Monday in January), 2024-12-31 (Tuesday in December),
21306        // i64::MIN (NAT).
21307        const NS: i64 = 1_000_000_000;
21308        let mon_jan: i64 = 1_705_276_800_i64 * NS;
21309        let tue_dec: i64 = 1_735_603_200_i64 * NS;
21310        let dt = super::DatetimeIndex::new(vec![mon_jan, tue_dec, i64::MIN]);
21311
21312        assert_eq!(
21313            dt.month_name(),
21314            vec![
21315                Some("January".to_owned()),
21316                Some("December".to_owned()),
21317                None
21318            ]
21319        );
21320        assert_eq!(
21321            dt.day_name(),
21322            vec![Some("Monday".to_owned()), Some("Tuesday".to_owned()), None]
21323        );
21324    }
21325
21326    #[test]
21327    fn datetime_index_normalize_truncates_to_midnight_utc_fqkiu() {
21328        const NS: i64 = 1_000_000_000;
21329        // 2024-01-15 12:34:56.789Z plus midnight 2024-01-21Z plus NAT.
21330        let mid_day: i64 = 1_705_276_800_i64 * NS + 12 * 3600 * NS + 34 * 60 * NS + 56 * NS + 789;
21331        let midnight: i64 = 1_705_795_200_i64 * NS;
21332        let nat = i64::MIN;
21333
21334        let dt = super::DatetimeIndex::new(vec![mid_day, midnight, nat]).set_name("when");
21335        let normed = dt.normalize();
21336
21337        // Each non-NAT entry is now at midnight; NAT stays NAT; name preserved.
21338        assert_eq!(
21339            normed.values(),
21340            vec![Some(1_705_276_800_i64 * NS), Some(midnight), None]
21341        );
21342        assert_eq!(normed.name(), Some("when"));
21343        assert!(normed.is_normalized());
21344    }
21345
21346    #[test]
21347    fn datetime_index_is_normalized_returns_false_when_any_non_midnight_fqkiu() {
21348        const NS: i64 = 1_000_000_000;
21349        let mid_day: i64 = 1_705_276_800_i64 * NS + 1; // 1ns past midnight
21350        let midnight: i64 = 1_705_795_200_i64 * NS;
21351        let mixed = super::DatetimeIndex::new(vec![midnight, mid_day]);
21352        assert!(!mixed.is_normalized());
21353
21354        let only_midnight = super::DatetimeIndex::new(vec![midnight]);
21355        assert!(only_midnight.is_normalized());
21356
21357        let only_nat = super::DatetimeIndex::new(vec![i64::MIN, i64::MIN]);
21358        assert!(only_nat.is_normalized());
21359
21360        let empty = super::DatetimeIndex::new(vec![]);
21361        assert!(empty.is_normalized());
21362    }
21363
21364    #[test]
21365    fn datetime_index_feb_28_in_non_leap_year_is_month_end_qy7yd() {
21366        // 2023-02-28 00:00:00Z is the month-end of February 2023 (28 days).
21367        let feb_28_2023: i64 = 1_677_542_400_i64 * 1_000_000_000;
21368        let dt = super::DatetimeIndex::new(vec![feb_28_2023]);
21369        assert_eq!(dt.is_month_end(), vec![Some(true)]);
21370    }
21371
21372    #[test]
21373    fn datetime_index_leap_year_century_rule_k860x() {
21374        // 2000-06-15 (leap), 2100-06-15 (not leap), 2024-02-15 (leap),
21375        // 2023-02-15 (not leap), Feb in leap vs non-leap year.
21376        let y2000: i64 = 960_076_800 * 1_000_000_000;
21377        let y2100: i64 = 4_117_046_400 * 1_000_000_000;
21378        let y2024feb: i64 = 1_708_002_000 * 1_000_000_000;
21379        let y2023feb: i64 = 1_676_466_000 * 1_000_000_000;
21380
21381        let dt = super::DatetimeIndex::new(vec![y2000, y2100, y2024feb, y2023feb]);
21382        assert_eq!(
21383            dt.is_leap_year(),
21384            vec![Some(true), Some(false), Some(true), Some(false)]
21385        );
21386        // Feb in leap year -> 29 days; non-leap -> 28.
21387        let dim = dt.days_in_month();
21388        assert_eq!(dim[2], Some(29));
21389        assert_eq!(dim[3], Some(28));
21390    }
21391
21392    #[test]
21393    fn datetime_index_time_of_day_accessors_handle_empty_znejf() {
21394        let dt = super::DatetimeIndex::new(vec![]);
21395        assert!(dt.hour().is_empty());
21396        assert!(dt.minute().is_empty());
21397        assert!(dt.second().is_empty());
21398        assert!(dt.microsecond().is_empty());
21399        assert!(dt.nanosecond().is_empty());
21400    }
21401
21402    #[test]
21403    fn range_index_missingness_methods_are_closed_form_a4fih() {
21404        let asc = super::RangeIndex::new(0, 5, 1).unwrap();
21405        assert_eq!(asc.isna(), vec![false; 5]);
21406        assert_eq!(asc.isnull(), vec![false; 5]);
21407        assert_eq!(asc.notna(), vec![true; 5]);
21408        assert_eq!(asc.notnull(), vec![true; 5]);
21409        assert!(!asc.hasnans());
21410        assert!(asc.dropna().equals(&asc));
21411        assert!(asc.fillna(99).equals(&asc));
21412
21413        let desc = super::RangeIndex::new(10, 0, -2).unwrap();
21414        assert_eq!(desc.isna().len(), desc.len());
21415        assert!(!desc.hasnans());
21416        assert!(desc.dropna().equals(&desc));
21417
21418        let empty = super::RangeIndex::new(0, 0, 1).unwrap();
21419        assert_eq!(empty.isna(), Vec::<bool>::new());
21420        assert_eq!(empty.notna(), Vec::<bool>::new());
21421        assert!(!empty.hasnans());
21422        assert!(empty.dropna().is_empty());
21423        assert!(empty.fillna(0).is_empty());
21424    }
21425
21426    #[test]
21427    fn range_index_format_stringifies_each_value_a4fih() {
21428        let asc = super::RangeIndex::new(0, 4, 1).unwrap();
21429        assert_eq!(asc.format(), vec!["0", "1", "2", "3"]);
21430
21431        let desc = super::RangeIndex::new(5, 0, -2).unwrap();
21432        assert_eq!(desc.format(), vec!["5", "3", "1"]);
21433
21434        let empty = super::RangeIndex::new(0, 0, 1).unwrap();
21435        assert_eq!(empty.format(), Vec::<String>::new());
21436    }
21437
21438    #[test]
21439    fn range_index_factorize_is_identity_a4fih() {
21440        let asc = super::RangeIndex::new(0, 5, 1).unwrap();
21441        let (codes, uniques) = asc.factorize();
21442        assert_eq!(codes, vec![0, 1, 2, 3, 4]);
21443        assert!(uniques.equals(&asc));
21444
21445        let desc = super::RangeIndex::new(10, 0, -2).unwrap();
21446        let (desc_codes, desc_uniques) = desc.factorize();
21447        assert_eq!(desc_codes, (0..desc.len()).collect::<Vec<_>>());
21448        assert!(desc_uniques.equals(&desc));
21449
21450        let empty = super::RangeIndex::new(0, 0, 1).unwrap();
21451        let (empty_codes, empty_uniques) = empty.factorize();
21452        assert!(empty_codes.is_empty());
21453        assert!(empty_uniques.is_empty());
21454    }
21455
21456    #[test]
21457    fn range_index_duplicated_drop_duplicates_are_no_ops_mrchb() {
21458        let asc = super::RangeIndex::new(0, 5, 1).unwrap();
21459        for keep in [
21460            super::DuplicateKeep::First,
21461            super::DuplicateKeep::Last,
21462            super::DuplicateKeep::None,
21463        ] {
21464            assert_eq!(asc.duplicated(keep), vec![false; asc.len()]);
21465        }
21466        let cloned = asc.drop_duplicates();
21467        assert!(cloned.equals(&asc));
21468        assert_eq!(cloned.len(), asc.len());
21469
21470        let empty = super::RangeIndex::new(0, 0, 1).unwrap();
21471        assert_eq!(
21472            empty.duplicated(super::DuplicateKeep::First),
21473            Vec::<bool>::new()
21474        );
21475        assert!(empty.drop_duplicates().is_empty());
21476    }
21477
#[test]
fn multi_index_asof_rejects_tuple_comparison_d89fe13() -> Result<(), super::IndexError> {
    // True when `err` is `InvalidArgument` carrying exactly `expected`.
    fn is_invalid_argument(err: super::IndexError, expected: &str) -> bool {
        matches!(err, super::IndexError::InvalidArgument(message) if message == expected)
    }

    // Two indexes that differ only in which level holds strings.
    let letter_first = MultiIndex::from_tuples(vec![
        vec!["a".into(), 1_i64.into()],
        vec!["b".into(), 2_i64.into()],
    ])?;
    let number_first = MultiIndex::from_tuples(vec![
        vec![1_i64.into(), "a".into()],
        vec![2_i64.into(), "b".into()],
    ])?;

    // `asof` with a full tuple key must fail on both.
    let letter_first_err = letter_first
        .asof(&[IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)])
        .unwrap_err();
    let number_first_err = number_first
        .asof(&[IndexLabel::Int64(1), IndexLabel::Utf8("a".into())])
        .unwrap_err();

    // The message names the type of the first level's labels.
    assert!(is_invalid_argument(
        letter_first_err,
        "'<' not supported between instances of 'tuple' and 'str'"
    ));
    assert!(is_invalid_argument(
        number_first_err,
        "'<' not supported between instances of 'tuple' and 'int'"
    ));

    // An empty index with an empty key is not an error; it finds nothing.
    let empty = MultiIndex::from_tuples(Vec::new())?;
    assert_eq!(empty.asof(&[])?, None);

    Ok(())
}
21510
#[test]
fn multi_index_asof_locs_rejects_mask_and_broadcast_paths_d89fe14()
-> Result<(), super::IndexError> {
    let source = MultiIndex::from_tuples(vec![
        vec!["a".into(), 1_i64.into()],
        vec!["a".into(), 3_i64.into()],
        vec!["b".into(), 2_i64.into()],
    ])?;
    let where_index = MultiIndex::from_tuples(vec![
        vec!["a".into(), 0_i64.into()],
        vec!["a".into(), 2_i64.into()],
        vec!["b".into(), 2_i64.into()],
    ])?;
    let empty_source = MultiIndex::from_arrays(vec![Vec::new(), Vec::new()])?;

    // Each entry pairs the error from one rejected call with the exact
    // message it must carry.
    let rejections = [
        // No mask at all.
        (
            source.asof_locs(&where_index, None).unwrap_err(),
            "object too deep for desired array",
        ),
        // Mask shorter than the three-row source.
        (
            source
                .asof_locs(&where_index, Some(&[true, true]))
                .unwrap_err(),
            "boolean index did not match indexed array along axis 0; size of axis is 3 but size of corresponding boolean axis is 2",
        ),
        // Mask that selects no rows.
        (
            source
                .asof_locs(&where_index, Some(&[false, false, false]))
                .unwrap_err(),
            "cannot do a non-empty take from an empty axes.",
        ),
        // Mask that selects two of the three rows.
        (
            source
                .asof_locs(&where_index, Some(&[true, false, true]))
                .unwrap_err(),
            "operands could not be broadcast together with shapes (3,) (2,)",
        ),
        // Empty source queried against itself with an empty mask.
        (
            empty_source
                .asof_locs(&empty_source, Some(&[]))
                .unwrap_err(),
            "attempt to get argmax of an empty sequence",
        ),
    ];

    for (err, expected) in rejections {
        assert!(matches!(
            err,
            super::IndexError::InvalidArgument(message) if message == expected
        ));
    }

    Ok(())
}
21568
#[test]
fn multi_index_drop_duplicates_append_repeat_and_dropna() {
    // Builds an expected (letter, number) row.
    let row = |letter: &str, number: i64| -> Vec<IndexLabel> {
        vec![IndexLabel::Utf8(letter.into()), IndexLabel::Int64(number)]
    };

    // Five rows: ("a", 1) twice, ("b", 2), and two rows whose first label is
    // Datetime64(i64::MIN); the last of those also has a NAT timedelta.
    let left = MultiIndex::from_tuples(vec![
        vec!["a".into(), 1_i64.into()],
        vec!["a".into(), 1_i64.into()],
        vec!["b".into(), 2_i64.into()],
        vec![IndexLabel::Datetime64(i64::MIN), 3_i64.into()],
        vec![
            IndexLabel::Datetime64(i64::MIN),
            IndexLabel::Timedelta64(Timedelta::NAT),
        ],
    ])
    .unwrap()
    .set_names(vec![Some("letter".into()), Some("number".into())]);

    // drop_duplicates: only the repeated ("a", 1) collapses; order is kept.
    let expected_unique = vec![
        row("a", 1),
        row("b", 2),
        vec![IndexLabel::Datetime64(i64::MIN), IndexLabel::Int64(3)],
        vec![
            IndexLabel::Datetime64(i64::MIN),
            IndexLabel::Timedelta64(Timedelta::NAT),
        ],
    ];
    assert_eq!(left.drop_duplicates().to_list(), expected_unique);

    // dropna: both Datetime64(i64::MIN) rows go, duplicates stay.
    let expected_without_missing = vec![row("a", 1), row("a", 1), row("b", 2)];
    assert_eq!(left.dropna().to_list(), expected_without_missing);

    // dropna_all: exactly one of the five rows is removed.
    assert_eq!(left.dropna_all().len(), 4);

    // append: lengths add up; a level name survives only where both sides
    // agree ("number" vs "other" becomes None).
    let right = MultiIndex::from_tuples(vec![vec!["c".into(), 3_i64.into()]])
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("other".into())]);
    let combined = left.append(&right).unwrap();
    assert_eq!(combined.len(), 6);
    assert_eq!(combined.names(), &[Some("letter".into()), None]);

    // repeat: each row is emitted the requested number of times; zero
    // repeats leaves nothing.
    let doubled = right.repeat(2);
    assert_eq!(doubled.to_list(), vec![row("c", 3), row("c", 3)]);
    assert_eq!(right.repeat(0).len(), 0);
}
21623
#[test]
fn multi_index_insert_delete_and_drop_tuples() {
    // Builds a (letter, number) row from explicit label variants.
    let row = |letter: &str, number: i64| -> Vec<IndexLabel> {
        vec![IndexLabel::Utf8(letter.into()), IndexLabel::Int64(number)]
    };

    let base = MultiIndex::from_tuples(vec![
        vec!["b".into(), 2_i64.into()],
        vec!["a".into(), 2_i64.into()],
        vec!["a".into(), 1_i64.into()],
        vec!["b".into(), 2_i64.into()],
    ])
    .unwrap()
    .set_names(vec![Some("letter".into()), Some("number".into())]);

    // Inserting at position 1 pushes the later rows down and keeps the names.
    let with_z = base.insert(1, vec!["z".into(), 9_i64.into()]).unwrap();
    let expected_after_insert = vec![
        row("b", 2),
        row("z", 9),
        row("a", 2),
        row("a", 1),
        row("b", 2),
    ];
    assert_eq!(with_z.to_list(), expected_after_insert);
    assert_eq!(with_z.names(), base.names());

    // Deleting that same position restores the starting index.
    let restored = with_z.delete(1).unwrap();
    assert_eq!(restored, base);

    // A one-element tuple for a two-level index and an out-of-range
    // position are both rejected.
    assert!(base.insert(0, vec!["short".into()]).is_err());
    assert!(base.delete(99).is_err());

    // Dropping ("b", 2) removes both of its occurrences.
    let without_b = base.drop(&[row("b", 2)]).unwrap();
    assert_eq!(without_b.to_list(), vec![row("a", 2), row("a", 1)]);

    // Dropping a tuple that is not present is an error.
    assert!(base.drop(&[row("missing", 0)]).is_err());
}
21671
#[test]
fn multi_index_factorize_sort_and_reduce_tuples() {
    // Builds an expected (letter, number) row.
    let row = |letter: &str, number: i64| -> Vec<IndexLabel> {
        vec![IndexLabel::Utf8(letter.into()), IndexLabel::Int64(number)]
    };

    let index = MultiIndex::from_tuples(vec![
        vec!["b".into(), 2_i64.into()],
        vec!["a".into(), 2_i64.into()],
        vec!["a".into(), 1_i64.into()],
        vec!["b".into(), 2_i64.into()],
        vec!["a".into(), 2_i64.into()],
        vec!["c".into(), 3_i64.into()],
    ])
    .unwrap()
    .set_names(vec![Some("letter".into()), Some("number".into())]);

    // factorize: codes follow first appearance, uniques keep that order and
    // the level names.
    let (codes, distinct) = index.factorize();
    assert_eq!(codes, vec![0, 1, 2, 0, 1, 3]);
    let expected_distinct = vec![row("b", 2), row("a", 2), row("a", 1), row("c", 3)];
    assert_eq!(distinct.to_list(), expected_distinct);
    assert_eq!(distinct.names(), index.names());

    // unique / nunique agree with the factorized uniques.
    assert_eq!(index.unique(), distinct);
    assert_eq!(index.nunique(), 4);

    // value_counts: the two doubled tuples come ahead of the two singles.
    let expected_counts = vec![
        (row("a", 2), 2),
        (row("b", 2), 2),
        (row("a", 1), 1),
        (row("c", 3), 1),
    ];
    assert_eq!(index.value_counts(), expected_counts);

    // sort_values: rows in ascending tuple order, duplicates retained.
    let ascending = index.sort_values();
    let expected_ascending = vec![
        row("a", 1),
        row("a", 2),
        row("a", 2),
        row("b", 2),
        row("b", 2),
        row("c", 3),
    ];
    assert_eq!(ascending.to_list(), expected_ascending);

    // sortlevel returns the same sorted index together with the original
    // position of every sorted row; sort matches as well.
    let (by_level, source_positions) = index.sortlevel();
    assert_eq!(by_level, ascending);
    assert_eq!(source_positions, vec![2, 1, 4, 0, 3, 5]);
    assert_eq!(index.sort(), ascending);

    // Extremes and where they sit in the unsorted index.
    assert_eq!(index.min().unwrap(), row("a", 1));
    assert_eq!(index.max().unwrap(), row("c", 3));
    assert_eq!(index.argmin(), Some(2));
    assert_eq!(index.argmax(), Some(5));

    // An empty index has no extremes.
    let nothing = MultiIndex::from_tuples(Vec::new()).unwrap();
    assert_eq!(nothing.min(), None);
    assert_eq!(nothing.max(), None);
    assert_eq!(nothing.argmin(), None);
    assert_eq!(nothing.argmax(), None);
}
21742
#[test]
fn multi_index_tuple_set_ops_preserve_order_and_shared_names() {
    // Builds an expected (letter, number) row.
    let row = |letter: &str, number: i64| -> Vec<IndexLabel> {
        vec![IndexLabel::Utf8(letter.into()), IndexLabel::Int64(number)]
    };

    // The left side repeats ("a", 1); the two sides share only ("a", 2) and
    // disagree on the name of the second level.
    let left = MultiIndex::from_tuples(vec![
        vec!["a".into(), 1_i64.into()],
        vec!["a".into(), 2_i64.into()],
        vec!["b".into(), 1_i64.into()],
        vec!["a".into(), 1_i64.into()],
    ])
    .unwrap()
    .set_names(vec![Some("letter".into()), Some("number".into())]);
    let right = MultiIndex::from_tuples(vec![
        vec!["a".into(), 2_i64.into()],
        vec!["c".into(), 3_i64.into()],
    ])
    .unwrap()
    .set_names(vec![Some("letter".into()), Some("other".into())]);

    // intersection: the single shared tuple; only the agreed name is kept.
    let common = left.intersection(&right).unwrap();
    assert_eq!(common.to_list(), vec![row("a", 2)]);
    assert_eq!(common.names(), &[Some("letter".into()), None]);

    // union: left's distinct tuples in order, then what only right has.
    let expected_union = vec![row("a", 1), row("a", 2), row("b", 1), row("c", 3)];
    assert_eq!(left.union(&right).unwrap().to_list(), expected_union);

    // difference: left's distinct tuples that right lacks.
    let expected_difference = vec![row("a", 1), row("b", 1)];
    assert_eq!(
        left.difference(&right).unwrap().to_list(),
        expected_difference
    );

    // symmetric_difference: tuples found on exactly one side.
    let expected_symmetric = vec![row("a", 1), row("b", 1), row("c", 3)];
    assert_eq!(
        left.symmetric_difference(&right).unwrap().to_list(),
        expected_symmetric
    );
}
21792
#[test]
fn multi_index_reorder_levels() {
    let original = MultiIndex::from_tuples(vec![
        vec!["a".into(), 1_i64.into(), "x".into()],
        vec!["b".into(), 2_i64.into(), "y".into()],
    ])
    .unwrap()
    .set_names(vec![
        Some("letter".into()),
        Some("number".into()),
        Some("code".into()),
    ]);

    // Order [2, 0, 1] arranges the levels as code, letter, number.
    let reordered = original.reorder_levels(&[2, 0, 1]).unwrap();
    assert_eq!(reordered.nlevels(), 3);
    assert_eq!(
        reordered.names(),
        &[
            Some("code".into()),
            Some("letter".into()),
            Some("number".into())
        ]
    );

    // The first row therefore reads ("x", "a", 1), checked level by level.
    let first_row = reordered.get_tuple(0).unwrap();
    let expected_first_row = [
        IndexLabel::Utf8("x".into()),
        IndexLabel::Utf8("a".into()),
        IndexLabel::Int64(1),
    ];
    for (level, expected) in expected_first_row.iter().enumerate() {
        assert_eq!(first_row[level], expected);
    }
}
21824
#[test]
fn multi_index_reorder_levels_identity() {
    let original = MultiIndex::from_tuples(vec![vec!["a".into(), 1_i64.into()]]).unwrap();

    // Listing the levels in their existing order must give back an equal index.
    let reordered = original.reorder_levels(&[0, 1]).unwrap();
    assert_eq!(reordered, original);
}
21833
#[test]
fn multi_index_reorder_levels_wrong_length_errors() {
    let two_levels = MultiIndex::from_tuples(vec![vec!["a".into(), 1_i64.into()]]).unwrap();

    // An order with fewer entries than levels is rejected...
    let too_short = two_levels.reorder_levels(&[0]);
    assert!(too_short.is_err());

    // ...and so is one with more entries than levels.
    let too_long = two_levels.reorder_levels(&[0, 1, 2]);
    assert!(too_long.is_err());
}
21841
#[test]
fn multi_index_reorder_levels_duplicate_index_errors() {
    let two_levels = MultiIndex::from_tuples(vec![vec!["a".into(), 1_i64.into()]]).unwrap();

    // Naming level 0 twice has the right length but is still rejected.
    let repeated_level = two_levels.reorder_levels(&[0, 0]);
    assert!(repeated_level.is_err());
}
21848
#[test]
fn multi_index_reorder_levels_out_of_bounds_errors() {
    let two_levels = MultiIndex::from_tuples(vec![vec!["a".into(), 1_i64.into()]]).unwrap();

    // Level 5 does not exist on a two-level index.
    let unknown_level = two_levels.reorder_levels(&[0, 5]);
    assert!(unknown_level.is_err());
}
21855
21856    // ── is_monotonic / is_lexsorted tests (br-frankenpandas-w4uu) ───────
21857
#[test]
fn multi_index_is_monotonic_increasing_on_sorted() {
    // (A,1) < (A,2) < (B,1): strictly increasing in lexicographic order.
    let sorted_rows = MultiIndex::from_tuples(vec![
        vec!["A".into(), 1_i64.into()],
        vec!["A".into(), 2_i64.into()],
        vec!["B".into(), 1_i64.into()],
    ])
    .unwrap();

    let increasing = sorted_rows.is_monotonic_increasing();
    let lexsorted = sorted_rows.is_lexsorted();
    let decreasing = sorted_rows.is_monotonic_decreasing();

    assert!(increasing);
    assert!(lexsorted);
    assert!(!decreasing);
}
21871
#[test]
fn multi_index_is_monotonic_decreasing_on_reverse_sorted() {
    // (B,2) > (B,1) > (A,1): every row is smaller than the one before it.
    let reversed_rows = MultiIndex::from_tuples(vec![
        vec!["B".into(), 2_i64.into()],
        vec!["B".into(), 1_i64.into()],
        vec!["A".into(), 1_i64.into()],
    ])
    .unwrap();

    let decreasing = reversed_rows.is_monotonic_decreasing();
    let increasing = reversed_rows.is_monotonic_increasing();

    assert!(decreasing);
    assert!(!increasing);
}
21884
#[test]
fn multi_index_is_monotonic_both_directions_on_constant_inner() {
    // Two identical rows (A,1), (A,1): neither direction is violated, so
    // both monotonic checks hold.
    let repeated_row = MultiIndex::from_tuples(vec![
        vec!["A".into(), 1_i64.into()],
        vec!["A".into(), 1_i64.into()],
    ])
    .unwrap();

    let increasing = repeated_row.is_monotonic_increasing();
    let decreasing = repeated_row.is_monotonic_decreasing();

    assert!(increasing);
    assert!(decreasing);
}
21896
#[test]
fn multi_index_empty_is_monotonic() {
    // With no rows at all, every ordering predicate holds.
    let no_rows = MultiIndex::from_tuples(Vec::new()).unwrap();

    let increasing = no_rows.is_monotonic_increasing();
    let decreasing = no_rows.is_monotonic_decreasing();
    let lexsorted = no_rows.is_lexsorted();

    assert!(increasing);
    assert!(decreasing);
    assert!(lexsorted);
}
21904
#[test]
fn multi_index_single_row_is_monotonic() {
    // A lone row has no neighbour to be out of order with.
    let one_row = MultiIndex::from_tuples(vec![vec!["A".into(), 1_i64.into()]]).unwrap();

    let increasing = one_row.is_monotonic_increasing();
    let decreasing = one_row.is_monotonic_decreasing();
    let lexsorted = one_row.is_lexsorted();

    assert!(increasing);
    assert!(decreasing);
    assert!(lexsorted);
}
21912
#[test]
fn multi_index_unsorted_is_neither() {
    // (B,1), (A,2), (B,2): the outer level goes down and then back up.
    let shuffled_rows = MultiIndex::from_tuples(vec![
        vec!["B".into(), 1_i64.into()],
        vec!["A".into(), 2_i64.into()],
        vec!["B".into(), 2_i64.into()],
    ])
    .unwrap();

    let increasing = shuffled_rows.is_monotonic_increasing();
    let decreasing = shuffled_rows.is_monotonic_decreasing();
    let lexsorted = shuffled_rows.is_lexsorted();

    assert!(!increasing);
    assert!(!decreasing);
    assert!(!lexsorted);
}
21926
#[test]
fn multi_index_outer_ascending_inner_descending_is_not_monotonic() {
    // (A,5), (A,1), (B,3): the outer level ascends while the inner level
    // descends inside the A group.
    let mixed_rows = MultiIndex::from_tuples(vec![
        vec!["A".into(), 5_i64.into()],
        vec!["A".into(), 1_i64.into()],
        vec!["B".into(), 3_i64.into()],
    ])
    .unwrap();

    // (A,5) > (A,1) lexicographically, which rules out "increasing".
    let increasing = mixed_rows.is_monotonic_increasing();
    assert!(!increasing);

    // The next step (A,1) < (B,3) goes back up, which rules out
    // "decreasing" too.
    let decreasing = mixed_rows.is_monotonic_decreasing();
    assert!(!decreasing);
}
21941
#[test]
fn index_lookup_methods_match_pandas() {
    use super::{Index, IndexLabel};

    // Wraps plain integers as `Int64` labels.
    let int_labels = |values: &[i64]| -> Vec<IndexLabel> {
        values.iter().copied().map(IndexLabel::Int64).collect()
    };

    let index = Index::new(int_labels(&[1, 3, 5, 7]));

    // get_indexer (exact): labels that are absent map to None, where pandas
    // reports -1 (Index([1,3,5,7]).get_indexer([2,3,6,7]) == [-1,1,-1,3]).
    let target = Index::new(int_labels(&[2, 3, 6, 7]));
    assert_eq!(
        index.get_indexer(&target),
        vec![None, Some(1), None, Some(3)],
        "get_indexer exact"
    );

    // searchsorted as (key, side, expected position); pandas gives
    // 3 -> 1 (left) / 2 (right), 4 -> 2, 8 -> 4, 0 -> 0.
    let searchsorted_cases = [
        (3_i64, "left", 1),
        (3, "right", 2),
        (4, "left", 2),
        (8, "left", 4),
        (0, "left", 0),
    ];
    for (key, side, expected) in searchsorted_cases {
        assert_eq!(
            index.searchsorted(&IndexLabel::Int64(key), side).unwrap(),
            expected
        );
    }

    // asof as (key, expected label, failure message): the last label <= key,
    // or None when the key precedes every label (pandas: 4 -> 3, 0 -> NaN,
    // 7 -> 7, 10 -> 7).
    let asof_cases = [
        (4_i64, Some(3_i64), "asof 4"),
        (0, None, "asof before all"),
        (7, Some(7), "asof exact"),
        (10, Some(7), "asof after all"),
    ];
    for (key, expected, description) in asof_cases {
        assert_eq!(
            index.asof(&IndexLabel::Int64(key)),
            expected.map(IndexLabel::Int64),
            "{}",
            description
        );
    }

    // factorize: codes follow first-appearance order (pandas:
    // ['b','a','b','c'] -> codes [0,1,0,2], uniques ['b','a','c']).
    let letters = Index::new(vec![
        IndexLabel::Utf8("b".into()),
        IndexLabel::Utf8("a".into()),
        IndexLabel::Utf8("b".into()),
        IndexLabel::Utf8("c".into()),
    ]);
    let (letter_codes, distinct_letters) = letters.factorize();
    assert_eq!(letter_codes, vec![0_isize, 1, 0, 2], "factorize codes");
    assert_eq!(
        distinct_letters.labels(),
        &[
            IndexLabel::Utf8("b".into()),
            IndexLabel::Utf8("a".into()),
            IndexLabel::Utf8("c".into())
        ],
        "factorize uniques"
    );
}
22011}