1#![forbid(unsafe_code)]
2#![warn(rustdoc::broken_intra_doc_links)]
3
4use std::{
77 borrow::Cow,
78 collections::HashMap,
79 fmt,
80 sync::{
81 Arc, Mutex, OnceLock,
82 atomic::{AtomicU64, Ordering as AtomicOrdering},
83 },
84};
85
86use chrono::Datelike;
87use fp_types::{Period, PeriodFreq, Scalar, Timedelta, TimedeltaComponents};
88use rustc_hash::{FxHashMap, FxHashSet};
94use serde::{Deserialize, Deserializer, Serialize, Serializer};
95use thiserror::Error;
96
/// A single label (key) stored in an `Index`.
///
/// Serde encodes it adjacently tagged: the `snake_case` variant name goes
/// under `"kind"` and the payload under `"value"`.
///
/// The derived `Ord` compares the variant first (in declaration order) and
/// the payload second, so labels of different kinds still have a total order.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(tag = "kind", content = "value", rename_all = "snake_case")]
pub enum IndexLabel {
    /// A signed 64-bit integer label.
    Int64(i64),
    /// An owned UTF-8 string label.
    Utf8(String),
    /// A timedelta label; `Timedelta::NAT` is treated as missing.
    /// NOTE(review): presumably nanoseconds, as in `Index::from_timedelta64` — confirm.
    Timedelta64(i64),
    /// A datetime label in nanoseconds since the Unix epoch (see
    /// `format_datetime_ns`); `i64::MIN` is treated as missing (NaT).
    Datetime64(i64),
    /// An explicitly missing label of the given null kind.
    Null(fp_types::NullKind),
}
114
115impl From<i64> for IndexLabel {
116 fn from(value: i64) -> Self {
117 Self::Int64(value)
118 }
119}
120
121impl From<&str> for IndexLabel {
122 fn from(value: &str) -> Self {
123 Self::Utf8(value.to_owned())
124 }
125}
126
127impl From<String> for IndexLabel {
128 fn from(value: String) -> Self {
129 Self::Utf8(value)
130 }
131}
132
133impl IndexLabel {
134 #[must_use]
135 fn is_missing(&self) -> bool {
136 match self {
137 Self::Timedelta64(value) => *value == Timedelta::NAT,
138 Self::Datetime64(value) => *value == i64::MIN,
139 Self::Int64(_) | Self::Utf8(_) => false,
140 Self::Null(_) => true,
141 }
142 }
143}
144
145fn index_label_is_truthy(label: &IndexLabel) -> bool {
146 if label.is_missing() {
147 return false;
148 }
149 match label {
150 IndexLabel::Int64(v) => *v != 0,
151 IndexLabel::Utf8(s) => !s.is_empty(),
152 IndexLabel::Timedelta64(v) => *v != 0,
153 IndexLabel::Datetime64(v) => *v != 0,
154 IndexLabel::Null(_) => false,
156 }
157}
158
159impl fmt::Display for IndexLabel {
160 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
161 match self {
162 Self::Int64(v) => write!(f, "{v}"),
163 Self::Utf8(v) => write!(f, "{v}"),
164 Self::Timedelta64(v) => write!(f, "{}", Timedelta::format(*v)),
165 Self::Datetime64(v) => write!(f, "{}", format_datetime_ns(*v)),
166 Self::Null(fp_types::NullKind::Null) => write!(f, "None"),
170 Self::Null(fp_types::NullKind::NaN) => write!(f, "NaN"),
171 Self::Null(fp_types::NullKind::NaT) => write!(f, "NaT"),
172 }
173 }
174}
175
176pub fn format_datetime_ns(nanos: i64) -> String {
177 if nanos == i64::MIN {
178 return "NaT".to_owned();
179 }
180 let secs = nanos / 1_000_000_000;
181 let subsec_nanos = (nanos % 1_000_000_000).unsigned_abs() as u32;
182 let dt = chrono::DateTime::from_timestamp(secs, subsec_nanos)
183 .unwrap_or(chrono::DateTime::UNIX_EPOCH);
184 dt.format("%Y-%m-%d %H:%M:%S").to_string()
185}
186
/// Ordering classification of an index's labels, as computed by
/// `detect_sort_order`.
///
/// Every `Ascending*` variant means the labels are all of that one kind and
/// *strictly* ascending, which also implies they are unique.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SortOrder {
    /// Not known to be homogeneous and strictly ascending.
    Unsorted,
    /// All labels are `Int64` and strictly ascending (also used for an empty index).
    AscendingInt64,
    /// All labels are `Utf8` and strictly ascending.
    AscendingUtf8,
    /// All labels are `Timedelta64` and strictly ascending.
    AscendingTimedelta64,
    /// All labels are `Datetime64` and strictly ascending.
    AscendingDatetime64,
}
204
/// Selects which set operation `Index::sorted_merge_set_op` performs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SetMergeKind {
    /// Keep labels present in both inputs.
    Intersection,
    /// Keep labels present in the left input but not in the right one.
    Difference,
}
214
215fn detect_sort_order(labels: &[IndexLabel]) -> SortOrder {
217 if labels.len() <= 1 {
218 return match labels.first() {
219 Some(IndexLabel::Int64(_)) | None => SortOrder::AscendingInt64,
220 Some(IndexLabel::Utf8(_)) => SortOrder::AscendingUtf8,
221 Some(IndexLabel::Timedelta64(_)) => SortOrder::AscendingTimedelta64,
222 Some(IndexLabel::Datetime64(_)) => SortOrder::AscendingDatetime64,
223 Some(IndexLabel::Null(_)) => SortOrder::Unsorted,
225 };
226 }
227
228 let all_int = labels.iter().all(|l| matches!(l, IndexLabel::Int64(_)));
230 if all_int {
231 let is_sorted = labels.windows(2).all(|w| {
232 if let (IndexLabel::Int64(a), IndexLabel::Int64(b)) = (&w[0], &w[1]) {
233 a < b
234 } else {
235 false
236 }
237 });
238 if is_sorted {
239 return SortOrder::AscendingInt64;
240 }
241 }
242
243 let all_utf8 = labels.iter().all(|l| matches!(l, IndexLabel::Utf8(_)));
245 if all_utf8 {
246 let is_sorted = labels.windows(2).all(|w| {
247 if let (IndexLabel::Utf8(a), IndexLabel::Utf8(b)) = (&w[0], &w[1]) {
248 a < b
249 } else {
250 false
251 }
252 });
253 if is_sorted {
254 return SortOrder::AscendingUtf8;
255 }
256 }
257
258 let all_td = labels
260 .iter()
261 .all(|l| matches!(l, IndexLabel::Timedelta64(_)));
262 if all_td {
263 let is_sorted = labels.windows(2).all(|w| {
264 if let (IndexLabel::Timedelta64(a), IndexLabel::Timedelta64(b)) = (&w[0], &w[1]) {
265 a < b
266 } else {
267 false
268 }
269 });
270 if is_sorted {
271 return SortOrder::AscendingTimedelta64;
272 }
273 }
274
275 let all_dt = labels
277 .iter()
278 .all(|l| matches!(l, IndexLabel::Datetime64(_)));
279 if all_dt {
280 let is_sorted = labels.windows(2).all(|w| {
281 if let (IndexLabel::Datetime64(a), IndexLabel::Datetime64(b)) = (&w[0], &w[1]) {
282 a < b
283 } else {
284 false
285 }
286 });
287 if is_sorted {
288 return SortOrder::AscendingDatetime64;
289 }
290 }
291
292 SortOrder::Unsorted
293}
294
/// Chooses which occurrence of a repeated label is *not* flagged as a
/// duplicate by `Index::duplicated` / `Index::drop_duplicates_keep`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DuplicateKeep {
    /// The first occurrence is kept; later repeats are duplicates.
    First,
    /// The last occurrence is kept; earlier repeats are duplicates.
    Last,
    /// No occurrence is kept: every label that appears more than once is a duplicate.
    None,
}
301
/// Source of `Index::label_identity` values; starts at 1 and only grows.
static INDEX_LABEL_ID_COUNTER: AtomicU64 = AtomicU64::new(1);
/// Process-wide memo of label-equality results, keyed by an ordered pair of
/// label identities (see `ordered_label_identity_pair`). Created on first use.
static INDEX_LABEL_EQUALITY_CACHE: OnceLock<Mutex<FxHashMap<(u64, u64), bool>>> = OnceLock::new();

/// Entry count at which the equality cache is cleared before the next insert.
const INDEX_LABEL_EQUALITY_CACHE_MAX: usize = 4096;
306
/// Hands out the next label identity by atomically post-incrementing the
/// global counter. `Relaxed` ordering is used: only the atomicity of the
/// increment is relied upon, not ordering with other memory operations.
fn next_index_label_identity() -> u64 {
    INDEX_LABEL_ID_COUNTER.fetch_add(1, AtomicOrdering::Relaxed)
}
310
/// Compact description of the consecutive `Int64` labels
/// `start, start + 1, ..., start + len - 1`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Int64UnitRangeLabels {
    /// First label of the range.
    start: i64,
    /// Number of labels in the range.
    len: usize,
}
316
317impl Int64UnitRangeLabels {
318 fn new(start: i64, len: usize) -> Option<Self> {
319 if len > 0 {
320 let last_offset = i64::try_from(len.checked_sub(1)?).ok()?;
321 start.checked_add(last_offset)?;
322 }
323 Some(Self { start, len })
324 }
325
326 fn materialize(self) -> Vec<IndexLabel> {
327 let mut labels = Vec::with_capacity(self.len);
328 for offset in 0..self.len {
329 let offset = i64::try_from(offset).expect("validated Int64 unit range length");
330 labels.push(IndexLabel::Int64(
331 self.start
332 .checked_add(offset)
333 .expect("validated Int64 unit range end"),
334 ));
335 }
336 labels
337 }
338
339 fn position(self, target: i64) -> Option<usize> {
340 let offset = target.checked_sub(self.start)?;
341 let offset = usize::try_from(offset).ok()?;
342 (offset < self.len).then_some(offset)
343 }
344
345 fn equals_slice(self, labels: &[IndexLabel]) -> bool {
346 labels.len() == self.len
347 && labels.iter().enumerate().all(|(offset, label)| {
348 let Ok(offset) = i64::try_from(offset) else {
349 return false;
350 };
351 matches!(
352 label,
353 IndexLabel::Int64(value)
354 if self.start.checked_add(offset).is_some_and(|expected| *value == expected)
355 )
356 })
357 }
358}
359
/// Label storage for an `Index` that can defer building the `IndexLabel`
/// vector.
///
/// At least one backing is populated by every constructor: the materialized
/// labels, a unit range description, or a typed `i64` vector.
struct IndexLabels {
    /// The labels as `IndexLabel` values; filled lazily by `as_slice` when the
    /// storage was created from a range or from typed values.
    materialized: OnceLock<Arc<Vec<IndexLabel>>>,
    /// Set when the labels are a consecutive `Int64` range.
    int64_unit_range: Option<Int64UnitRangeLabels>,
    /// Cached typed view: unset = not computed yet, `Some(values)` = every
    /// label is `Int64`, `None` = some label is not `Int64`.
    int64_typed: OnceLock<Option<Arc<Vec<i64>>>>,
}
376
377impl IndexLabels {
378 fn new(labels: Vec<IndexLabel>) -> Self {
379 let materialized = OnceLock::new();
380 let _ = materialized.set(Arc::new(labels));
381 Self {
382 materialized,
383 int64_unit_range: None,
384 int64_typed: OnceLock::new(),
385 }
386 }
387
388 fn new_int64_unit_range(start: i64, len: usize) -> Option<Self> {
389 Some(Self {
390 materialized: OnceLock::new(),
391 int64_unit_range: Some(Int64UnitRangeLabels::new(start, len)?),
392 int64_typed: OnceLock::new(),
393 })
394 }
395
396 fn new_int64_values(values: Arc<Vec<i64>>) -> Self {
397 let int64_typed = OnceLock::new();
398 let _ = int64_typed.set(Some(values));
399 Self {
400 materialized: OnceLock::new(),
401 int64_unit_range: None,
402 int64_typed,
403 }
404 }
405
406 fn as_slice(&self) -> &[IndexLabel] {
407 self.materialized
408 .get_or_init(|| {
409 if let Some(range) = self.int64_unit_range {
410 return Arc::new(range.materialize());
411 }
412 let values = self
413 .int64_typed
414 .get()
415 .and_then(Option::as_ref)
416 .expect("lazy index labels require a typed or range backing");
417 Arc::new(values.iter().copied().map(IndexLabel::Int64).collect())
418 })
419 .as_slice()
420 }
421
422 fn len(&self) -> usize {
423 if let Some(range) = self.int64_unit_range {
424 return range.len;
425 }
426 if let Some(labels) = self.materialized.get() {
427 return labels.len();
428 }
429 if let Some(Some(values)) = self.int64_typed.get() {
430 return values.len();
431 }
432 self.as_slice().len()
433 }
434
435 fn is_empty(&self) -> bool {
436 self.len() == 0
437 }
438
439 fn int64_unit_range(&self) -> Option<Int64UnitRangeLabels> {
440 self.int64_unit_range
441 }
442
443 fn int64_view(&self) -> Option<Arc<Vec<i64>>> {
446 self.int64_typed
447 .get_or_init(|| {
448 if let Some(range) = self.int64_unit_range {
449 let mut values = Vec::with_capacity(range.len);
450 for offset in 0..range.len {
451 let offset =
452 i64::try_from(offset).expect("validated Int64 unit range length");
453 values.push(
454 range
455 .start
456 .checked_add(offset)
457 .expect("validated Int64 unit range end"),
458 );
459 }
460 return Some(Arc::new(values));
461 }
462 let labels = self.materialized.get()?;
463 let mut values = Vec::with_capacity(labels.len());
464 for label in labels.iter() {
465 match label {
466 IndexLabel::Int64(value) => values.push(*value),
467 _ => return None,
468 }
469 }
470 Some(Arc::new(values))
471 })
472 .clone()
473 }
474
475 fn cached_int64_view(&self) -> Option<Option<Arc<Vec<i64>>>> {
478 self.int64_typed.get().cloned()
479 }
480}
481
482impl Clone for IndexLabels {
483 fn clone(&self) -> Self {
484 let int64_typed = OnceLock::new();
485 if let Some(view) = self.int64_typed.get() {
486 let _ = int64_typed.set(view.clone());
487 }
488 let materialized = OnceLock::new();
489 let has_lazy_backing =
492 self.int64_unit_range.is_some() || matches!(int64_typed.get(), Some(Some(_)));
493 if !has_lazy_backing && let Some(labels) = self.materialized.get() {
494 let _ = materialized.set(labels.clone());
495 }
496 Self {
497 materialized,
498 int64_unit_range: self.int64_unit_range,
499 int64_typed,
500 }
501 }
502}
503
504impl Default for IndexLabels {
505 fn default() -> Self {
506 Self::new(Vec::new())
507 }
508}
509
510impl fmt::Debug for IndexLabels {
511 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
512 self.as_slice().fmt(f)
513 }
514}
515
516impl PartialEq for IndexLabels {
517 fn eq(&self, other: &Self) -> bool {
518 match (self.int64_unit_range, other.int64_unit_range) {
519 (Some(left), Some(right)) => left == right,
520 (Some(range), None) => range.equals_slice(other.as_slice()),
521 (None, Some(range)) => range.equals_slice(self.as_slice()),
522 (None, None) => self.as_slice() == other.as_slice(),
523 }
524 }
525}
526
527impl Eq for IndexLabels {}
528
529impl std::ops::Deref for IndexLabels {
530 type Target = [IndexLabel];
531
532 fn deref(&self) -> &Self::Target {
533 self.as_slice()
534 }
535}
536
537impl<'a> IntoIterator for &'a IndexLabels {
538 type Item = &'a IndexLabel;
539 type IntoIter = std::slice::Iter<'a, IndexLabel>;
540
541 fn into_iter(self) -> Self::IntoIter {
542 self.as_slice().iter()
543 }
544}
545
546impl Serialize for IndexLabels {
547 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
548 where
549 S: Serializer,
550 {
551 self.as_slice().serialize(serializer)
552 }
553}
554
555impl<'de> Deserialize<'de> for IndexLabels {
556 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
557 where
558 D: Deserializer<'de>,
559 {
560 Vec::<IndexLabel>::deserialize(deserializer).map(Self::new)
561 }
562}
563
/// An ordered collection of `IndexLabel`s with an optional name.
///
/// Only `labels` and `name` are serialized; the identity and the caches are
/// skipped and start fresh after deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Index {
    /// The labels, possibly stored lazily.
    #[serde(default)]
    labels: IndexLabels,
    /// Optional index name; left out of the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    name: Option<String>,
    /// Process-local identity of the label set, used by `labels_equal` as a
    /// fast path and as a cache key. The derived `Clone` copies it, so clones
    /// share the identity of the index they were cloned from.
    #[serde(skip, default = "next_index_label_identity")]
    label_identity: u64,
    /// Memoized answer of `has_duplicates`.
    #[serde(skip)]
    duplicate_cache: OnceLock<bool>,
    /// Memoized answer of `sort_order`.
    #[serde(skip)]
    sort_order_cache: OnceLock<SortOrder>,
    /// Memoized result of `semantic_labels_fingerprint_with`.
    #[serde(skip)]
    semantic_fingerprint_cache: OnceLock<String>,
}
583
584impl PartialEq for Index {
585 fn eq(&self, other: &Self) -> bool {
586 self.labels_equal(other)
587 }
588}
589
590impl Eq for Index {}
591
592fn detect_duplicates(labels: &[IndexLabel]) -> bool {
593 let mut seen = FxHashMap::<&IndexLabel, ()>::default();
594 for label in labels {
595 if seen.insert(label, ()).is_some() {
596 return true;
597 }
598 }
599 false
600}
601
/// Normalizes a pair of identities to `(smaller, larger)` so that the same
/// two indexes always produce the same cache key regardless of argument order.
fn ordered_label_identity_pair(left: u64, right: u64) -> (u64, u64) {
    (left.min(right), left.max(right))
}
609
610impl Index {
611 #[must_use]
612 pub fn new(labels: Vec<IndexLabel>) -> Self {
613 Self {
614 labels: IndexLabels::new(labels),
615 name: None,
616 label_identity: next_index_label_identity(),
617 duplicate_cache: OnceLock::new(),
618 sort_order_cache: OnceLock::new(),
619 semantic_fingerprint_cache: OnceLock::new(),
620 }
621 }
622
623 fn labels_equal(&self, other: &Self) -> bool {
624 if self.label_identity == other.label_identity {
625 return true;
626 }
627
628 let key = ordered_label_identity_pair(self.label_identity, other.label_identity);
629 let cache = INDEX_LABEL_EQUALITY_CACHE.get_or_init(|| Mutex::new(FxHashMap::default()));
630 if let Some(equal) = cache
631 .lock()
632 .expect("index label equality cache poisoned")
633 .get(&key)
634 .copied()
635 {
636 return equal;
637 }
638
639 let equal = self.labels == other.labels;
640 let mut guard = cache.lock().expect("index label equality cache poisoned");
641 if guard.len() >= INDEX_LABEL_EQUALITY_CACHE_MAX {
642 guard.clear();
643 }
644 guard.insert(key, equal);
645 equal
646 }
647
648 #[must_use]
653 #[doc(hidden)]
654 pub fn new_known_unique(labels: Vec<IndexLabel>) -> Self {
655 debug_assert!(!detect_duplicates(&labels));
656 let index = Self::new(labels);
657 let _ = index.duplicate_cache.set(false);
658 index
659 }
660
661 #[must_use]
665 #[doc(hidden)]
666 pub fn new_known_unique_int64_unit_range(start: i64, len: usize) -> Self {
667 let labels = IndexLabels::new_int64_unit_range(start, len)
668 .expect("validated Int64 unit range bounds");
669 let index = Self {
670 labels,
671 name: None,
672 label_identity: next_index_label_identity(),
673 duplicate_cache: OnceLock::new(),
674 sort_order_cache: OnceLock::new(),
675 semantic_fingerprint_cache: OnceLock::new(),
676 };
677 let _ = index.duplicate_cache.set(false);
678 let _ = index.sort_order_cache.set(SortOrder::AscendingInt64);
679 index
680 }
681
    /// Creates an index of `Int64` labels from raw integers.
    ///
    /// Delegates to `from_i64_values`, so the `IndexLabel` vector is only
    /// built when the labels are first accessed as a slice.
    #[must_use]
    pub fn from_i64(values: Vec<i64>) -> Self {
        Self::from_i64_values(values)
    }
686
687 #[must_use]
692 #[doc(hidden)]
693 pub fn from_i64_values(values: Vec<i64>) -> Self {
694 Self {
695 labels: IndexLabels::new_int64_values(Arc::new(values)),
696 name: None,
697 label_identity: next_index_label_identity(),
698 duplicate_cache: OnceLock::new(),
699 sort_order_cache: OnceLock::new(),
700 semantic_fingerprint_cache: OnceLock::new(),
701 }
702 }
703
    /// Returns the labels as a shared vector of raw `i64` values, or `None`
    /// when some label is not `Int64`. The answer is computed on first use
    /// and cached.
    #[must_use]
    #[doc(hidden)]
    pub fn int64_label_values(&self) -> Option<Arc<Vec<i64>>> {
        self.labels.int64_view()
    }
711
    /// Returns the typed `i64` view only if it has already been determined.
    ///
    /// The outer `None` means "not computed yet"; `Some(None)` means the
    /// labels are known not to be all `Int64`. Never triggers computation.
    #[must_use]
    #[doc(hidden)]
    pub fn cached_int64_label_values(&self) -> Option<Option<Arc<Vec<i64>>>> {
        self.labels.cached_int64_view()
    }
719
720 #[must_use]
721 pub fn from_utf8(values: Vec<String>) -> Self {
722 Self::new(values.into_iter().map(IndexLabel::from).collect())
723 }
724
725 #[must_use]
726 pub fn from_timedelta64(nanos: Vec<i64>) -> Self {
727 Self::new(nanos.into_iter().map(IndexLabel::Timedelta64).collect())
728 }
729
730 #[must_use]
731 pub fn from_datetime64(nanos: Vec<i64>) -> Self {
732 Self::new(nanos.into_iter().map(IndexLabel::Datetime64).collect())
733 }
734
    /// Number of labels in the index.
    #[must_use]
    pub fn len(&self) -> usize {
        self.labels.len()
    }
739
    /// `true` when the index holds no labels.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.labels.is_empty()
    }
744
    /// The labels as a slice, in index order.
    ///
    /// For lazily stored indexes (unit range or typed `i64` values) the first
    /// call builds the label vector.
    #[must_use]
    pub fn labels(&self) -> &[IndexLabel] {
        self.labels.as_slice()
    }
749
750 #[must_use]
751 #[doc(hidden)]
752 pub fn int64_unit_range_labels(&self) -> Option<(i64, usize)> {
753 self.labels
754 .int64_unit_range()
755 .map(|range| (range.start, range.len))
756 }
757
758 #[must_use]
759 pub fn semantic_labels_fingerprint_with<F>(&self, compute: F) -> String
760 where
761 F: FnOnce(&[IndexLabel]) -> String,
762 {
763 self.semantic_fingerprint_cache
764 .get_or_init(|| compute(self.labels()))
765 .clone()
766 }
767
    /// The index name, if one is set.
    #[must_use]
    pub fn name(&self) -> Option<&str> {
        self.name.as_deref()
    }
773
774 #[must_use]
776 pub fn set_names(&self, name: Option<&str>) -> Self {
777 let mut idx = self.clone();
778 idx.name = name.map(String::from);
779 idx
780 }
781
    /// Returns a copy of this index named `name`.
    #[must_use]
    pub fn set_name(&self, name: &str) -> Self {
        self.set_names(Some(name))
    }
787
    /// The index name wrapped in a one-element vector.
    #[must_use]
    pub fn names(&self) -> Vec<Option<String>> {
        vec![self.name.clone()]
    }
796
797 #[must_use]
802 pub fn set_names_list(&self, names: &[Option<&str>]) -> Self {
803 assert!(
804 !names.is_empty(),
805 "set_names_list requires at least one name"
806 );
807 self.set_names(names[0])
808 }
809
    /// Returns a clone of this index; no label is changed.
    #[must_use]
    pub fn to_flat_index(&self) -> Self {
        self.clone()
    }
819
    /// Returns a copy of this index with its name set to `name`
    /// (same as `set_names`).
    #[must_use]
    pub fn rename_index(&self, name: Option<&str>) -> Self {
        self.set_names(name)
    }
825
826 fn propagate_name(&self, mut other: Self) -> Self {
828 other.name.clone_from(&self.name);
829 other
830 }
831
832 fn shared_name(&self, other: &Self) -> Option<String> {
835 if self.name == other.name {
836 self.name.clone()
837 } else {
838 None
839 }
840 }
841
842 #[must_use]
843 pub fn has_duplicates(&self) -> bool {
844 if self.labels.int64_unit_range().is_some() {
845 return false;
846 }
847 *self.duplicate_cache.get_or_init(|| {
848 if !matches!(self.sort_order(), SortOrder::Unsorted) {
856 return false;
857 }
858 detect_duplicates(self.labels())
859 })
860 }
861
    /// `true` when no label occurs more than once (negation of `has_duplicates`).
    #[must_use]
    pub fn is_unique(&self) -> bool {
        !self.has_duplicates()
    }
869
    /// Position of `label` in the index, or `None` if absent (alias of `position`).
    #[must_use]
    pub fn get_loc(&self, label: &IndexLabel) -> Option<usize> {
        self.position(label)
    }
877
878 #[must_use]
880 fn sort_order(&self) -> SortOrder {
881 if self.labels.int64_unit_range().is_some() {
882 return SortOrder::AscendingInt64;
883 }
884 *self
885 .sort_order_cache
886 .get_or_init(|| detect_sort_order(self.labels()))
887 }
888
889 #[must_use]
891 pub fn is_sorted(&self) -> bool {
892 !matches!(self.sort_order(), SortOrder::Unsorted)
893 }
894
895 #[must_use]
900 pub fn position(&self, needle: &IndexLabel) -> Option<usize> {
901 if let (Some(range), IndexLabel::Int64(target)) = (self.labels.int64_unit_range(), needle) {
902 return range.position(*target);
903 }
904 match self.sort_order() {
905 SortOrder::AscendingInt64 => {
906 if let IndexLabel::Int64(target) = needle {
907 self.labels
908 .binary_search_by(|label| {
909 if let IndexLabel::Int64(v) = label {
910 v.cmp(target)
911 } else {
912 std::cmp::Ordering::Less
913 }
914 })
915 .ok()
916 } else {
917 None }
919 }
920 SortOrder::AscendingUtf8 => {
921 if let IndexLabel::Utf8(target) = needle {
922 self.labels
923 .binary_search_by(|label| {
924 if let IndexLabel::Utf8(v) = label {
925 v.as_str().cmp(target.as_str())
926 } else {
927 std::cmp::Ordering::Less
928 }
929 })
930 .ok()
931 } else {
932 None
933 }
934 }
935 SortOrder::AscendingTimedelta64 => {
936 if let IndexLabel::Timedelta64(target) = needle {
937 self.labels
938 .binary_search_by(|label| {
939 if let IndexLabel::Timedelta64(v) = label {
940 v.cmp(target)
941 } else {
942 std::cmp::Ordering::Less
943 }
944 })
945 .ok()
946 } else {
947 None
948 }
949 }
950 SortOrder::AscendingDatetime64 => {
951 if let IndexLabel::Datetime64(target) = needle {
952 self.labels
953 .binary_search_by(|label| {
954 if let IndexLabel::Datetime64(v) = label {
955 v.cmp(target)
956 } else {
957 std::cmp::Ordering::Less
958 }
959 })
960 .ok()
961 } else {
962 None
963 }
964 }
965 SortOrder::Unsorted => self.labels.iter().position(|label| label == needle),
966 }
967 }
968
969 #[must_use]
970 pub fn position_map_first(&self) -> HashMap<IndexLabel, usize> {
971 let mut positions = HashMap::with_capacity(self.labels.len());
972 for (idx, label) in self.labels.iter().enumerate() {
973 positions.entry(label.clone()).or_insert(idx);
974 }
975 positions
976 }
977
978 fn position_map_first_ref(&self) -> FxHashMap<&IndexLabel, usize> {
979 let mut positions =
980 FxHashMap::with_capacity_and_hasher(self.labels.len(), Default::default());
981 for (idx, label) in self.labels.iter().enumerate() {
982 positions.entry(label).or_insert(idx);
983 }
984 positions
985 }
986
    /// `true` when `label` occurs in the index.
    #[must_use]
    pub fn contains(&self, label: &IndexLabel) -> bool {
        self.position(label).is_some()
    }
993
994 #[must_use]
995 pub fn get_indexer(&self, target: &Index) -> Vec<Option<usize>> {
996 if !matches!(self.sort_order(), SortOrder::Unsorted) {
1006 let labels = self.labels();
1007 let targets = target.labels();
1008 if !matches!(target.sort_order(), SortOrder::Unsorted) {
1009 let mut out = Vec::with_capacity(targets.len());
1010 let mut i = 0usize;
1011 for label in targets {
1012 while i < labels.len() && labels[i] < *label {
1013 i += 1;
1014 }
1015 if i < labels.len() && labels[i] == *label {
1016 out.push(Some(i));
1017 } else {
1018 out.push(None);
1019 }
1020 }
1021 return out;
1022 }
1023 return targets.iter().map(|label| self.position(label)).collect();
1024 }
1025 let map = self.position_map_first_ref();
1026 target
1027 .labels
1028 .iter()
1029 .map(|label| map.get(label).copied())
1030 .collect()
1031 }
1032
1033 #[must_use]
1034 pub fn isin(&self, values: &[IndexLabel]) -> Vec<bool> {
1035 let set: FxHashMap<&IndexLabel, ()> = values.iter().map(|v| (v, ())).collect();
1036 self.labels.iter().map(|l| set.contains_key(l)).collect()
1037 }
1038
1039 #[must_use]
1042 pub fn is_monotonic_increasing(&self) -> bool {
1043 if self.labels.len() <= 1 {
1044 return true;
1045 }
1046 for pair in self.labels.windows(2) {
1047 if pair[0] > pair[1] {
1048 return false;
1049 }
1050 }
1051 true
1052 }
1053
    /// Alias of `is_monotonic_increasing`.
    #[must_use]
    pub fn is_monotonic(&self) -> bool {
        self.is_monotonic_increasing()
    }
1059
1060 #[must_use]
1061 pub fn is_monotonic_decreasing(&self) -> bool {
1062 if self.labels.len() <= 1 {
1063 return true;
1064 }
1065 for pair in self.labels.windows(2) {
1066 if pair[0] < pair[1] {
1067 return false;
1068 }
1069 }
1070 true
1071 }
1072
1073 #[must_use]
1074 pub fn unique(&self) -> Self {
1075 if !matches!(self.sort_order(), SortOrder::Unsorted) {
1080 return self.clone();
1081 }
1082 let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1083 let labels: Vec<IndexLabel> = self
1084 .labels
1085 .iter()
1086 .filter(|l| seen.insert(l, ()).is_none())
1087 .cloned()
1088 .collect();
1089 self.propagate_name(Self::new(labels))
1090 }
1091
1092 #[must_use]
1093 pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
1094 let mut result = vec![false; self.labels.len()];
1095 if !matches!(self.sort_order(), SortOrder::Unsorted) {
1097 return result;
1098 }
1099 match keep {
1100 DuplicateKeep::First => {
1101 let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1102 for (i, label) in self.labels.iter().enumerate() {
1103 if seen.insert(label, ()).is_some() {
1104 result[i] = true;
1105 }
1106 }
1107 }
1108 DuplicateKeep::Last => {
1109 let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1110 for (i, label) in self.labels.iter().enumerate().rev() {
1111 if seen.insert(label, ()).is_some() {
1112 result[i] = true;
1113 }
1114 }
1115 }
1116 DuplicateKeep::None => {
1117 let mut counts = FxHashMap::<&IndexLabel, usize>::default();
1118 for label in &self.labels {
1119 *counts.entry(label).or_insert(0) += 1;
1120 }
1121 for (i, label) in self.labels.iter().enumerate() {
1122 if counts[label] > 1 {
1123 result[i] = true;
1124 }
1125 }
1126 }
1127 }
1128 result
1129 }
1130
    /// Removes repeated labels, keeping the first occurrence of each.
    #[must_use]
    pub fn drop_duplicates(&self) -> Self {
        self.drop_duplicates_keep(DuplicateKeep::First)
    }
1135
1136 #[must_use]
1140 pub fn drop_duplicates_keep(&self, keep: DuplicateKeep) -> Self {
1141 if !matches!(self.sort_order(), SortOrder::Unsorted) {
1143 return self.clone();
1144 }
1145 let duplicated = self.duplicated(keep);
1146 let labels = self
1147 .labels
1148 .iter()
1149 .zip(duplicated)
1150 .filter(|(_, is_duplicated)| !is_duplicated)
1151 .map(|(label, _)| label.clone())
1152 .collect();
1153 self.propagate_name(Self::new(labels))
1154 }
1155
1156 #[must_use]
1159 pub fn intersection(&self, other: &Self) -> Self {
1160 if let Some(labels) = self.sorted_merge_set_op(other, SetMergeKind::Intersection) {
1165 let mut result = Self::new(labels);
1166 result.name = self.shared_name(other);
1167 return result;
1168 }
1169 let other_set = other.position_map_first_ref();
1170 let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1171 let labels: Vec<IndexLabel> = self
1172 .labels
1173 .iter()
1174 .filter(|l| other_set.contains_key(l) && seen.insert(l, ()).is_none())
1175 .cloned()
1176 .collect();
1177 let mut result = Self::new(labels);
1178 result.name = self.shared_name(other);
1179 result
1180 }
1181
1182 fn sorted_merge_set_op(&self, other: &Self, kind: SetMergeKind) -> Option<Vec<IndexLabel>> {
1188 if matches!(self.sort_order(), SortOrder::Unsorted)
1189 || matches!(other.sort_order(), SortOrder::Unsorted)
1190 {
1191 return None;
1192 }
1193 let a = self.labels();
1194 let b = other.labels();
1195 let mut labels = Vec::with_capacity(a.len().min(b.len()));
1196 let (mut i, mut j) = (0usize, 0usize);
1197 while i < a.len() {
1198 if j >= b.len() {
1199 if kind == SetMergeKind::Difference {
1200 labels.extend_from_slice(&a[i..]);
1201 }
1202 break;
1203 }
1204 match a[i].cmp(&b[j]) {
1205 std::cmp::Ordering::Less => {
1206 if kind == SetMergeKind::Difference {
1207 labels.push(a[i].clone());
1208 }
1209 i += 1;
1210 }
1211 std::cmp::Ordering::Greater => j += 1,
1212 std::cmp::Ordering::Equal => {
1213 if kind == SetMergeKind::Intersection {
1214 labels.push(a[i].clone());
1215 }
1216 i += 1;
1217 j += 1;
1218 }
1219 }
1220 }
1221 Some(labels)
1222 }
1223
1224 #[must_use]
1225 pub fn union_with(&self, other: &Self) -> Self {
1226 let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1227 let mut labels = Vec::with_capacity(self.labels.len() + other.labels.len());
1228 for label in self.labels.iter().chain(other.labels.iter()) {
1229 if seen.insert(label, ()).is_none() {
1230 labels.push(label.clone());
1231 }
1232 }
1233 let mut result = Self::new(labels);
1234 result.name = self.shared_name(other);
1235 result
1236 }
1237
1238 #[must_use]
1239 pub fn difference(&self, other: &Self) -> Self {
1240 if let Some(labels) = self.sorted_merge_set_op(other, SetMergeKind::Difference) {
1243 return self.propagate_name(Self::new(labels));
1244 }
1245 let other_set = other.position_map_first_ref();
1246 let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1247 let labels: Vec<IndexLabel> = self
1248 .labels
1249 .iter()
1250 .filter(|l| !other_set.contains_key(l) && seen.insert(l, ()).is_none())
1251 .cloned()
1252 .collect();
1253 self.propagate_name(Self::new(labels))
1254 }
1255
1256 #[must_use]
1257 pub fn symmetric_difference(&self, other: &Self) -> Self {
1258 let self_set = self.position_map_first_ref();
1259 let other_set = other.position_map_first_ref();
1260 let mut seen = FxHashMap::<&IndexLabel, ()>::default();
1261 let mut labels = Vec::new();
1262 for label in &self.labels {
1263 if !other_set.contains_key(label) && seen.insert(label, ()).is_none() {
1264 labels.push(label.clone());
1265 }
1266 }
1267 for label in &other.labels {
1268 if !self_set.contains_key(label) && seen.insert(label, ()).is_none() {
1269 labels.push(label.clone());
1270 }
1271 }
1272 let mut result = Self::new(labels);
1273 result.name = self.shared_name(other);
1274 result
1275 }
1276
1277 #[must_use]
1280 pub fn argsort(&self) -> Vec<usize> {
1281 let mut indices: Vec<usize> = (0..self.labels.len()).collect();
1282 indices.sort_by(|&a, &b| self.labels[a].cmp(&self.labels[b]));
1283 indices
1284 }
1285
1286 #[must_use]
1287 pub fn sort_values(&self) -> Self {
1288 let order = self.argsort();
1289 self.propagate_name(Self::new(
1290 order.iter().map(|&i| self.labels[i].clone()).collect(),
1291 ))
1292 }
1293
1294 #[must_use]
1295 pub fn take(&self, indices: &[usize]) -> Self {
1296 self.propagate_name(Self::new(
1297 indices.iter().map(|&i| self.labels[i].clone()).collect(),
1298 ))
1299 }
1300
1301 #[must_use]
1302 pub fn slice(&self, start: usize, len: usize) -> Self {
1303 let start = start.min(self.labels.len());
1304 let end = start.saturating_add(len).min(self.labels.len());
1305 self.propagate_name(Self::new(self.labels[start..end].to_vec()))
1306 }
1307
1308 #[must_use]
1309 pub fn from_range(start: i64, stop: i64, step: i64) -> Self {
1310 let mut labels = Vec::new();
1311 let mut val = start;
1312 if step > 0 {
1313 while val < stop {
1314 labels.push(IndexLabel::Int64(val));
1315 val += step;
1316 }
1317 } else if step < 0 {
1318 while val > stop {
1319 labels.push(IndexLabel::Int64(val));
1320 val += step;
1321 }
1322 }
1323 Self::new(labels)
1324 }
1325
    /// The smallest label under `IndexLabel`'s derived ordering, or `None`
    /// for an empty index. No missing-value filtering is applied here.
    #[must_use]
    pub fn min(&self) -> Option<&IndexLabel> {
        self.labels.iter().min()
    }
1335
    /// The largest label under `IndexLabel`'s derived ordering, or `None`
    /// for an empty index. No missing-value filtering is applied here.
    #[must_use]
    pub fn max(&self) -> Option<&IndexLabel> {
        self.labels.iter().max()
    }
1343
1344 #[must_use]
1348 pub fn argmin(&self) -> Option<usize> {
1349 self.labels
1350 .iter()
1351 .enumerate()
1352 .min_by(|(_, a), (_, b)| a.cmp(b))
1353 .map(|(i, _)| i)
1354 }
1355
1356 #[must_use]
1360 pub fn argmax(&self) -> Option<usize> {
1361 self.labels
1362 .iter()
1363 .enumerate()
1364 .max_by(|(_, a), (_, b)| a.cmp(b))
1365 .map(|(i, _)| i)
1366 }
1367
    /// Number of distinct non-missing labels.
    #[must_use]
    pub fn nunique(&self) -> usize {
        self.nunique_with_dropna(true)
    }
1375
1376 #[must_use]
1380 pub fn nunique_with_dropna(&self, dropna: bool) -> usize {
1381 self.unique()
1382 .labels
1383 .iter()
1384 .filter(|label| !dropna || !label.is_missing())
1385 .count()
1386 }
1387
1388 #[must_use]
1394 pub fn map<F>(&self, func: F) -> Self
1395 where
1396 F: Fn(&IndexLabel) -> IndexLabel,
1397 {
1398 self.propagate_name(Self::new(self.labels.iter().map(&func).collect()))
1399 }
1400
    /// Transforms every label with `func`; identical to `map`.
    #[must_use]
    pub fn rename<F>(&self, func: F) -> Self
    where
        F: Fn(&IndexLabel) -> IndexLabel,
    {
        self.map(func)
    }
1412
1413 #[must_use]
1417 pub fn drop_labels(&self, labels_to_drop: &[IndexLabel]) -> Self {
1418 self.propagate_name(Self::new(
1419 self.labels
1420 .iter()
1421 .filter(|l| !labels_to_drop.contains(l))
1422 .cloned()
1423 .collect(),
1424 ))
1425 }
1426
1427 #[must_use]
1432 pub fn astype_int(&self) -> Self {
1433 self.propagate_name(Self::new(
1434 self.labels
1435 .iter()
1436 .map(|l| match l {
1437 IndexLabel::Int64(_) => l.clone(),
1438 IndexLabel::Utf8(s) => s
1439 .parse::<i64>()
1440 .map_or_else(|_| l.clone(), IndexLabel::Int64),
1441 IndexLabel::Timedelta64(ns) => IndexLabel::Int64(*ns),
1442 IndexLabel::Datetime64(ns) => IndexLabel::Int64(*ns),
1443 IndexLabel::Null(_) => l.clone(),
1447 })
1448 .collect(),
1449 ))
1450 }
1451
1452 #[must_use]
1456 pub fn astype_str(&self) -> Self {
1457 self.propagate_name(Self::new(
1458 self.labels
1459 .iter()
1460 .map(|l| match l {
1461 IndexLabel::Int64(v) => IndexLabel::Utf8(v.to_string()),
1462 IndexLabel::Utf8(_) => l.clone(),
1463 IndexLabel::Timedelta64(ns) => IndexLabel::Utf8(Timedelta::format(*ns)),
1464 IndexLabel::Datetime64(ns) => IndexLabel::Utf8(format_datetime_ns(*ns)),
1465 IndexLabel::Null(kind) => IndexLabel::Utf8(
1469 match kind {
1470 fp_types::NullKind::Null => "None",
1471 fp_types::NullKind::NaN => "nan",
1472 fp_types::NullKind::NaT => "NaT",
1473 }
1474 .to_owned(),
1475 ),
1476 })
1477 .collect(),
1478 ))
1479 }
1480
    /// Converts the index to the dtype named by `dtype`.
    ///
    /// * `"int"` / `"int64"`: see `astype_int`.
    /// * `"str"` / `"string"` / `"object"`: see `astype_str`.
    /// * `"datetime64[ns]"` / `"timedelta64[ns]"`: no conversion is performed;
    ///   the index is returned as a clone once `ensure_index_kind` accepts it.
    ///
    /// # Errors
    /// `IndexError::InvalidArgument` for any other dtype name, plus whatever
    /// `ensure_index_kind` reports.
    /// NOTE(review): `ensure_index_kind` is defined outside this view;
    /// presumably it fails when some label does not satisfy the predicate — confirm.
    pub fn astype(&self, dtype: &str) -> Result<Self, IndexError> {
        match dtype {
            "int" | "int64" => Ok(self.astype_int()),
            "str" | "string" | "object" => Ok(self.astype_str()),
            "datetime64[ns]" => {
                ensure_index_kind(
                    self,
                    |label| matches!(label, IndexLabel::Datetime64(_)),
                    "DatetimeIndex",
                )?;
                Ok(self.clone())
            }
            "timedelta64[ns]" => {
                ensure_index_kind(
                    self,
                    |label| matches!(label, IndexLabel::Timedelta64(_)),
                    "TimedeltaIndex",
                )?;
                Ok(self.clone())
            }
            other => Err(IndexError::InvalidArgument(format!(
                "unsupported Index.astype dtype {other:?}"
            ))),
        }
    }
1510
1511 #[must_use]
1517 pub fn equals(&self, other: &Self) -> bool {
1518 self.labels_equal(other)
1519 }
1520
1521 #[must_use]
1526 pub fn identical(&self, other: &Self) -> bool {
1527 self.labels_equal(other) && self.name == other.name
1528 }
1529
1530 fn value_counts_raw(
1531 &self,
1532 sort: bool,
1533 ascending: bool,
1534 dropna: bool,
1535 ) -> (Vec<(IndexLabel, usize)>, usize) {
1536 let mut seen_order: Vec<IndexLabel> = Vec::new();
1537 let mut counts: FxHashMap<IndexLabel, usize> = FxHashMap::default();
1538 let mut total = 0usize;
1539 for label in &self.labels {
1540 if dropna && label.is_missing() {
1541 continue;
1542 }
1543 total += 1;
1544 if !counts.contains_key(label) {
1545 seen_order.push(label.clone());
1546 }
1547 *counts.entry(label.clone()).or_insert(0) += 1;
1548 }
1549 let mut pairs: Vec<(IndexLabel, usize)> = seen_order
1550 .into_iter()
1551 .map(|label| {
1552 let count = counts[&label];
1553 (label, count)
1554 })
1555 .collect();
1556 if sort {
1557 if ascending {
1558 pairs.sort_by_key(|entry| entry.1);
1559 } else {
1560 pairs.sort_by_key(|entry| std::cmp::Reverse(entry.1));
1561 }
1562 }
1563 (pairs, total)
1564 }
1565
1566 #[must_use]
1571 pub fn value_counts(&self) -> Vec<(IndexLabel, usize)> {
1572 self.value_counts_raw(true, false, true).0
1573 }
1574
1575 #[must_use]
1581 pub fn value_counts_with_options(
1582 &self,
1583 normalize: bool,
1584 sort: bool,
1585 ascending: bool,
1586 dropna: bool,
1587 ) -> Vec<(IndexLabel, Scalar)> {
1588 let (pairs, total) = self.value_counts_raw(sort, ascending, dropna);
1589 if normalize {
1590 let denom = total as f64;
1591 return pairs
1592 .into_iter()
1593 .map(|(label, count)| (label, Scalar::Float64(count as f64 / denom)))
1594 .collect();
1595 }
1596
1597 pairs
1598 .into_iter()
1599 .map(|(label, count)| (label, Scalar::Int64(count as i64)))
1600 .collect()
1601 }
1602
1603 #[must_use]
1611 pub fn shift(&self, periods: i64, fill: IndexLabel) -> Self {
1612 let len = self.labels.len();
1613 if len == 0 || periods == 0 {
1614 return self.clone();
1615 }
1616 let mut out: Vec<IndexLabel> = Vec::with_capacity(len);
1617 let abs = periods.unsigned_abs() as usize;
1618 if abs >= len {
1619 for _ in 0..len {
1620 out.push(fill.clone());
1621 }
1622 } else if periods > 0 {
1623 for _ in 0..abs {
1624 out.push(fill.clone());
1625 }
1626 out.extend_from_slice(&self.labels[..len - abs]);
1627 } else {
1628 out.extend_from_slice(&self.labels[abs..]);
1629 for _ in 0..abs {
1630 out.push(fill.clone());
1631 }
1632 }
1633 self.propagate_name(Self::new(out))
1634 }
1635
1636 #[must_use]
1645 pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
1646 let mut best: Option<&IndexLabel> = None;
1647 for label in &self.labels {
1648 if label.is_missing() {
1649 continue;
1650 }
1651 if label.cmp(key).is_le() {
1652 best = Some(label);
1653 } else {
1654 break;
1655 }
1656 }
1657 best.cloned()
1658 }
1659
1660 pub fn searchsorted(&self, value: &IndexLabel, side: &str) -> Result<usize, IndexError> {
1667 if side != "left" && side != "right" {
1668 return Err(IndexError::InvalidArgument(format!(
1669 "searchsorted: side must be 'left' or 'right', got {side:?}"
1670 )));
1671 }
1672 if value.is_missing() {
1673 return Err(IndexError::InvalidArgument(
1674 "searchsorted: needle cannot be missing".to_owned(),
1675 ));
1676 }
1677 let mut lo = 0usize;
1678 let mut hi = self.labels.len();
1679 while lo < hi {
1680 let mid = lo + (hi - lo) / 2;
1681 let cmp = if self.labels[mid].is_missing() {
1682 std::cmp::Ordering::Greater
1683 } else {
1684 self.labels[mid].cmp(value)
1685 };
1686 use std::cmp::Ordering;
1687 let go_right = matches!(
1688 (cmp, side),
1689 (Ordering::Less, _) | (Ordering::Equal, "right")
1690 );
1691 if go_right {
1692 lo = mid + 1;
1693 } else {
1694 hi = mid;
1695 }
1696 }
1697 Ok(lo)
1698 }
1699
1700 #[must_use]
1707 pub fn memory_usage(&self, deep: bool) -> usize {
1708 self.labels
1709 .iter()
1710 .map(|label| match label {
1711 IndexLabel::Int64(_)
1712 | IndexLabel::Timedelta64(_)
1713 | IndexLabel::Datetime64(_)
1714 | IndexLabel::Null(_) => 8,
1715 IndexLabel::Utf8(s) => {
1716 if deep {
1717 std::mem::size_of::<String>() + s.len()
1718 } else {
1719 std::mem::size_of::<String>()
1720 }
1721 }
1722 })
1723 .sum()
1724 }
1725
1726 #[must_use]
1732 pub fn nlevels(&self) -> usize {
1733 1
1734 }
1735
1736 #[must_use]
1741 pub fn to_list(&self) -> Vec<IndexLabel> {
1742 self.labels().to_vec()
1743 }
1744
1745 #[must_use]
1750 pub fn format(&self) -> Vec<String> {
1751 self.labels.iter().map(IndexLabel::to_string).collect()
1752 }
1753
1754 #[must_use]
1761 pub fn putmask(&self, cond: &[bool], value: &IndexLabel) -> Self {
1762 let new_labels: Vec<IndexLabel> = self
1763 .labels
1764 .iter()
1765 .enumerate()
1766 .map(|(i, label)| {
1767 if cond.get(i).copied().unwrap_or(false) {
1768 value.clone()
1769 } else {
1770 label.clone()
1771 }
1772 })
1773 .collect();
1774 self.propagate_name(Self::new(new_labels))
1775 }
1776
1777 #[must_use]
1783 pub fn any(&self) -> bool {
1784 self.labels.iter().any(index_label_is_truthy)
1785 }
1786
1787 #[must_use]
1792 pub fn all(&self) -> bool {
1793 self.labels.iter().all(index_label_is_truthy)
1794 }
1795
1796 #[must_use]
1801 pub fn dropna(&self) -> Self {
1802 self.propagate_name(Self::new(
1803 self.labels
1804 .iter()
1805 .filter(|label| !label.is_missing())
1806 .cloned()
1807 .collect(),
1808 ))
1809 }
1810
1811 pub fn insert(&self, loc: usize, item: IndexLabel) -> Result<Self, IndexError> {
1817 if loc > self.labels.len() {
1818 return Err(IndexError::OutOfBounds {
1819 position: loc,
1820 length: self.labels.len(),
1821 });
1822 }
1823 let mut labels = self.labels().to_vec();
1824 labels.insert(loc, item);
1825 Ok(self.propagate_name(Self::new(labels)))
1826 }
1827
1828 pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
1833 if loc >= self.labels.len() {
1834 return Err(IndexError::OutOfBounds {
1835 position: loc,
1836 length: self.labels.len(),
1837 });
1838 }
1839 let mut labels = self.labels().to_vec();
1840 labels.remove(loc);
1841 Ok(self.propagate_name(Self::new(labels)))
1842 }
1843
1844 #[must_use]
1850 pub fn append(&self, other: &Self) -> Self {
1851 let mut labels = self.labels().to_vec();
1852 labels.extend(other.labels.iter().cloned());
1853 self.propagate_name(Self::new(labels))
1854 }
1855
1856 #[must_use]
1861 pub fn repeat(&self, repeats: usize) -> Self {
1862 if repeats == 0 {
1863 return self.propagate_name(Self::new(Vec::new()));
1864 }
1865 if repeats == 1 {
1866 return self.clone();
1867 }
1868 let mut out = Vec::with_capacity(self.labels.len() * repeats);
1869 for label in &self.labels {
1870 for _ in 0..repeats {
1871 out.push(label.clone());
1872 }
1873 }
1874 self.propagate_name(Self::new(out))
1875 }
1876
1877 #[must_use]
1881 pub fn fillna(&self, value: &IndexLabel) -> Self {
1882 self.propagate_name(Self::new(
1883 self.labels
1884 .iter()
1885 .map(|label| {
1886 if label.is_missing() {
1887 value.clone()
1888 } else {
1889 label.clone()
1890 }
1891 })
1892 .collect(),
1893 ))
1894 }
1895
1896 #[must_use]
1898 pub fn isna(&self) -> Vec<bool> {
1899 self.labels.iter().map(IndexLabel::is_missing).collect()
1900 }
1901
1902 #[must_use]
1904 pub fn notna(&self) -> Vec<bool> {
1905 self.labels
1906 .iter()
1907 .map(|label| !label.is_missing())
1908 .collect()
1909 }
1910
1911 #[must_use]
1915 pub fn where_cond(&self, cond: &[bool], other: &IndexLabel) -> Self {
1916 self.propagate_name(Self::new(
1917 self.labels
1918 .iter()
1919 .enumerate()
1920 .map(|(i, l)| {
1921 if cond.get(i).copied().unwrap_or(false) {
1922 l.clone()
1923 } else {
1924 other.clone()
1925 }
1926 })
1927 .collect(),
1928 ))
1929 }
1930
1931 #[must_use]
1933 pub fn union(&self, other: &Self) -> Self {
1934 self.union_with(other)
1935 }
1936
1937 #[must_use]
1939 pub fn sort(&self) -> Self {
1940 self.sort_values()
1941 }
1942
1943 #[must_use]
1947 pub fn sortlevel(&self) -> (Self, Vec<usize>) {
1948 let order = self.argsort();
1949 (self.take(&order), order)
1950 }
1951
1952 #[must_use]
1954 pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Self {
1955 self.drop_labels(labels_to_drop)
1956 }
1957
1958 #[must_use]
1960 pub fn copy(&self) -> Self {
1961 self.clone()
1962 }
1963
1964 #[must_use]
1966 pub fn where_(&self, cond: &[bool], other: &IndexLabel) -> Self {
1967 self.where_cond(cond, other)
1968 }
1969
1970 #[must_use]
1972 pub fn tolist(&self) -> Vec<IndexLabel> {
1973 self.to_list()
1974 }
1975
1976 #[must_use]
1978 pub fn to_numpy(&self) -> Vec<IndexLabel> {
1979 self.to_list()
1980 }
1981
1982 #[must_use]
1984 pub fn array(&self) -> Vec<IndexLabel> {
1985 self.to_numpy()
1986 }
1987
1988 #[must_use]
1990 pub fn values(&self) -> Vec<IndexLabel> {
1991 self.to_numpy()
1992 }
1993
1994 #[must_use]
1996 pub fn ravel(&self) -> Vec<IndexLabel> {
1997 self.to_numpy()
1998 }
1999
2000 #[must_use]
2003 pub fn view(&self) -> Self {
2004 self.clone()
2005 }
2006
2007 #[must_use]
2009 pub fn transpose(&self) -> Self {
2010 self.clone()
2011 }
2012
2013 #[allow(non_snake_case)]
2015 #[must_use]
2016 pub fn T(&self) -> Self {
2017 self.transpose()
2018 }
2019
2020 #[must_use]
2023 pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
2024 self.labels
2025 .iter()
2026 .map(|label| vec![label.clone()])
2027 .collect()
2028 }
2029
2030 #[must_use]
2033 pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
2034 self.labels
2035 .iter()
2036 .map(|label| (label.clone(), label.clone()))
2037 .collect()
2038 }
2039
2040 #[must_use]
2042 pub fn dtype(&self) -> &'static str {
2043 match self.inferred_type() {
2044 "integer" => "int64",
2045 "string" => "object",
2046 "timedelta64" => "timedelta64[ns]",
2047 "datetime64" => "datetime64[ns]",
2048 "empty" | "mixed" => "object",
2049 _ => "object",
2050 }
2051 }
2052
2053 #[must_use]
2056 pub fn dtypes(&self) -> Vec<&'static str> {
2057 vec![self.dtype()]
2058 }
2059
2060 #[must_use]
2062 pub fn infer_objects(&self) -> Self {
2063 self.clone()
2064 }
2065
2066 #[must_use]
2068 pub fn holds_integer(&self) -> bool {
2069 self.is_integer()
2070 }
2071
2072 #[must_use]
2074 pub fn inferred_type(&self) -> &'static str {
2075 if self.labels.is_empty() {
2076 return "empty";
2077 }
2078 let mut non_missing = self.labels.iter().filter(|label| !label.is_missing());
2079 let Some(first) = non_missing.next() else {
2080 return "empty";
2081 };
2082 let same_kind = |label: &IndexLabel| {
2083 matches!(
2084 (first, label),
2085 (IndexLabel::Int64(_), IndexLabel::Int64(_))
2086 | (IndexLabel::Utf8(_), IndexLabel::Utf8(_))
2087 | (IndexLabel::Timedelta64(_), IndexLabel::Timedelta64(_))
2088 | (IndexLabel::Datetime64(_), IndexLabel::Datetime64(_))
2089 )
2090 };
2091 if !non_missing.all(same_kind) {
2092 return "mixed";
2093 }
2094 match first {
2095 IndexLabel::Int64(_) => "integer",
2096 IndexLabel::Utf8(_) => "string",
2097 IndexLabel::Timedelta64(_) => "timedelta64",
2098 IndexLabel::Datetime64(_) => "datetime64",
2099 IndexLabel::Null(_) => "mixed",
2102 }
2103 }
2104
2105 #[must_use]
2107 pub fn hasnans(&self) -> bool {
2108 self.labels.iter().any(IndexLabel::is_missing)
2109 }
2110
2111 #[must_use]
2113 pub fn ndim(&self) -> usize {
2114 1
2115 }
2116
2117 #[must_use]
2119 pub fn shape(&self) -> (usize,) {
2120 (self.len(),)
2121 }
2122
2123 #[must_use]
2125 pub fn size(&self) -> usize {
2126 self.len()
2127 }
2128
2129 #[must_use]
2131 pub fn nbytes(&self) -> usize {
2132 self.memory_usage(false)
2133 }
2134
2135 #[must_use]
2137 pub fn empty(&self) -> bool {
2138 self.is_empty()
2139 }
2140
2141 pub fn item(&self) -> Result<IndexLabel, IndexError> {
2146 if self.len() == 1 {
2147 Ok(self.labels[0].clone())
2148 } else {
2149 Err(IndexError::InvalidArgument(format!(
2150 "item requires exactly one label, got {}",
2151 self.len()
2152 )))
2153 }
2154 }
2155
2156 #[must_use]
2158 pub fn is_(&self, other: &Self) -> bool {
2159 std::ptr::eq(self, other)
2160 }
2161
2162 #[must_use]
2164 pub fn is_boolean(&self) -> bool {
2165 false
2166 }
2167
2168 #[must_use]
2170 pub fn is_categorical(&self) -> bool {
2171 false
2172 }
2173
2174 #[must_use]
2176 pub fn is_floating(&self) -> bool {
2177 false
2178 }
2179
2180 #[must_use]
2182 pub fn is_integer(&self) -> bool {
2183 !self.labels.is_empty()
2184 && self
2185 .labels
2186 .iter()
2187 .filter(|label| !label.is_missing())
2188 .all(|label| matches!(label, IndexLabel::Int64(_)))
2189 }
2190
2191 #[must_use]
2193 pub fn is_interval(&self) -> bool {
2194 false
2195 }
2196
2197 #[must_use]
2199 pub fn is_numeric(&self) -> bool {
2200 self.is_integer()
2201 }
2202
2203 #[must_use]
2205 pub fn is_object(&self) -> bool {
2206 matches!(self.dtype(), "object")
2207 }
2208
2209 #[must_use]
2211 pub fn isnull(&self) -> Vec<bool> {
2212 self.isna()
2213 }
2214
2215 #[must_use]
2217 pub fn notnull(&self) -> Vec<bool> {
2218 self.notna()
2219 }
2220
2221 #[must_use]
2226 pub fn factorize(&self) -> (Vec<isize>, Self) {
2227 let mut positions = FxHashMap::<IndexLabel, isize>::default();
2228 let mut uniques = Vec::<IndexLabel>::new();
2229 let mut codes = Vec::with_capacity(self.labels.len());
2230 for label in &self.labels {
2231 if label.is_missing() {
2232 codes.push(-1);
2233 } else if let Some(code) = positions.get(label) {
2234 codes.push(*code);
2235 } else {
2236 let code = isize::try_from(uniques.len()).unwrap_or(isize::MAX);
2237 positions.insert(label.clone(), code);
2238 uniques.push(label.clone());
2239 codes.push(code);
2240 }
2241 }
2242 (codes, self.propagate_name(Self::new(uniques)))
2243 }
2244
2245 #[must_use]
2247 pub fn get_indexer_for(&self, target: &Self) -> Vec<Option<usize>> {
2248 self.get_indexer(target)
2249 }
2250
2251 #[must_use]
2257 pub fn get_indexer_non_unique(&self, target: &Self) -> (Vec<isize>, Vec<usize>) {
2258 let mut positions = FxHashMap::<IndexLabel, Vec<usize>>::default();
2259 for (position, label) in self.labels.iter().enumerate() {
2260 positions.entry(label.clone()).or_default().push(position);
2261 }
2262
2263 let mut indexer = Vec::new();
2264 let mut missing = Vec::new();
2265 for (target_position, label) in target.labels.iter().enumerate() {
2266 if let Some(source_positions) = positions.get(label) {
2267 indexer.extend(
2268 source_positions
2269 .iter()
2270 .map(|position| isize::try_from(*position).unwrap_or(isize::MAX)),
2271 );
2272 } else {
2273 indexer.push(-1);
2274 missing.push(target_position);
2275 }
2276 }
2277 (indexer, missing)
2278 }
2279
2280 pub fn get_level_values(&self, level: usize) -> Result<Self, IndexError> {
2282 if level == 0 {
2283 Ok(self.clone())
2284 } else {
2285 Err(IndexError::OutOfBounds {
2286 position: level,
2287 length: 1,
2288 })
2289 }
2290 }
2291
2292 pub fn get_slice_bound(&self, label: &IndexLabel, side: &str) -> Result<usize, IndexError> {
2294 self.searchsorted(label, side)
2295 }
2296
2297 pub fn slice_locs(
2299 &self,
2300 start: Option<&IndexLabel>,
2301 end: Option<&IndexLabel>,
2302 ) -> Result<(usize, usize), IndexError> {
2303 let start = match start {
2304 Some(label) => self.get_slice_bound(label, "left")?,
2305 None => 0,
2306 };
2307 let end = match end {
2308 Some(label) => self.get_slice_bound(label, "right")?,
2309 None => self.len(),
2310 };
2311 Ok(if end < start {
2312 (start, start)
2313 } else {
2314 (start, end)
2315 })
2316 }
2317
2318 pub fn slice_indexer(
2320 &self,
2321 start: Option<&IndexLabel>,
2322 end: Option<&IndexLabel>,
2323 ) -> Result<(usize, usize), IndexError> {
2324 self.slice_locs(start, end)
2325 }
2326
2327 #[must_use]
2329 pub fn reindex(&self, target: &Self) -> (Self, Vec<Option<usize>>) {
2330 (target.clone(), self.get_indexer(target))
2331 }
2332
2333 pub fn droplevel(&self, level: usize) -> Result<Self, IndexError> {
2336 if level == 0 {
2337 Err(IndexError::InvalidArgument(
2338 "cannot remove the only level from a flat Index".to_owned(),
2339 ))
2340 } else {
2341 Err(IndexError::OutOfBounds {
2342 position: level,
2343 length: 1,
2344 })
2345 }
2346 }
2347
2348 #[must_use]
2350 pub fn round(&self) -> Self {
2351 self.clone()
2352 }
2353
2354 #[must_use]
2356 pub fn r#str(&self) -> IndexStringAccessor<'_> {
2357 IndexStringAccessor::borrowed(self)
2358 }
2359
2360 #[must_use]
2362 pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
2363 let mut groups = HashMap::<IndexLabel, Vec<usize>>::new();
2364 for (position, label) in self.labels.iter().enumerate() {
2365 groups.entry(label.clone()).or_default().push(position);
2366 }
2367 groups
2368 }
2369
2370 pub fn join(&self, other: &Self, how: &str) -> Result<Self, IndexError> {
2372 match how {
2373 "left" => Ok(self.clone()),
2374 "right" => Ok(other.clone()),
2375 "inner" => Ok(self.intersection(other)),
2376 "outer" => Ok(self.union_with(other)),
2377 other => Err(IndexError::InvalidArgument(format!(
2378 "join: how must be 'left', 'right', 'inner', or 'outer', got {other:?}"
2379 ))),
2380 }
2381 }
2382
2383 #[must_use]
2387 pub fn asof_locs(&self, where_index: &Self, mask: Option<&[bool]>) -> Vec<Option<usize>> {
2388 where_index
2389 .labels
2390 .iter()
2391 .map(|key| {
2392 let mut best = None;
2393 for (position, label) in self.labels.iter().enumerate() {
2394 if mask
2395 .and_then(|values| values.get(position))
2396 .is_some_and(|include| !include)
2397 {
2398 continue;
2399 }
2400 if label.is_missing() {
2401 continue;
2402 }
2403 if label.cmp(key).is_le() {
2404 best = Some(position);
2405 } else {
2406 break;
2407 }
2408 }
2409 best
2410 })
2411 .collect()
2412 }
2413
2414 #[must_use]
2420 pub fn diff(&self, periods: usize) -> Vec<Option<IndexLabel>> {
2421 let mut out = vec![None; self.len()];
2422 if periods == 0 {
2423 return out;
2424 }
2425 for (position, slot) in out.iter_mut().enumerate().skip(periods) {
2426 *slot = match (&self.labels[position], &self.labels[position - periods]) {
2427 (IndexLabel::Int64(current), IndexLabel::Int64(previous)) => {
2428 current.checked_sub(*previous).map(IndexLabel::Int64)
2429 }
2430 (IndexLabel::Timedelta64(current), IndexLabel::Timedelta64(previous))
2431 if *current != Timedelta::NAT && *previous != Timedelta::NAT =>
2432 {
2433 current.checked_sub(*previous).map(IndexLabel::Timedelta64)
2434 }
2435 (IndexLabel::Datetime64(current), IndexLabel::Datetime64(previous))
2436 if *current != i64::MIN && *previous != i64::MIN =>
2437 {
2438 current.checked_sub(*previous).map(IndexLabel::Timedelta64)
2439 }
2440 _ => None,
2441 };
2442 }
2443 out
2444 }
2445}
2446
2447#[derive(Debug, Clone)]
2448pub struct IndexStringAccessor<'a> {
2449 index: Cow<'a, Index>,
2450}
2451
2452impl<'a> IndexStringAccessor<'a> {
2453 fn borrowed(index: &'a Index) -> Self {
2454 Self {
2455 index: Cow::Borrowed(index),
2456 }
2457 }
2458
2459 fn owned(index: Index) -> Self {
2460 Self {
2461 index: Cow::Owned(index),
2462 }
2463 }
2464
2465 fn map_utf8<T>(&self, func: impl Fn(&str) -> T) -> Vec<Option<T>> {
2466 self.index
2467 .labels()
2468 .iter()
2469 .map(|label| match label {
2470 IndexLabel::Utf8(value) => Some(func(value)),
2471 IndexLabel::Int64(_)
2472 | IndexLabel::Timedelta64(_)
2473 | IndexLabel::Datetime64(_)
2474 | IndexLabel::Null(_) => None,
2475 })
2476 .collect()
2477 }
2478
2479 #[must_use]
2481 pub fn lower(&self) -> Vec<Option<String>> {
2482 self.map_utf8(str::to_lowercase)
2483 }
2484
2485 #[must_use]
2487 pub fn upper(&self) -> Vec<Option<String>> {
2488 self.map_utf8(str::to_uppercase)
2489 }
2490
2491 #[must_use]
2493 pub fn contains(&self, needle: &str) -> Vec<Option<bool>> {
2494 self.map_utf8(|value| value.contains(needle))
2495 }
2496
2497 #[must_use]
2499 pub fn len(&self) -> Vec<Option<usize>> {
2500 self.map_utf8(str::len)
2501 }
2502
2503 #[must_use]
2505 pub fn is_empty(&self) -> Vec<Option<bool>> {
2506 self.map_utf8(str::is_empty)
2507 }
2508}
2509
2510fn datetime_from_nanos(nanos: i64) -> Option<chrono::DateTime<chrono::Utc>> {
2511 if nanos == i64::MIN {
2512 return None;
2513 }
2514 let secs = nanos.div_euclid(1_000_000_000);
2515 let subsec_nanos = nanos.rem_euclid(1_000_000_000) as u32;
2516 chrono::DateTime::from_timestamp(secs, subsec_nanos)
2517}
2518
2519fn datetime_to_period_error(message: impl Into<String>) -> IndexError {
2520 IndexError::InvalidArgument(format!(
2521 "DatetimeIndex to_period failed: {}",
2522 message.into()
2523 ))
2524}
2525
2526fn date_to_weekly_period_ordinal(date: chrono::NaiveDate) -> Result<i64, IndexError> {
2527 let base = period_epoch_date(1969, 12, 22)?;
2528 Ok(date.signed_duration_since(base).num_days().div_euclid(7))
2529}
2530
2531fn business_period_anchor_date(date: chrono::NaiveDate) -> Result<chrono::NaiveDate, IndexError> {
2532 match date.weekday().num_days_from_monday() {
2533 5 => period_add_days(date, 2),
2534 6 => period_add_days(date, 1),
2535 _ => Ok(date),
2536 }
2537}
2538
2539fn date_to_business_period_ordinal(date: chrono::NaiveDate) -> Result<i64, IndexError> {
2540 let adjusted = business_period_anchor_date(date)?;
2541 let days = adjusted
2542 .signed_duration_since(period_epoch_date(1970, 1, 1)?)
2543 .num_days();
2544 let rem_ordinal = match days.rem_euclid(7) {
2545 0 => 0,
2546 1 => 1,
2547 4 => 2,
2548 5 => 3,
2549 6 => 4,
2550 _ => {
2551 return Err(datetime_to_period_error(
2552 "business period anchor did not land on a business day",
2553 ));
2554 }
2555 };
2556 days.div_euclid(7)
2557 .checked_mul(5)
2558 .and_then(|base| base.checked_add(rem_ordinal))
2559 .ok_or_else(|| datetime_to_period_error("business ordinal overflow"))
2560}
2561
2562fn business_period_end_anchor_date(
2563 date: chrono::NaiveDate,
2564) -> Result<chrono::NaiveDate, IndexError> {
2565 match date.weekday().num_days_from_monday() {
2566 5 => period_add_days(date, -1),
2567 6 => period_add_days(date, -2),
2568 _ => Ok(date),
2569 }
2570}
2571
2572fn datetime_period_ordinal(nanos: i64, freq: PeriodFreq) -> Result<i64, IndexError> {
2573 let dt = datetime_from_nanos(nanos).ok_or_else(|| {
2574 datetime_to_period_error(format!("invalid or NaT datetime nanos {nanos}"))
2575 })?;
2576 let date = dt.date_naive();
2577 let year_offset = i64::from(date.year()) - 1970;
2578 match freq {
2579 PeriodFreq::Annual => Ok(year_offset),
2580 PeriodFreq::Quarterly => year_offset
2581 .checked_mul(4)
2582 .and_then(|base| base.checked_add(i64::from((date.month() - 1) / 3)))
2583 .ok_or_else(|| datetime_to_period_error("quarterly ordinal overflow")),
2584 PeriodFreq::Monthly => year_offset
2585 .checked_mul(12)
2586 .and_then(|base| base.checked_add(i64::from(date.month() - 1)))
2587 .ok_or_else(|| datetime_to_period_error("monthly ordinal overflow")),
2588 PeriodFreq::Daily => {
2589 let epoch = chrono::NaiveDate::from_ymd_opt(1970, 1, 1)
2590 .ok_or_else(|| datetime_to_period_error("invalid epoch boundary"))?;
2591 Ok(date.signed_duration_since(epoch).num_days())
2592 }
2593 PeriodFreq::Hourly => Ok(nanos.div_euclid(Timedelta::NANOS_PER_HOUR)),
2594 PeriodFreq::Minutely => Ok(nanos.div_euclid(Timedelta::NANOS_PER_MIN)),
2595 PeriodFreq::Secondly => Ok(nanos.div_euclid(Timedelta::NANOS_PER_SEC)),
2596 PeriodFreq::Weekly => date_to_weekly_period_ordinal(date),
2597 PeriodFreq::Business => date_to_business_period_ordinal(date),
2598 _ => Err(datetime_to_period_error("unsupported period frequency")),
2599 }
2600}
2601
2602fn datetime_period_ordinal_at_boundary(
2603 nanos: i64,
2604 freq: PeriodFreq,
2605 boundary: PeriodBoundary,
2606) -> Result<i64, IndexError> {
2607 if freq == PeriodFreq::Business && matches!(boundary, PeriodBoundary::End) {
2608 let dt = datetime_from_nanos(nanos).ok_or_else(|| {
2609 datetime_to_period_error(format!("invalid or NaT datetime nanos {nanos}"))
2610 })?;
2611 return date_to_business_period_ordinal(business_period_end_anchor_date(dt.date_naive())?);
2612 }
2613 datetime_period_ordinal(nanos, freq)
2614}
2615
2616fn datetime_nanos_to_period(nanos: i64, freq: PeriodFreq) -> Result<Period, IndexError> {
2617 datetime_period_ordinal(nanos, freq).map(|ordinal| Period::new(ordinal, freq))
2618}
2619
2620fn map_datetime_labels<T, F>(labels: &[IndexLabel], func: F) -> Vec<Option<T>>
2621where
2622 F: Fn(chrono::DateTime<chrono::Utc>) -> T,
2623{
2624 labels
2625 .iter()
2626 .map(|label| match label {
2627 IndexLabel::Datetime64(nanos) => datetime_from_nanos(*nanos).map(&func),
2628 IndexLabel::Int64(_)
2629 | IndexLabel::Utf8(_)
2630 | IndexLabel::Timedelta64(_)
2631 | IndexLabel::Null(_) => None,
2632 })
2633 .collect()
2634}
2635
2636fn time_to_nanos(time: chrono::NaiveTime) -> i64 {
2637 use chrono::Timelike;
2638 i64::from(time.num_seconds_from_midnight()) * 1_000_000_000 + i64::from(time.nanosecond())
2639}
2640
2641fn parse_time_of_day_nanos(time: &str, context: &str) -> Result<i64, IndexError> {
2642 let trimmed = time.trim();
2643 for format in ["%H:%M:%S%.f", "%H:%M:%S", "%H:%M"] {
2644 if let Ok(parsed) = chrono::NaiveTime::parse_from_str(trimmed, format) {
2645 return Ok(time_to_nanos(parsed));
2646 }
2647 }
2648 Err(IndexError::InvalidArgument(format!(
2649 "{context}: invalid time {time:?}; expected HH:MM, HH:MM:SS, or fractional seconds"
2650 )))
2651}
2652
2653fn datetime_label_time_nanos(label: &IndexLabel) -> Option<i64> {
2654 match label {
2655 IndexLabel::Datetime64(nanos) => {
2656 datetime_from_nanos(*nanos).map(|dt| time_to_nanos(dt.time()))
2657 }
2658 IndexLabel::Int64(_)
2659 | IndexLabel::Utf8(_)
2660 | IndexLabel::Timedelta64(_)
2661 | IndexLabel::Null(_) => None,
2662 }
2663}
2664
/// Tests whether `time` lies in the window from `start` to `end`.
///
/// `include_start` / `include_end` decide whether the respective endpoint
/// itself counts. When `start <= end` the window is an ordinary interval and
/// both bounds must hold. When `start > end` the window wraps around (for
/// example 22:00 to 02:00), so satisfying either bound is sufficient.
fn time_nanos_in_between(
    time: i64,
    start: i64,
    end: i64,
    include_start: bool,
    include_end: bool,
) -> bool {
    let clears_start = time > start || (include_start && time == start);
    let clears_end = time < end || (include_end && time == end);
    if start > end {
        clears_start || clears_end
    } else {
        clears_start && clears_end
    }
}
2684
2685fn map_timedelta_labels<T, F>(labels: &[IndexLabel], func: F) -> Vec<Option<T>>
2686where
2687 F: Fn(i64) -> T,
2688{
2689 labels
2690 .iter()
2691 .map(|label| match label {
2692 IndexLabel::Timedelta64(nanos) if *nanos != Timedelta::NAT => Some(func(*nanos)),
2693 IndexLabel::Int64(_)
2694 | IndexLabel::Utf8(_)
2695 | IndexLabel::Timedelta64(_)
2696 | IndexLabel::Datetime64(_)
2697 | IndexLabel::Null(_) => None,
2698 })
2699 .collect()
2700}
2701
2702fn timedelta_components_for_index(nanos: i64) -> TimedeltaComponents {
2703 let days = nanos.div_euclid(Timedelta::NANOS_PER_DAY);
2704 let rem = nanos.rem_euclid(Timedelta::NANOS_PER_DAY);
2705
2706 let hours = rem / Timedelta::NANOS_PER_HOUR;
2707 let rem = rem % Timedelta::NANOS_PER_HOUR;
2708 let minutes = rem / Timedelta::NANOS_PER_MIN;
2709 let rem = rem % Timedelta::NANOS_PER_MIN;
2710 let seconds = rem / Timedelta::NANOS_PER_SEC;
2711 let rem = rem % Timedelta::NANOS_PER_SEC;
2712 let milliseconds = rem / Timedelta::NANOS_PER_MILLI;
2713 let rem = rem % Timedelta::NANOS_PER_MILLI;
2714 let microseconds = rem / Timedelta::NANOS_PER_MICRO;
2715 let nanoseconds = rem % Timedelta::NANOS_PER_MICRO;
2716
2717 TimedeltaComponents {
2718 days,
2719 hours,
2720 minutes,
2721 seconds,
2722 milliseconds,
2723 microseconds,
2724 nanoseconds,
2725 }
2726}
2727
/// Direction used when snapping a nanosecond value to a multiple of a unit.
#[derive(Clone, Copy)]
enum TemporalRoundMode {
    /// Snap down to the nearest multiple at or below the value.
    Floor,
    /// Snap up to the nearest multiple at or above the value.
    Ceil,
    /// Snap to the nearest multiple; exact halves go to the even multiple.
    Round,
}
2734
/// Selects which edge of a period a conversion refers to.
#[derive(Clone, Copy)]
enum PeriodBoundary {
    /// The start of the period.
    Start,
    /// The end of the period.
    End,
}
2740
2741fn parse_fixed_temporal_freq(freq: &str, context: &str) -> Result<i64, IndexError> {
2742 let trimmed = freq.trim();
2743 let unit_nanos = Timedelta::unit_to_nanos(trimmed)
2744 .or_else(|| Timedelta::parse(trimmed).ok())
2745 .ok_or_else(|| {
2746 IndexError::InvalidArgument(format!("{context}: invalid frequency {freq:?}"))
2747 })?;
2748 if unit_nanos <= 0 {
2749 return Err(IndexError::InvalidArgument(format!(
2750 "{context}: frequency must be positive, got {freq:?}"
2751 )));
2752 }
2753 Ok(unit_nanos)
2754}
2755
2756fn round_nanos_to_unit(nanos: i64, unit_nanos: i64, mode: TemporalRoundMode) -> i64 {
2757 match mode {
2758 TemporalRoundMode::Floor => nanos.div_euclid(unit_nanos).saturating_mul(unit_nanos),
2759 TemporalRoundMode::Ceil => {
2760 let rem = nanos.rem_euclid(unit_nanos);
2761 if rem == 0 {
2762 nanos
2763 } else {
2764 nanos.saturating_add(unit_nanos - rem)
2765 }
2766 }
2767 TemporalRoundMode::Round => {
2768 let floor = nanos.div_euclid(unit_nanos);
2769 let rem = nanos.rem_euclid(unit_nanos);
2770 if rem == 0 {
2771 return nanos;
2772 }
2773 let twice_rem = i128::from(rem) * 2;
2774 let unit = i128::from(unit_nanos);
2775 let chosen = if twice_rem < unit {
2776 floor
2777 } else if twice_rem > unit {
2778 floor.saturating_add(1)
2779 } else if floor % 2 == 0 {
2780 floor
2781 } else {
2782 floor.saturating_add(1)
2783 };
2784 chosen.saturating_mul(unit_nanos)
2785 }
2786 }
2787}
2788
/// Builds a length-`len` vector of lagged differences.
///
/// For each position `i` that has a partner `j` inside `0..len`, the slot is
/// `diff_at(i, j)`; slots without a partner are `None`.
///
/// * `periods > 0` — the partner is `i - periods` (earlier element).
/// * `periods < 0` — the partner is `i + |periods|` (later element).
/// * `periods == 0` — the partner is `i` itself.
///
/// When `|periods| >= len` no slot has a partner and `diff_at` is never
/// called. `diff_at` is invoked in ascending position order.
fn positional_diff<T>(
    len: usize,
    periods: i64,
    mut diff_at: impl FnMut(usize, usize) -> Option<T>,
) -> Vec<Option<T>> {
    // `None` means the shift is too large for any position to have a partner.
    let offset = usize::try_from(periods.unsigned_abs())
        .ok()
        .filter(|offset| *offset < len);
    (0..len)
        .map(|position| {
            let offset = offset?;
            let partner = if periods < 0 {
                position.checked_add(offset).filter(|later| *later < len)?
            } else {
                position.checked_sub(offset)?
            };
            diff_at(position, partner)
        })
        .collect()
}
2818
2819fn optional_diffs_to_timedelta_index(
2820 values: Vec<Option<i64>>,
2821 name: Option<&str>,
2822) -> TimedeltaIndex {
2823 let mut out = TimedeltaIndex::new(
2824 values
2825 .into_iter()
2826 .map(|value| value.unwrap_or(Timedelta::NAT))
2827 .collect(),
2828 );
2829 if let Some(name) = name {
2830 out = out.set_name(name);
2831 }
2832 out
2833}
2834
2835fn period_timestamp_error(message: impl Into<String>) -> IndexError {
2836 IndexError::InvalidArgument(format!(
2837 "PeriodIndex timestamp conversion failed: {}",
2838 message.into()
2839 ))
2840}
2841
2842fn period_date_error(err: DateRangeError) -> IndexError {
2843 period_timestamp_error(err.to_string())
2844}
2845
2846fn period_date_to_nanos(date: chrono::NaiveDate) -> Result<i64, IndexError> {
2847 date_to_midnight_nanos(date).map_err(period_date_error)
2848}
2849
2850fn period_checked_add_nanos(nanos: i64, delta: i64) -> Result<i64, IndexError> {
2851 nanos
2852 .checked_add(delta)
2853 .ok_or_else(|| period_timestamp_error("nanosecond timestamp overflow"))
2854}
2855
2856fn period_month_start(month_ordinal: i64) -> Result<chrono::NaiveDate, IndexError> {
2857 let year = 1970_i64
2858 .checked_add(month_ordinal.div_euclid(12))
2859 .ok_or_else(|| period_timestamp_error("year overflow"))?;
2860 let year = i32::try_from(year).map_err(|_| period_timestamp_error("year out of range"))?;
2861 let month = u32::try_from(month_ordinal.rem_euclid(12) + 1)
2862 .map_err(|_| period_timestamp_error("month out of range"))?;
2863 chrono::NaiveDate::from_ymd_opt(year, month, 1)
2864 .ok_or_else(|| period_timestamp_error("invalid month boundary"))
2865}
2866
2867fn period_epoch_date(year: i32, month: u32, day: u32) -> Result<chrono::NaiveDate, IndexError> {
2868 chrono::NaiveDate::from_ymd_opt(year, month, day)
2869 .ok_or_else(|| period_timestamp_error("invalid epoch boundary"))
2870}
2871
2872fn period_add_days(date: chrono::NaiveDate, days: i64) -> Result<chrono::NaiveDate, IndexError> {
2873 let delta = chrono::Duration::try_days(days)
2874 .ok_or_else(|| period_timestamp_error("day offset overflow"))?;
2875 date.checked_add_signed(delta)
2876 .ok_or_else(|| period_timestamp_error("date overflow"))
2877}
2878
2879fn period_business_date(ordinal: i64) -> Result<chrono::NaiveDate, IndexError> {
2880 let week = ordinal.div_euclid(5);
2881 let day_offset = match ordinal.rem_euclid(5) {
2882 0 => 0,
2883 1 => 1,
2884 2 => 4,
2885 3 => 5,
2886 4 => 6,
2887 _ => {
2888 return Err(period_timestamp_error(
2889 "business-day remainder out of range",
2890 ));
2891 }
2892 };
2893 let calendar_days = week
2894 .checked_mul(7)
2895 .and_then(|days| days.checked_add(day_offset))
2896 .ok_or_else(|| period_timestamp_error("business-day ordinal overflow"))?;
2897 period_add_days(period_epoch_date(1970, 1, 1)?, calendar_days)
2898}
2899
2900fn period_start_nanos(period: Period) -> Result<i64, IndexError> {
2901 match period.freq {
2902 PeriodFreq::Annual => {
2903 let month_ordinal = period
2904 .ordinal
2905 .checked_mul(12)
2906 .ok_or_else(|| period_timestamp_error("annual ordinal overflow"))?;
2907 period_date_to_nanos(period_month_start(month_ordinal)?)
2908 }
2909 PeriodFreq::Quarterly => {
2910 let month_ordinal = period
2911 .ordinal
2912 .checked_mul(3)
2913 .ok_or_else(|| period_timestamp_error("quarterly ordinal overflow"))?;
2914 period_date_to_nanos(period_month_start(month_ordinal)?)
2915 }
2916 PeriodFreq::Monthly => period_date_to_nanos(period_month_start(period.ordinal)?),
2917 PeriodFreq::Weekly => {
2918 let base = period_epoch_date(1969, 12, 22)?;
2919 let days = period
2920 .ordinal
2921 .checked_mul(7)
2922 .ok_or_else(|| period_timestamp_error("weekly ordinal overflow"))?;
2923 period_date_to_nanos(period_add_days(base, days)?)
2924 }
2925 PeriodFreq::Daily => {
2926 let base = period_epoch_date(1970, 1, 1)?;
2927 period_date_to_nanos(period_add_days(base, period.ordinal)?)
2928 }
2929 PeriodFreq::Business => period_date_to_nanos(period_business_date(period.ordinal)?),
2930 PeriodFreq::Hourly => period
2931 .ordinal
2932 .checked_mul(Timedelta::NANOS_PER_HOUR)
2933 .ok_or_else(|| period_timestamp_error("hourly ordinal overflow")),
2934 PeriodFreq::Minutely => period
2935 .ordinal
2936 .checked_mul(Timedelta::NANOS_PER_MIN)
2937 .ok_or_else(|| period_timestamp_error("minutely ordinal overflow")),
2938 PeriodFreq::Secondly => period
2939 .ordinal
2940 .checked_mul(Timedelta::NANOS_PER_SEC)
2941 .ok_or_else(|| period_timestamp_error("secondly ordinal overflow")),
2942 _ => Err(period_timestamp_error("unsupported period frequency")),
2943 }
2944}
2945
2946fn period_next_start_nanos(period: Period) -> Result<i64, IndexError> {
2947 let next = Period {
2948 ordinal: period
2949 .ordinal
2950 .checked_add(1)
2951 .ok_or_else(|| period_timestamp_error("period ordinal overflow"))?,
2952 freq: period.freq,
2953 };
2954 period_start_nanos(next)
2955}
2956
2957fn period_end_nanos(period: Period) -> Result<i64, IndexError> {
2958 period_checked_add_nanos(period_next_start_nanos(period)?, -1)
2959}
2960
2961fn period_boundary_nanos(period: Period, boundary: PeriodBoundary) -> Result<i64, IndexError> {
2962 match boundary {
2963 PeriodBoundary::Start => period_start_nanos(period),
2964 PeriodBoundary::End => period_end_nanos(period),
2965 }
2966}
2967
2968fn parse_period_boundary_how(how: &str, context: &str) -> Result<PeriodBoundary, IndexError> {
2969 match how.trim().to_ascii_lowercase().as_str() {
2970 "" | "e" | "end" | "finish" => Ok(PeriodBoundary::End),
2971 "s" | "start" | "begin" | "b" => Ok(PeriodBoundary::Start),
2972 other => Err(IndexError::InvalidArgument(format!(
2973 "{context} how must be 'start' or 'end', got {other:?}"
2974 ))),
2975 }
2976}
2977
2978fn period_qyear(period: Period) -> Result<i32, IndexError> {
2979 let end_nanos = period_end_nanos(period)?;
2980 datetime_nanos_to_date(end_nanos)
2981 .map(|date| date.year())
2982 .map_err(period_date_error)
2983}
2984
2985#[derive(Debug, Clone, Copy)]
2986pub struct PeriodFields<'a> {
2987 pub year: &'a [i32],
2988 pub quarter: Option<&'a [u32]>,
2989 pub month: Option<&'a [u32]>,
2990 pub day: Option<&'a [u32]>,
2991 pub hour: Option<&'a [u32]>,
2992 pub minute: Option<&'a [u32]>,
2993 pub second: Option<&'a [u32]>,
2994 pub freq: Option<PeriodFreq>,
2995}
2996
2997impl<'a> PeriodFields<'a> {
2998 #[must_use]
2999 pub const fn new(year: &'a [i32]) -> Self {
3000 Self {
3001 year,
3002 quarter: None,
3003 month: None,
3004 day: None,
3005 hour: None,
3006 minute: None,
3007 second: None,
3008 freq: None,
3009 }
3010 }
3011}
3012
3013fn period_fields_error(message: impl Into<String>) -> IndexError {
3014 IndexError::InvalidArgument(format!(
3015 "PeriodIndex.from_fields failed: {}",
3016 message.into()
3017 ))
3018}
3019
3020fn period_fields_freq(fields: &PeriodFields<'_>) -> Result<PeriodFreq, IndexError> {
3021 let freq = fields
3022 .freq
3023 .or_else(|| fields.quarter.map(|_| PeriodFreq::Quarterly))
3024 .ok_or_else(|| {
3025 period_fields_error("freq is required unless quarter fields imply quarterly periods")
3026 })?;
3027 if fields.quarter.is_some() && freq != PeriodFreq::Quarterly {
3028 return Err(period_fields_error(
3029 "quarter fields require quarterly frequency",
3030 ));
3031 }
3032 Ok(freq)
3033}
3034
3035fn validate_period_field_len(
3036 name: &str,
3037 values: Option<&[u32]>,
3038 expected: usize,
3039) -> Result<(), IndexError> {
3040 if values.is_some_and(|items| items.len() != expected) {
3041 return Err(period_fields_error(format!(
3042 "Mismatched Period array lengths for {name}"
3043 )));
3044 }
3045 Ok(())
3046}
3047
3048fn validate_period_fields(fields: &PeriodFields<'_>) -> Result<(), IndexError> {
3049 let expected = fields.year.len();
3050 validate_period_field_len("quarter", fields.quarter, expected)?;
3051 validate_period_field_len("month", fields.month, expected)?;
3052 validate_period_field_len("day", fields.day, expected)?;
3053 validate_period_field_len("hour", fields.hour, expected)?;
3054 validate_period_field_len("minute", fields.minute, expected)?;
3055 validate_period_field_len("second", fields.second, expected)
3056}
3057
/// Reads the value at `position` from an optional field slice, falling back
/// to `default` when the slice is absent or too short.
fn period_field_value(values: Option<&[u32]>, position: usize, default: u32) -> u32 {
    let Some(items) = values else {
        return default;
    };
    match items.get(position) {
        Some(&value) => value,
        None => default,
    }
}
3063
3064fn required_period_field(
3065 values: Option<&[u32]>,
3066 name: &str,
3067 position: usize,
3068) -> Result<u32, IndexError> {
3069 values
3070 .and_then(|items| items.get(position).copied())
3071 .ok_or_else(|| period_fields_error(format!("{name} fields are required")))
3072}
3073
3074fn quarter_start_month(quarter: u32) -> Result<u32, IndexError> {
3075 if (1..=4).contains(&quarter) {
3076 Ok((quarter - 1) * 3 + 1)
3077 } else {
3078 Err(period_fields_error(format!(
3079 "quarter must be in 1..=4, got {quarter}"
3080 )))
3081 }
3082}
3083
3084fn period_from_fields_at(
3085 fields: &PeriodFields<'_>,
3086 freq: PeriodFreq,
3087 position: usize,
3088) -> Result<Period, IndexError> {
3089 let year = fields
3090 .year
3091 .get(position)
3092 .copied()
3093 .ok_or_else(|| period_fields_error("year fields are required"))?;
3094 let month = if freq == PeriodFreq::Quarterly {
3095 if let Some(quarters) = fields.quarter {
3096 let quarter = quarters
3097 .get(position)
3098 .copied()
3099 .ok_or_else(|| period_fields_error("quarter fields are required"))?;
3100 quarter_start_month(quarter)?
3101 } else {
3102 required_period_field(fields.month, "month", position)?
3103 }
3104 } else {
3105 if fields.quarter.is_some() && fields.month.is_none() {
3106 return Err(period_fields_error(
3107 "quarter fields require quarterly frequency unless month is also supplied",
3108 ));
3109 }
3110 required_period_field(fields.month, "month", position)?
3111 };
3112 let day = if matches!(
3113 freq,
3114 PeriodFreq::Annual | PeriodFreq::Quarterly | PeriodFreq::Monthly
3115 ) {
3116 1
3117 } else {
3118 period_field_value(fields.day, position, 1)
3119 };
3120 let hour = if matches!(
3121 freq,
3122 PeriodFreq::Hourly | PeriodFreq::Minutely | PeriodFreq::Secondly
3123 ) {
3124 period_field_value(fields.hour, position, 0)
3125 } else {
3126 0
3127 };
3128 let minute = if matches!(freq, PeriodFreq::Minutely | PeriodFreq::Secondly) {
3129 period_field_value(fields.minute, position, 0)
3130 } else {
3131 0
3132 };
3133 let second = if freq == PeriodFreq::Secondly {
3134 period_field_value(fields.second, position, 0)
3135 } else {
3136 0
3137 };
3138 let date = chrono::NaiveDate::from_ymd_opt(year, month, day)
3139 .ok_or_else(|| period_fields_error("invalid year/month/day combination"))?;
3140 let time = chrono::NaiveTime::from_hms_opt(hour, minute, second)
3141 .ok_or_else(|| period_fields_error("invalid hour/minute/second combination"))?;
3142 let nanos = date_and_time_to_nanos(date, time_to_nanos(time)).map_err(period_date_error)?;
3143 datetime_period_ordinal(nanos, freq).map(|ordinal| Period::new(ordinal, freq))
3144}
3145
3146fn ensure_index_kind(
3147 index: &Index,
3148 predicate: impl Fn(&IndexLabel) -> bool,
3149 kind: &str,
3150) -> Result<(), IndexError> {
3151 if index.labels().iter().all(predicate) {
3152 Ok(())
3153 } else {
3154 Err(IndexError::InvalidArgument(format!(
3155 "{kind} requires homogeneous {kind} labels"
3156 )))
3157 }
3158}
3159
3160#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
3167pub struct DatetimeIndex {
3168 index: Index,
3169}
3170
3171impl DatetimeIndex {
3172 #[must_use]
3173 pub fn new(nanos: Vec<i64>) -> Self {
3174 Self {
3175 index: Index::from_datetime64(nanos),
3176 }
3177 }
3178
3179 pub fn from_index(index: Index) -> Result<Self, IndexError> {
3180 ensure_index_kind(
3181 &index,
3182 |label| matches!(label, IndexLabel::Datetime64(_)),
3183 "DatetimeIndex",
3184 )?;
3185 Ok(Self { index })
3186 }
3187
3188 #[must_use]
3189 pub fn as_index(&self) -> &Index {
3190 &self.index
3191 }
3192
3193 #[must_use]
3194 pub fn into_index(self) -> Index {
3195 self.index
3196 }
3197
3198 #[must_use]
3199 pub fn len(&self) -> usize {
3200 self.index.len()
3201 }
3202
3203 #[must_use]
3204 pub fn is_empty(&self) -> bool {
3205 self.index.is_empty()
3206 }
3207
3208 #[must_use]
3209 pub fn name(&self) -> Option<&str> {
3210 self.index.name()
3211 }
3212
3213 #[must_use]
3214 pub fn set_name(&self, name: &str) -> Self {
3215 Self {
3216 index: self.index.set_name(name),
3217 }
3218 }
3219
3220 #[must_use]
3221 pub fn set_names(&self, name: Option<&str>) -> Self {
3222 Self {
3223 index: self.index.set_names(name),
3224 }
3225 }
3226
3227 #[must_use]
3228 pub fn rename_index(&self, name: Option<&str>) -> Self {
3229 self.set_names(name)
3230 }
3231
3232 #[must_use]
3233 pub fn names(&self) -> Vec<Option<String>> {
3234 self.index.names()
3235 }
3236
3237 #[must_use]
3238 pub fn copy(&self) -> Self {
3239 self.clone()
3240 }
3241
3242 #[must_use]
3243 pub fn shape(&self) -> (usize,) {
3244 self.index.shape()
3245 }
3246
3247 #[must_use]
3248 pub fn size(&self) -> usize {
3249 self.index.size()
3250 }
3251
3252 #[must_use]
3253 pub fn empty(&self) -> bool {
3254 self.index.empty()
3255 }
3256
3257 #[must_use]
3258 pub fn dtype(&self) -> &'static str {
3259 "datetime64[ns]"
3260 }
3261
3262 #[must_use]
3263 pub fn dtypes(&self) -> Vec<&'static str> {
3264 vec![self.dtype()]
3265 }
3266
3267 #[must_use]
3268 pub fn memory_usage(&self, deep: bool) -> usize {
3269 self.index.memory_usage(deep)
3270 }
3271
3272 #[must_use]
3273 pub fn nbytes(&self) -> usize {
3274 self.index.nbytes()
3275 }
3276
3277 #[must_use]
3278 pub fn hasnans(&self) -> bool {
3279 self.index.hasnans()
3280 }
3281
3282 #[must_use]
3283 pub fn isna(&self) -> Vec<bool> {
3284 self.index.isna()
3285 }
3286
3287 #[must_use]
3288 pub fn notna(&self) -> Vec<bool> {
3289 self.index.notna()
3290 }
3291
3292 #[must_use]
3293 pub fn is_unique(&self) -> bool {
3294 self.index.is_unique()
3295 }
3296
3297 #[must_use]
3298 pub fn has_duplicates(&self) -> bool {
3299 self.index.has_duplicates()
3300 }
3301
3302 #[must_use]
3303 pub fn is_monotonic_increasing(&self) -> bool {
3304 self.index.is_monotonic_increasing()
3305 }
3306
3307 #[must_use]
3308 pub fn is_monotonic(&self) -> bool {
3309 self.index.is_monotonic()
3310 }
3311
3312 #[must_use]
3313 pub fn is_monotonic_decreasing(&self) -> bool {
3314 self.index.is_monotonic_decreasing()
3315 }
3316
3317 #[must_use]
3318 pub fn nunique(&self) -> usize {
3319 self.index.nunique()
3320 }
3321
3322 #[must_use]
3323 pub fn nunique_with_dropna(&self, dropna: bool) -> usize {
3324 self.index.nunique_with_dropna(dropna)
3325 }
3326
3327 #[must_use]
3328 pub fn ndim(&self) -> usize {
3329 self.index.ndim()
3330 }
3331
3332 pub fn item(&self) -> Result<Option<i64>, IndexError> {
3333 match self.index.item()? {
3334 IndexLabel::Datetime64(nanos) if nanos != i64::MIN => Ok(Some(nanos)),
3335 IndexLabel::Datetime64(_) => Ok(None),
3336 label => Err(IndexError::InvalidArgument(format!(
3337 "DatetimeIndex item must be datetime64, got {label}"
3338 ))),
3339 }
3340 }
3341
3342 #[must_use]
3343 pub fn is_(&self, other: &Self) -> bool {
3344 std::ptr::eq(self, other)
3345 }
3346
3347 #[must_use]
3348 pub fn equals(&self, other: &Self) -> bool {
3349 self.index.equals(&other.index)
3350 }
3351
3352 #[must_use]
3353 pub fn identical(&self, other: &Self) -> bool {
3354 self.index.identical(&other.index)
3355 }
3356
3357 #[must_use]
3358 pub fn holds_integer(&self) -> bool {
3359 false
3360 }
3361
3362 #[must_use]
3363 pub fn inferred_type(&self) -> &'static str {
3364 "datetime64"
3365 }
3366
3367 #[must_use]
3368 pub fn is_boolean(&self) -> bool {
3369 false
3370 }
3371
3372 #[must_use]
3373 pub fn is_categorical(&self) -> bool {
3374 false
3375 }
3376
3377 #[must_use]
3378 pub fn is_floating(&self) -> bool {
3379 false
3380 }
3381
3382 #[must_use]
3383 pub fn is_integer(&self) -> bool {
3384 false
3385 }
3386
3387 #[must_use]
3388 pub fn is_interval(&self) -> bool {
3389 false
3390 }
3391
3392 #[must_use]
3393 pub fn is_numeric(&self) -> bool {
3394 false
3395 }
3396
3397 #[must_use]
3398 pub fn is_object(&self) -> bool {
3399 false
3400 }
3401
3402 #[must_use]
3403 pub fn nanos(&self) -> Vec<Option<i64>> {
3404 self.index
3405 .labels()
3406 .iter()
3407 .map(|label| match label {
3408 IndexLabel::Datetime64(nanos) if *nanos != i64::MIN => Some(*nanos),
3409 IndexLabel::Int64(_)
3410 | IndexLabel::Utf8(_)
3411 | IndexLabel::Timedelta64(_)
3412 | IndexLabel::Datetime64(_)
3413 | IndexLabel::Null(_) => None,
3414 })
3415 .collect()
3416 }
3417
3418 #[must_use]
3419 pub fn values(&self) -> Vec<Option<i64>> {
3420 self.nanos()
3421 }
3422
3423 #[must_use]
3424 pub fn to_list(&self) -> Vec<Option<i64>> {
3425 self.nanos()
3426 }
3427
3428 #[must_use]
3429 pub fn tolist(&self) -> Vec<Option<i64>> {
3430 self.to_list()
3431 }
3432
3433 #[must_use]
3434 pub fn to_numpy(&self) -> Vec<Option<i64>> {
3435 self.nanos()
3436 }
3437
3438 #[must_use]
3439 pub fn array(&self) -> Vec<Option<i64>> {
3440 self.nanos()
3441 }
3442
3443 #[must_use]
3446 pub fn asi8(&self) -> Vec<i64> {
3447 self.index
3448 .labels()
3449 .iter()
3450 .map(|label| match label {
3451 IndexLabel::Datetime64(nanos) => *nanos,
3452 IndexLabel::Int64(_)
3453 | IndexLabel::Utf8(_)
3454 | IndexLabel::Timedelta64(_)
3455 | IndexLabel::Null(_) => i64::MIN,
3456 })
3457 .collect()
3458 }
3459
3460 pub fn to_period(&self, freq: &str) -> Result<PeriodIndex, IndexError> {
3464 let period_freq = PeriodFreq::parse(freq).ok_or_else(|| {
3465 IndexError::InvalidArgument(format!("to_period: unsupported frequency '{freq}'"))
3466 })?;
3467 let periods = self
3468 .index
3469 .labels()
3470 .iter()
3471 .map(|label| match label {
3472 IndexLabel::Datetime64(nanos) => datetime_nanos_to_period(*nanos, period_freq),
3473 other => Err(IndexError::InvalidArgument(format!(
3474 "to_period requires DatetimeIndex labels, got {other:?}"
3475 ))),
3476 })
3477 .collect::<Result<Vec<_>, _>>()?;
3478 let mut out = PeriodIndex::new(periods);
3479 if let Some(name) = self.name() {
3480 out = out.set_name(name);
3481 }
3482 Ok(out)
3483 }
3484
3485 #[must_use]
3488 pub fn strftime(&self, format: &str) -> Vec<Option<String>> {
3489 map_datetime_labels(self.index.labels(), |dt| dt.format(format).to_string())
3490 }
3491
3492 pub fn argmax(&self) -> Result<usize, IndexError> {
3498 let labels = self.index.labels();
3499 let mut best: Option<usize> = None;
3500 for (i, label) in labels.iter().enumerate() {
3501 let nanos = match label {
3502 IndexLabel::Datetime64(n) if *n != i64::MIN => *n,
3503 _ => continue,
3504 };
3505 best = Some(match best {
3506 Some(b) => match labels[b] {
3507 IndexLabel::Datetime64(prev) if nanos > prev => i,
3508 _ => b,
3509 },
3510 None => i,
3511 });
3512 }
3513 best.ok_or_else(|| {
3514 IndexError::InvalidArgument("attempt to get argmax of an empty sequence".to_owned())
3515 })
3516 }
3517
3518 pub fn argmin(&self) -> Result<usize, IndexError> {
3521 let labels = self.index.labels();
3522 let mut best: Option<usize> = None;
3523 for (i, label) in labels.iter().enumerate() {
3524 let nanos = match label {
3525 IndexLabel::Datetime64(n) if *n != i64::MIN => *n,
3526 _ => continue,
3527 };
3528 best = Some(match best {
3529 Some(b) => match labels[b] {
3530 IndexLabel::Datetime64(prev) if nanos < prev => i,
3531 _ => b,
3532 },
3533 None => i,
3534 });
3535 }
3536 best.ok_or_else(|| {
3537 IndexError::InvalidArgument("attempt to get argmin of an empty sequence".to_owned())
3538 })
3539 }
3540
3541 #[must_use]
3544 pub fn argsort(&self) -> Vec<usize> {
3545 self.index.argsort()
3546 }
3547
3548 pub fn unique(&self) -> Result<Self, IndexError> {
3551 Self::from_index(self.index.unique())
3552 }
3553
3554 pub fn factorize(&self) -> Result<(Vec<isize>, Self), IndexError> {
3557 let (codes, uniques) = self.index.factorize();
3558 Ok((codes, Self::from_index(uniques)?))
3559 }
3560
3561 #[must_use]
3563 pub fn value_counts(&self) -> Vec<(IndexLabel, usize)> {
3564 self.index.value_counts()
3565 }
3566
3567 #[must_use]
3569 pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
3570 self.index.duplicated(keep)
3571 }
3572
3573 pub fn drop_duplicates(&self) -> Result<Self, IndexError> {
3575 Self::from_index(self.index.drop_duplicates())
3576 }
3577
3578 pub fn take(&self, positions: &[usize]) -> Result<Self, IndexError> {
3581 let labels = self.index.labels();
3582 for &p in positions {
3583 if p >= labels.len() {
3584 return Err(IndexError::OutOfBounds {
3585 position: p,
3586 length: labels.len(),
3587 });
3588 }
3589 }
3590 let nanos: Vec<i64> = positions
3591 .iter()
3592 .map(|&p| match labels[p] {
3593 IndexLabel::Datetime64(n) => n,
3594 _ => i64::MIN,
3595 })
3596 .collect();
3597 let mut out = Self::new(nanos);
3598 if let Some(name) = self.name() {
3599 out = out.set_name(name);
3600 }
3601 Ok(out)
3602 }
3603
3604 #[must_use]
3606 pub fn repeat(&self, repeats: usize) -> Self {
3607 let mut out = Vec::with_capacity(self.len() * repeats);
3608 for label in self.index.labels() {
3609 if let IndexLabel::Datetime64(n) = label {
3610 for _ in 0..repeats {
3611 out.push(*n);
3612 }
3613 }
3614 }
3615 let mut result = Self::new(out);
3616 if let Some(name) = self.name() {
3617 result = result.set_name(name);
3618 }
3619 result
3620 }
3621
3622 #[must_use]
3626 pub fn isin(&self, values: &[i64]) -> Vec<bool> {
3627 let needle: FxHashSet<i64> = values.iter().copied().collect();
3628 self.index
3629 .labels()
3630 .iter()
3631 .map(|label| match label {
3632 IndexLabel::Datetime64(n) => needle.contains(n),
3633 _ => false,
3634 })
3635 .collect()
3636 }
3637
3638 #[must_use]
3642 pub fn append(&self, other: &Self) -> Self {
3643 let mut nanos: Vec<i64> = self
3644 .index
3645 .labels()
3646 .iter()
3647 .filter_map(|label| match label {
3648 IndexLabel::Datetime64(n) => Some(*n),
3649 _ => None,
3650 })
3651 .collect();
3652 nanos.extend(other.index.labels().iter().filter_map(|label| match label {
3653 IndexLabel::Datetime64(n) => Some(*n),
3654 _ => None,
3655 }));
3656 let mut out = Self::new(nanos);
3657 if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
3658 out = out.set_name(name);
3659 }
3660 out
3661 }
3662
3663 #[must_use]
3666 pub fn min(&self) -> Option<i64> {
3667 self.index
3668 .labels()
3669 .iter()
3670 .filter_map(|label| match label {
3671 IndexLabel::Datetime64(n) if *n != i64::MIN => Some(*n),
3672 _ => None,
3673 })
3674 .min()
3675 }
3676
3677 #[must_use]
3682 pub fn shift(&self, periods: i64, freq_nanos: i64) -> Self {
3683 let delta = periods.saturating_mul(freq_nanos);
3684 let nanos: Vec<i64> = self
3685 .index
3686 .labels()
3687 .iter()
3688 .map(|label| match label {
3689 IndexLabel::Datetime64(n) if *n != i64::MIN => n.saturating_add(delta),
3690 _ => i64::MIN,
3691 })
3692 .collect();
3693 let mut out = Self::new(nanos);
3694 if let Some(name) = self.name() {
3695 out = out.set_name(name);
3696 }
3697 out
3698 }
3699
3700 #[must_use]
3705 pub fn diff(&self, periods: i64) -> TimedeltaIndex {
3706 let labels = self.index.labels();
3707 optional_diffs_to_timedelta_index(
3708 positional_diff(labels.len(), periods, |current, previous| {
3709 match (&labels[current], &labels[previous]) {
3710 (
3711 IndexLabel::Datetime64(current_nanos),
3712 IndexLabel::Datetime64(previous_nanos),
3713 ) if *current_nanos != i64::MIN && *previous_nanos != i64::MIN => {
3714 current_nanos.checked_sub(*previous_nanos)
3715 }
3716 _ => None,
3717 }
3718 }),
3719 self.name(),
3720 )
3721 }
3722
3723 fn round_fixed_freq(&self, freq: &str, mode: TemporalRoundMode) -> Result<Self, IndexError> {
3724 let unit_nanos = parse_fixed_temporal_freq(freq, "DatetimeIndex rounding")?;
3725 let nanos: Vec<i64> = self
3726 .index
3727 .labels()
3728 .iter()
3729 .map(|label| match label {
3730 IndexLabel::Datetime64(n) if *n != i64::MIN => {
3731 round_nanos_to_unit(*n, unit_nanos, mode)
3732 }
3733 _ => i64::MIN,
3734 })
3735 .collect();
3736 let mut out = Self::new(nanos);
3737 if let Some(name) = self.name() {
3738 out = out.set_name(name);
3739 }
3740 Ok(out)
3741 }
3742
3743 pub fn floor(&self, freq: &str) -> Result<Self, IndexError> {
3745 self.round_fixed_freq(freq, TemporalRoundMode::Floor)
3746 }
3747
3748 pub fn ceil(&self, freq: &str) -> Result<Self, IndexError> {
3750 self.round_fixed_freq(freq, TemporalRoundMode::Ceil)
3751 }
3752
3753 pub fn round(&self, freq: &str) -> Result<Self, IndexError> {
3755 self.round_fixed_freq(freq, TemporalRoundMode::Round)
3756 }
3757
3758 pub fn snap(&self, freq: &str) -> Result<Self, IndexError> {
3760 parse_fixed_temporal_freq(freq, "DatetimeIndex.snap")?;
3761 Ok(self.clone())
3762 }
3763
3764 #[must_use]
3768 pub fn mean(&self) -> Option<i64> {
3769 let mut total: i128 = 0;
3770 let mut count: i128 = 0;
3771 for label in self.index.labels() {
3772 if let IndexLabel::Datetime64(n) = label
3773 && *n != i64::MIN
3774 {
3775 total += i128::from(*n);
3776 count += 1;
3777 }
3778 }
3779 if count == 0 {
3780 return None;
3781 }
3782 i64::try_from(total / count).ok()
3783 }
3784
3785 #[must_use]
3789 pub fn var(&self) -> Option<f64> {
3790 let nanos: Vec<f64> = self
3791 .index
3792 .labels()
3793 .iter()
3794 .filter_map(|label| match label {
3795 IndexLabel::Datetime64(n) if *n != i64::MIN => Some(*n as f64),
3796 _ => None,
3797 })
3798 .collect();
3799 if nanos.len() < 2 {
3800 return None;
3801 }
3802 let mean = nanos.iter().sum::<f64>() / nanos.len() as f64;
3803 Some(nanos.iter().map(|n| (n - mean).powi(2)).sum::<f64>() / (nanos.len() as f64 - 1.0))
3804 }
3805
3806 #[must_use]
3810 pub fn std(&self) -> Option<i64> {
3811 let nanos: Vec<f64> = self
3812 .index
3813 .labels()
3814 .iter()
3815 .filter_map(|label| match label {
3816 IndexLabel::Datetime64(n) if *n != i64::MIN => Some(*n as f64),
3817 _ => None,
3818 })
3819 .collect();
3820 if nanos.len() < 2 {
3821 return None;
3822 }
3823 let mean = nanos.iter().sum::<f64>() / nanos.len() as f64;
3824 let var =
3825 nanos.iter().map(|n| (n - mean).powi(2)).sum::<f64>() / (nanos.len() as f64 - 1.0);
3826 Some(var.sqrt() as i64)
3827 }
3828
3829 #[must_use]
3833 pub fn median(&self) -> Option<i64> {
3834 let mut nanos: Vec<i64> = self
3835 .index
3836 .labels()
3837 .iter()
3838 .filter_map(|label| match label {
3839 IndexLabel::Datetime64(n) if *n != i64::MIN => Some(*n),
3840 _ => None,
3841 })
3842 .collect();
3843 if nanos.is_empty() {
3844 return None;
3845 }
3846 nanos.sort_unstable();
3847 let mid = nanos.len() / 2;
3848 if nanos.len() % 2 == 1 {
3849 Some(nanos[mid])
3850 } else {
3851 let total = i128::from(nanos[mid - 1]) + i128::from(nanos[mid]);
3852 i64::try_from(total / 2).ok()
3853 }
3854 }
3855
3856 #[must_use]
3858 pub fn max(&self) -> Option<i64> {
3859 self.index
3860 .labels()
3861 .iter()
3862 .filter_map(|label| match label {
3863 IndexLabel::Datetime64(n) if *n != i64::MIN => Some(*n),
3864 _ => None,
3865 })
3866 .max()
3867 }
3868
3869 #[must_use]
3873 pub fn intersection(&self, other: &Self) -> Self {
3874 let other_set: FxHashSet<i64> = other
3875 .index
3876 .labels()
3877 .iter()
3878 .filter_map(|label| match label {
3879 IndexLabel::Datetime64(n) => Some(*n),
3880 _ => None,
3881 })
3882 .collect();
3883 let mut seen = FxHashSet::<i64>::default();
3884 let nanos: Vec<i64> = self
3885 .index
3886 .labels()
3887 .iter()
3888 .filter_map(|label| match label {
3889 IndexLabel::Datetime64(n) if other_set.contains(n) && seen.insert(*n) => Some(*n),
3890 _ => None,
3891 })
3892 .collect();
3893 let mut out = Self::new(nanos);
3894 if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
3895 out = out.set_name(name);
3896 }
3897 out
3898 }
3899
3900 #[must_use]
3903 pub fn union(&self, other: &Self) -> Self {
3904 let mut seen = FxHashSet::<i64>::default();
3905 let mut nanos: Vec<i64> = Vec::new();
3906 for label in self
3907 .index
3908 .labels()
3909 .iter()
3910 .chain(other.index.labels().iter())
3911 {
3912 if let IndexLabel::Datetime64(n) = label
3913 && seen.insert(*n)
3914 {
3915 nanos.push(*n);
3916 }
3917 }
3918 let mut out = Self::new(nanos);
3919 if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
3920 out = out.set_name(name);
3921 }
3922 out
3923 }
3924
3925 #[must_use]
3928 pub fn difference(&self, other: &Self) -> Self {
3929 let other_set: FxHashSet<i64> = other
3930 .index
3931 .labels()
3932 .iter()
3933 .filter_map(|label| match label {
3934 IndexLabel::Datetime64(n) => Some(*n),
3935 _ => None,
3936 })
3937 .collect();
3938 let mut seen = FxHashSet::<i64>::default();
3939 let nanos: Vec<i64> = self
3940 .index
3941 .labels()
3942 .iter()
3943 .filter_map(|label| match label {
3944 IndexLabel::Datetime64(n) if !other_set.contains(n) && seen.insert(*n) => Some(*n),
3945 _ => None,
3946 })
3947 .collect();
3948 let mut out = Self::new(nanos);
3949 if let Some(name) = self.name() {
3953 out = out.set_name(name);
3954 }
3955 out
3956 }
3957
3958 #[must_use]
3961 pub fn symmetric_difference(&self, other: &Self) -> Self {
3962 let self_set: FxHashSet<i64> = self
3963 .index
3964 .labels()
3965 .iter()
3966 .filter_map(|label| match label {
3967 IndexLabel::Datetime64(n) => Some(*n),
3968 _ => None,
3969 })
3970 .collect();
3971 let other_set: FxHashSet<i64> = other
3972 .index
3973 .labels()
3974 .iter()
3975 .filter_map(|label| match label {
3976 IndexLabel::Datetime64(n) => Some(*n),
3977 _ => None,
3978 })
3979 .collect();
3980 let mut seen = FxHashSet::<i64>::default();
3981 let mut nanos: Vec<i64> = Vec::new();
3982 for label in self.index.labels() {
3983 if let IndexLabel::Datetime64(n) = label
3984 && !other_set.contains(n)
3985 && seen.insert(*n)
3986 {
3987 nanos.push(*n);
3988 }
3989 }
3990 for label in other.index.labels() {
3991 if let IndexLabel::Datetime64(n) = label
3992 && !self_set.contains(n)
3993 && seen.insert(*n)
3994 {
3995 nanos.push(*n);
3996 }
3997 }
3998 let mut out = Self::new(nanos);
3999 if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
4000 out = out.set_name(name);
4001 }
4002 out
4003 }
4004
4005 #[must_use]
4009 pub fn sort_values(&self) -> Self {
4010 let mut nanos: Vec<i64> = self
4011 .index
4012 .labels()
4013 .iter()
4014 .filter_map(|label| match label {
4015 IndexLabel::Datetime64(n) => Some(*n),
4016 _ => None,
4017 })
4018 .collect();
4019 nanos.sort_unstable();
4020 let mut out = Self::new(nanos);
4021 if let Some(name) = self.name() {
4022 out = out.set_name(name);
4023 }
4024 out
4025 }
4026
4027 #[must_use]
4029 pub fn sort(&self) -> Self {
4030 self.sort_values()
4031 }
4032
4033 pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
4036 let labels = self.index.labels();
4037 if loc >= labels.len() {
4038 return Err(IndexError::OutOfBounds {
4039 position: loc,
4040 length: labels.len(),
4041 });
4042 }
4043 let nanos: Vec<i64> = labels
4044 .iter()
4045 .enumerate()
4046 .filter(|(i, _)| *i != loc)
4047 .filter_map(|(_, label)| match label {
4048 IndexLabel::Datetime64(n) => Some(*n),
4049 _ => None,
4050 })
4051 .collect();
4052 let mut out = Self::new(nanos);
4053 if let Some(name) = self.name() {
4054 out = out.set_name(name);
4055 }
4056 Ok(out)
4057 }
4058
4059 pub fn r#where(&self, cond: &[bool], other: i64) -> Result<Self, IndexError> {
4063 let labels = self.index.labels();
4064 if cond.len() != labels.len() {
4065 return Err(IndexError::LengthMismatch {
4066 expected: labels.len(),
4067 actual: cond.len(),
4068 context: "where: cond length must match index length".to_owned(),
4069 });
4070 }
4071 let nanos: Vec<i64> = labels
4072 .iter()
4073 .zip(cond.iter())
4074 .map(|(label, &keep)| {
4075 if keep {
4076 match label {
4077 IndexLabel::Datetime64(n) => *n,
4078 _ => i64::MIN,
4079 }
4080 } else {
4081 other
4082 }
4083 })
4084 .collect();
4085 let mut out = Self::new(nanos);
4086 if let Some(name) = self.name() {
4087 out = out.set_name(name);
4088 }
4089 Ok(out)
4090 }
4091
4092 pub fn putmask(&self, mask: &[bool], value: i64) -> Result<Self, IndexError> {
4095 let labels = self.index.labels();
4096 if mask.len() != labels.len() {
4097 return Err(IndexError::LengthMismatch {
4098 expected: labels.len(),
4099 actual: mask.len(),
4100 context: "putmask: mask length must match index length".to_owned(),
4101 });
4102 }
4103 let nanos: Vec<i64> = labels
4104 .iter()
4105 .zip(mask.iter())
4106 .map(|(label, &replace)| {
4107 if replace {
4108 value
4109 } else {
4110 match label {
4111 IndexLabel::Datetime64(n) => *n,
4112 _ => i64::MIN,
4113 }
4114 }
4115 })
4116 .collect();
4117 let mut out = Self::new(nanos);
4118 if let Some(name) = self.name() {
4119 out = out.set_name(name);
4120 }
4121 Ok(out)
4122 }
4123
4124 pub fn searchsorted(&self, value: i64, side: &str) -> Result<usize, IndexError> {
4130 self.index
4131 .searchsorted(&IndexLabel::Datetime64(value), side)
4132 }
4133
4134 #[must_use]
4137 pub fn to_pydatetime(&self) -> Vec<Option<chrono::DateTime<chrono::Utc>>> {
4138 self.index
4139 .labels()
4140 .iter()
4141 .map(|label| match label {
4142 IndexLabel::Datetime64(nanos) => datetime_from_nanos(*nanos),
4143 _ => None,
4144 })
4145 .collect()
4146 }
4147
4148 pub fn insert(&self, loc: usize, value: i64) -> Result<Self, IndexError> {
4152 let labels = self.index.labels();
4153 if loc > labels.len() {
4154 return Err(IndexError::OutOfBounds {
4155 position: loc,
4156 length: labels.len(),
4157 });
4158 }
4159 let mut nanos: Vec<i64> = labels
4160 .iter()
4161 .filter_map(|label| match label {
4162 IndexLabel::Datetime64(n) => Some(*n),
4163 _ => None,
4164 })
4165 .collect();
4166 nanos.insert(loc, value);
4167 let mut out = Self::new(nanos);
4168 if let Some(name) = self.name() {
4169 out = out.set_name(name);
4170 }
4171 Ok(out)
4172 }
4173
4174 #[must_use]
4178 pub fn format(&self) -> Vec<String> {
4179 self.index
4180 .labels()
4181 .iter()
4182 .map(|label| match label {
4183 IndexLabel::Datetime64(nanos) => match datetime_from_nanos(*nanos) {
4184 Some(dt) => dt.to_rfc3339(),
4185 None => "NaT".to_owned(),
4186 },
4187 _ => "NaT".to_owned(),
4188 })
4189 .collect()
4190 }
4191
4192 #[must_use]
4195 pub fn fillna(&self, value: i64) -> Self {
4196 let nanos: Vec<i64> = self
4197 .index
4198 .labels()
4199 .iter()
4200 .map(|label| match label {
4201 IndexLabel::Datetime64(n) if *n != i64::MIN => *n,
4202 _ => value,
4203 })
4204 .collect();
4205 let mut out = Self::new(nanos);
4206 if let Some(name) = self.name() {
4207 out = out.set_name(name);
4208 }
4209 out
4210 }
4211
4212 #[must_use]
4214 pub fn isnull(&self) -> Vec<bool> {
4215 self.isna()
4216 }
4217
4218 #[must_use]
4220 pub fn notnull(&self) -> Vec<bool> {
4221 self.notna()
4222 }
4223
4224 #[must_use]
4226 pub fn date(&self) -> Vec<Option<chrono::NaiveDate>> {
4227 map_datetime_labels(self.index.labels(), |dt| dt.date_naive())
4228 }
4229
4230 #[must_use]
4233 pub fn time(&self) -> Vec<Option<chrono::NaiveTime>> {
4234 map_datetime_labels(self.index.labels(), |dt| dt.time())
4235 }
4236
4237 #[must_use]
4241 pub fn timetz(&self) -> Vec<Option<chrono::NaiveTime>> {
4242 self.time()
4243 }
4244
4245 #[must_use]
4250 pub fn to_julian_date(&self) -> Vec<Option<f64>> {
4251 const SECONDS_PER_DAY: f64 = 86_400.0;
4252 const UNIX_EPOCH_JD: f64 = 2_440_587.5;
4253 self.index
4254 .labels()
4255 .iter()
4256 .map(|label| match label {
4257 IndexLabel::Datetime64(nanos) if *nanos != i64::MIN => {
4258 let secs = (*nanos as f64) / 1_000_000_000.0;
4259 Some(secs / SECONDS_PER_DAY + UNIX_EPOCH_JD)
4260 }
4261 _ => None,
4262 })
4263 .collect()
4264 }
4265
4266 pub fn tz_localize(&self, tz: &str) -> Result<Self, IndexError> {
4271 match tz {
4272 "UTC" | "utc" => Ok(self.clone()),
4273 other => Err(IndexError::InvalidArgument(format!(
4274 "tz_localize: only 'UTC' is supported until timezone metadata lands; got {other:?}"
4275 ))),
4276 }
4277 }
4278
4279 pub fn tz_convert(&self, _tz: &str) -> Result<Self, IndexError> {
4283 Err(IndexError::InvalidArgument(
4284 "tz_convert: cannot convert tz-naive timestamps; call tz_localize('UTC') first"
4285 .to_owned(),
4286 ))
4287 }
4288
4289 #[must_use]
4293 pub fn tz(&self) -> Option<String> {
4294 None
4295 }
4296
4297 #[must_use]
4299 pub fn tzinfo(&self) -> Option<String> {
4300 self.tz()
4301 }
4302
4303 #[must_use]
4306 pub fn freq(&self) -> Option<String> {
4307 None
4308 }
4309
4310 #[must_use]
4312 pub fn freqstr(&self) -> Option<String> {
4313 self.freq()
4314 }
4315
4316 #[must_use]
4318 pub fn inferred_freq(&self) -> Option<String> {
4319 None
4320 }
4321
4322 pub fn as_unit(&self, unit: &str) -> Result<Self, IndexError> {
4327 match unit {
4328 "ns" => Ok(self.clone()),
4329 other => Err(IndexError::InvalidArgument(format!(
4330 "as_unit: only 'ns' is supported by FrankenPandas's Datetime64 storage; got {other:?}"
4331 ))),
4332 }
4333 }
4334
4335 #[must_use]
4338 pub fn unit(&self) -> &'static str {
4339 "ns"
4340 }
4341
4342 #[must_use]
4345 pub fn resolution(&self) -> &'static str {
4346 "nanosecond"
4347 }
4348
4349 pub fn get_loc(&self, value: i64) -> Result<usize, IndexError> {
4353 self.index
4359 .position(&IndexLabel::Datetime64(value))
4360 .ok_or_else(|| {
4361 IndexError::InvalidArgument(format!("get_loc: {value} not in DatetimeIndex"))
4362 })
4363 }
4364
4365 #[must_use]
4368 pub fn rename(&self, name: &str) -> Self {
4369 self.set_name(name)
4370 }
4371
4372 #[must_use]
4377 pub fn reindex(&self, target: &Self) -> (Self, Vec<isize>) {
4378 let labels: Vec<i64> = target
4379 .index
4380 .labels()
4381 .iter()
4382 .filter_map(|label| match label {
4383 IndexLabel::Datetime64(n) => Some(*n),
4384 _ => None,
4385 })
4386 .collect();
4387 let indexer = self.get_indexer(&labels);
4388 (target.clone(), indexer)
4389 }
4390
4391 #[must_use]
4397 pub fn get_indexer_non_unique(&self, targets: &[i64]) -> (Vec<isize>, Vec<usize>) {
4398 let labels = self.index.labels();
4399 let mut by_value = FxHashMap::<i64, Vec<usize>>::default();
4400 for (i, label) in labels.iter().enumerate() {
4401 if let IndexLabel::Datetime64(n) = label {
4402 by_value.entry(*n).or_default().push(i);
4403 }
4404 }
4405 let mut positions = Vec::<isize>::new();
4406 let mut missing = Vec::<usize>::new();
4407 for (idx, target) in targets.iter().enumerate() {
4408 if let Some(matches) = by_value.get(target) {
4409 positions.extend(
4410 matches
4411 .iter()
4412 .map(|p| isize::try_from(*p).unwrap_or(isize::MAX)),
4413 );
4414 } else {
4415 positions.push(-1);
4416 missing.push(idx);
4417 }
4418 }
4419 (positions, missing)
4420 }
4421
4422 #[must_use]
4425 pub fn get_indexer_for(&self, targets: &[i64]) -> Vec<isize> {
4426 self.get_indexer(targets)
4427 }
4428
4429 #[must_use]
4433 pub fn get_indexer(&self, targets: &[i64]) -> Vec<isize> {
4434 let labels = self.index.labels();
4435 let mut positions = FxHashMap::<i64, isize>::default();
4436 for (i, label) in labels.iter().enumerate() {
4437 if let IndexLabel::Datetime64(n) = label {
4438 positions
4439 .entry(*n)
4440 .or_insert_with(|| isize::try_from(i).unwrap_or(isize::MAX));
4441 }
4442 }
4443 targets
4444 .iter()
4445 .map(|n| positions.get(n).copied().unwrap_or(-1))
4446 .collect()
4447 }
4448
4449 pub fn get_slice_bound(&self, label: i64, side: &str) -> Result<usize, IndexError> {
4453 self.searchsorted(label, side)
4454 }
4455
4456 pub fn slice_indexer(
4460 &self,
4461 start: i64,
4462 end: i64,
4463 ) -> Result<std::ops::Range<usize>, IndexError> {
4464 let (left, right) = self.slice_locs(start, end)?;
4465 Ok(left..right)
4466 }
4467
4468 pub fn slice_locs(&self, start: i64, end: i64) -> Result<(usize, usize), IndexError> {
4472 if !self.is_monotonic_increasing() {
4473 return Err(IndexError::InvalidArgument(
4474 "slice_locs requires a monotonic increasing DatetimeIndex".to_owned(),
4475 ));
4476 }
4477 let left = self.searchsorted(start, "left")?;
4478 let right = self.searchsorted(end, "right")?;
4479 Ok((left, right))
4480 }
4481
4482 #[must_use]
4486 pub fn to_flat_index(&self) -> Index {
4487 self.index.clone()
4488 }
4489
4490 #[must_use]
4492 pub fn r#str(&self) -> IndexStringAccessor<'_> {
4493 IndexStringAccessor::owned(self.to_flat_index())
4494 }
4495
4496 #[must_use]
4498 pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
4499 self.to_flat_index().to_frame()
4500 }
4501
4502 #[must_use]
4504 pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
4505 self.to_flat_index().to_series()
4506 }
4507
4508 #[must_use]
4510 pub fn any(&self) -> bool {
4511 self.to_flat_index().any()
4512 }
4513
4514 #[must_use]
4516 pub fn all(&self) -> bool {
4517 self.to_flat_index().all()
4518 }
4519
4520 pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
4522 self.to_flat_index().get_level_values(level)
4523 }
4524
4525 pub fn droplevel(&self, level: usize) -> Result<Index, IndexError> {
4527 self.to_flat_index().droplevel(level)
4528 }
4529
4530 #[must_use]
4532 pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
4533 self.to_flat_index().groupby()
4534 }
4535
4536 #[must_use]
4538 pub fn map<F>(&self, func: F) -> Index
4539 where
4540 F: Fn(&IndexLabel) -> IndexLabel,
4541 {
4542 self.to_flat_index().map(func)
4543 }
4544
4545 pub fn astype(&self, dtype: &str) -> Result<Index, IndexError> {
4547 self.to_flat_index().astype(dtype)
4548 }
4549
4550 #[must_use]
4552 pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
4553 self.to_flat_index().asof(key)
4554 }
4555
4556 #[must_use]
4558 pub fn asof_locs(&self, where_index: &Index, mask: Option<&[bool]>) -> Vec<Option<usize>> {
4559 self.to_flat_index().asof_locs(where_index, mask)
4560 }
4561
4562 #[must_use]
4564 pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Index {
4565 self.to_flat_index().drop(labels_to_drop)
4566 }
4567
4568 pub fn join(&self, other: &Index, how: &str) -> Result<Index, IndexError> {
4570 self.to_flat_index().join(other, how)
4571 }
4572
4573 #[must_use]
4575 pub fn sortlevel(&self) -> (Index, Vec<usize>) {
4576 self.to_flat_index().sortlevel()
4577 }
4578
4579 #[must_use]
4583 pub fn view(&self) -> Self {
4584 self.clone()
4585 }
4586
4587 #[must_use]
4590 pub fn transpose(&self) -> Self {
4591 self.clone()
4592 }
4593
4594 #[allow(non_snake_case)]
4596 #[must_use]
4597 pub fn T(&self) -> Self {
4598 self.transpose()
4599 }
4600
4601 #[must_use]
4604 pub fn ravel(&self) -> Vec<Option<i64>> {
4605 self.values()
4606 }
4607
4608 #[must_use]
4611 pub fn nlevels(&self) -> usize {
4612 1
4613 }
4614
4615 #[must_use]
4618 pub fn infer_objects(&self) -> Self {
4619 self.clone()
4620 }
4621
4622 pub fn dropna(&self) -> Self {
4626 let surviving: Vec<i64> = self
4627 .index
4628 .labels()
4629 .iter()
4630 .filter_map(|label| match label {
4631 IndexLabel::Datetime64(nanos) if *nanos != i64::MIN => Some(*nanos),
4632 _ => None,
4633 })
4634 .collect();
4635 let mut filtered = Self::new(surviving);
4636 if let Some(name) = self.name() {
4637 filtered = filtered.set_name(name);
4638 }
4639 filtered
4640 }
4641
4642 #[must_use]
4643 pub fn year(&self) -> Vec<Option<i32>> {
4644 use chrono::Datelike;
4645 map_datetime_labels(self.index.labels(), |dt| dt.year())
4646 }
4647
4648 #[must_use]
4649 pub fn month(&self) -> Vec<Option<u32>> {
4650 use chrono::Datelike;
4651 map_datetime_labels(self.index.labels(), |dt| dt.month())
4652 }
4653
4654 #[must_use]
4655 pub fn day(&self) -> Vec<Option<u32>> {
4656 use chrono::Datelike;
4657 map_datetime_labels(self.index.labels(), |dt| dt.day())
4658 }
4659
4660 #[must_use]
4662 pub fn hour(&self) -> Vec<Option<u32>> {
4663 use chrono::Timelike;
4664 map_datetime_labels(self.index.labels(), |dt| dt.hour())
4665 }
4666
4667 #[must_use]
4669 pub fn minute(&self) -> Vec<Option<u32>> {
4670 use chrono::Timelike;
4671 map_datetime_labels(self.index.labels(), |dt| dt.minute())
4672 }
4673
4674 #[must_use]
4676 pub fn second(&self) -> Vec<Option<u32>> {
4677 use chrono::Timelike;
4678 map_datetime_labels(self.index.labels(), |dt| dt.second())
4679 }
4680
4681 #[must_use]
4684 pub fn microsecond(&self) -> Vec<Option<u32>> {
4685 use chrono::Timelike;
4686 map_datetime_labels(self.index.labels(), |dt| dt.nanosecond() / 1_000)
4687 }
4688
4689 #[must_use]
4692 pub fn nanosecond(&self) -> Vec<Option<u32>> {
4693 use chrono::Timelike;
4694 map_datetime_labels(self.index.labels(), |dt| dt.nanosecond() % 1_000)
4695 }
4696
4697 pub fn indexer_at_time(&self, time: &str) -> Result<Vec<usize>, IndexError> {
4700 let target = parse_time_of_day_nanos(time, "DatetimeIndex.indexer_at_time")?;
4701 Ok(self
4702 .index
4703 .labels()
4704 .iter()
4705 .enumerate()
4706 .filter_map(|(position, label)| {
4707 (datetime_label_time_nanos(label) == Some(target)).then_some(position)
4708 })
4709 .collect())
4710 }
4711
4712 pub fn indexer_between_time(
4716 &self,
4717 start_time: &str,
4718 end_time: &str,
4719 include_start: bool,
4720 include_end: bool,
4721 ) -> Result<Vec<usize>, IndexError> {
4722 let start =
4723 parse_time_of_day_nanos(start_time, "DatetimeIndex.indexer_between_time start_time")?;
4724 let end = parse_time_of_day_nanos(end_time, "DatetimeIndex.indexer_between_time end_time")?;
4725 Ok(self
4726 .index
4727 .labels()
4728 .iter()
4729 .enumerate()
4730 .filter_map(|(position, label)| {
4731 datetime_label_time_nanos(label)
4732 .filter(|time| {
4733 time_nanos_in_between(*time, start, end, include_start, include_end)
4734 })
4735 .map(|_| position)
4736 })
4737 .collect())
4738 }
4739
4740 #[must_use]
4743 pub fn week(&self) -> Vec<Option<u32>> {
4744 use chrono::Datelike;
4745 map_datetime_labels(self.index.labels(), |dt| dt.iso_week().week())
4746 }
4747
4748 #[must_use]
4752 pub fn isocalendar(&self) -> Vec<Option<(i32, u32, u32)>> {
4753 use chrono::Datelike;
4754 map_datetime_labels(self.index.labels(), |dt| {
4755 let iso = dt.iso_week();
4756 (iso.year(), iso.week(), dt.weekday().number_from_monday())
4757 })
4758 }
4759
4760 #[must_use]
4762 pub fn weekofyear(&self) -> Vec<Option<u32>> {
4763 self.week()
4764 }
4765
4766 #[must_use]
4768 pub fn dayofyear(&self) -> Vec<Option<u32>> {
4769 use chrono::Datelike;
4770 map_datetime_labels(self.index.labels(), |dt| dt.ordinal())
4771 }
4772
4773 #[must_use]
4775 pub fn day_of_year(&self) -> Vec<Option<u32>> {
4776 self.dayofyear()
4777 }
4778
4779 #[must_use]
4782 pub fn dayofweek(&self) -> Vec<Option<u32>> {
4783 use chrono::Datelike;
4784 map_datetime_labels(self.index.labels(), |dt| {
4785 dt.weekday().num_days_from_monday()
4786 })
4787 }
4788
4789 #[must_use]
4791 pub fn day_of_week(&self) -> Vec<Option<u32>> {
4792 self.dayofweek()
4793 }
4794
4795 #[must_use]
4797 pub fn weekday(&self) -> Vec<Option<u32>> {
4798 self.dayofweek()
4799 }
4800
4801 #[must_use]
4803 pub fn quarter(&self) -> Vec<Option<u32>> {
4804 use chrono::Datelike;
4805 map_datetime_labels(self.index.labels(), |dt| (dt.month() - 1) / 3 + 1)
4806 }
4807
4808 #[must_use]
4811 pub fn is_leap_year(&self) -> Vec<Option<bool>> {
4812 use chrono::Datelike;
4813 map_datetime_labels(self.index.labels(), |dt| {
4814 chrono::NaiveDate::from_ymd_opt(dt.year(), 1, 1).is_some_and(|d| d.leap_year())
4815 })
4816 }
4817
4818 #[must_use]
4821 pub fn days_in_month(&self) -> Vec<Option<u32>> {
4822 use chrono::Datelike;
4823 map_datetime_labels(self.index.labels(), |dt| {
4824 days_in_calendar_month(dt.year(), dt.month())
4825 })
4826 }
4827
4828 #[must_use]
4830 pub fn daysinmonth(&self) -> Vec<Option<u32>> {
4831 self.days_in_month()
4832 }
4833
4834 #[must_use]
4837 pub fn is_month_start(&self) -> Vec<Option<bool>> {
4838 use chrono::Datelike;
4839 map_datetime_labels(self.index.labels(), |dt| dt.day() == 1)
4840 }
4841
4842 #[must_use]
4845 pub fn is_month_end(&self) -> Vec<Option<bool>> {
4846 use chrono::Datelike;
4847 map_datetime_labels(self.index.labels(), |dt| {
4848 dt.day() == days_in_calendar_month(dt.year(), dt.month())
4849 })
4850 }
4851
4852 #[must_use]
4855 pub fn is_quarter_start(&self) -> Vec<Option<bool>> {
4856 use chrono::Datelike;
4857 map_datetime_labels(self.index.labels(), |dt| {
4858 matches!(dt.month(), 1 | 4 | 7 | 10) && dt.day() == 1
4859 })
4860 }
4861
4862 #[must_use]
4865 pub fn is_quarter_end(&self) -> Vec<Option<bool>> {
4866 use chrono::Datelike;
4867 map_datetime_labels(self.index.labels(), |dt| {
4868 matches!(dt.month(), 3 | 6 | 9 | 12)
4869 && dt.day() == days_in_calendar_month(dt.year(), dt.month())
4870 })
4871 }
4872
4873 #[must_use]
4876 pub fn is_year_start(&self) -> Vec<Option<bool>> {
4877 use chrono::Datelike;
4878 map_datetime_labels(self.index.labels(), |dt| dt.month() == 1 && dt.day() == 1)
4879 }
4880
4881 #[must_use]
4884 pub fn is_year_end(&self) -> Vec<Option<bool>> {
4885 use chrono::Datelike;
4886 map_datetime_labels(self.index.labels(), |dt| dt.month() == 12 && dt.day() == 31)
4887 }
4888
4889 #[must_use]
4891 pub fn month_name(&self) -> Vec<Option<String>> {
4892 use chrono::Datelike;
4893 map_datetime_labels(self.index.labels(), |dt| {
4894 month_name_english(dt.month()).to_owned()
4895 })
4896 }
4897
4898 #[must_use]
4900 pub fn day_name(&self) -> Vec<Option<String>> {
4901 use chrono::Datelike;
4902 map_datetime_labels(self.index.labels(), |dt| {
4903 weekday_name_english(dt.weekday()).to_owned()
4904 })
4905 }
4906
4907 #[must_use]
4910 pub fn normalize(&self) -> Self {
4911 let nanos: Vec<i64> = self
4912 .index
4913 .labels()
4914 .iter()
4915 .map(|label| match label {
4916 IndexLabel::Datetime64(nanos) if *nanos != i64::MIN => {
4917 let secs_per_day: i64 = 86_400;
4918 let nanos_per_day: i64 = secs_per_day * 1_000_000_000;
4919 nanos.div_euclid(nanos_per_day) * nanos_per_day
4920 }
4921 _ => i64::MIN,
4922 })
4923 .collect();
4924 let mut normalized = Self::new(nanos);
4925 if let Some(name) = self.name() {
4926 normalized = normalized.set_name(name);
4927 }
4928 normalized
4929 }
4930
4931 #[must_use]
4934 pub fn is_normalized(&self) -> bool {
4935 let nanos_per_day: i64 = 86_400 * 1_000_000_000;
4936 self.index.labels().iter().all(|label| match label {
4937 IndexLabel::Datetime64(nanos) => {
4938 *nanos == i64::MIN || nanos.rem_euclid(nanos_per_day) == 0
4939 }
4940 _ => true,
4941 })
4942 }
4943}
4944
/// Returns the English name for a 1-based month number, or `""` when
/// `month` is outside `1..=12`.
fn month_name_english(month: u32) -> &'static str {
    const MONTH_NAMES: [&str; 12] = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ];
    match usize::try_from(month) {
        Ok(ordinal @ 1..=12) => MONTH_NAMES[ordinal - 1],
        _ => "",
    }
}
4962
4963fn weekday_name_english(weekday: chrono::Weekday) -> &'static str {
4964 match weekday {
4965 chrono::Weekday::Mon => "Monday",
4966 chrono::Weekday::Tue => "Tuesday",
4967 chrono::Weekday::Wed => "Wednesday",
4968 chrono::Weekday::Thu => "Thursday",
4969 chrono::Weekday::Fri => "Friday",
4970 chrono::Weekday::Sat => "Saturday",
4971 chrono::Weekday::Sun => "Sunday",
4972 }
4973}
4974
4975fn days_in_calendar_month(year: i32, month: u32) -> u32 {
4976 let next_month = if month == 12 { 1 } else { month + 1 };
4977 let next_year = if month == 12 { year + 1 } else { year };
4978 let first_of_next = chrono::NaiveDate::from_ymd_opt(next_year, next_month, 1);
4979 let first_of_this = chrono::NaiveDate::from_ymd_opt(year, month, 1);
4980 match (first_of_next, first_of_this) {
4981 (Some(next), Some(this)) => (next - this).num_days() as u32,
4982 _ => 0,
4983 }
4984}
4985
4986#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
4988pub struct TimedeltaIndex {
4989 index: Index,
4990}
4991
4992impl TimedeltaIndex {
4993 #[must_use]
4994 pub fn new(nanos: Vec<i64>) -> Self {
4995 Self {
4996 index: Index::from_timedelta64(nanos),
4997 }
4998 }
4999
5000 pub fn from_index(index: Index) -> Result<Self, IndexError> {
5001 ensure_index_kind(
5002 &index,
5003 |label| matches!(label, IndexLabel::Timedelta64(_)),
5004 "TimedeltaIndex",
5005 )?;
5006 Ok(Self { index })
5007 }
5008
5009 #[must_use]
5010 pub fn as_index(&self) -> &Index {
5011 &self.index
5012 }
5013
5014 #[must_use]
5015 pub fn into_index(self) -> Index {
5016 self.index
5017 }
5018
5019 #[must_use]
5020 pub fn len(&self) -> usize {
5021 self.index.len()
5022 }
5023
5024 #[must_use]
5025 pub fn is_empty(&self) -> bool {
5026 self.index.is_empty()
5027 }
5028
5029 #[must_use]
5030 pub fn name(&self) -> Option<&str> {
5031 self.index.name()
5032 }
5033
5034 #[must_use]
5035 pub fn set_name(&self, name: &str) -> Self {
5036 Self {
5037 index: self.index.set_name(name),
5038 }
5039 }
5040
5041 #[must_use]
5042 pub fn set_names(&self, name: Option<&str>) -> Self {
5043 Self {
5044 index: self.index.set_names(name),
5045 }
5046 }
5047
5048 #[must_use]
5049 pub fn rename_index(&self, name: Option<&str>) -> Self {
5050 self.set_names(name)
5051 }
5052
5053 #[must_use]
5054 pub fn names(&self) -> Vec<Option<String>> {
5055 self.index.names()
5056 }
5057
5058 #[must_use]
5059 pub fn copy(&self) -> Self {
5060 self.clone()
5061 }
5062
5063 #[must_use]
5064 pub fn shape(&self) -> (usize,) {
5065 self.index.shape()
5066 }
5067
5068 #[must_use]
5069 pub fn size(&self) -> usize {
5070 self.index.size()
5071 }
5072
5073 #[must_use]
5074 pub fn empty(&self) -> bool {
5075 self.index.empty()
5076 }
5077
5078 #[must_use]
5079 pub fn dtype(&self) -> &'static str {
5080 "timedelta64[ns]"
5081 }
5082
5083 #[must_use]
5084 pub fn dtypes(&self) -> Vec<&'static str> {
5085 vec![self.dtype()]
5086 }
5087
5088 #[must_use]
5089 pub fn memory_usage(&self, deep: bool) -> usize {
5090 self.index.memory_usage(deep)
5091 }
5092
5093 #[must_use]
5094 pub fn nbytes(&self) -> usize {
5095 self.index.nbytes()
5096 }
5097
5098 #[must_use]
5099 pub fn hasnans(&self) -> bool {
5100 self.index.hasnans()
5101 }
5102
5103 #[must_use]
5104 pub fn isna(&self) -> Vec<bool> {
5105 self.index.isna()
5106 }
5107
5108 #[must_use]
5109 pub fn notna(&self) -> Vec<bool> {
5110 self.index.notna()
5111 }
5112
5113 #[must_use]
5114 pub fn is_unique(&self) -> bool {
5115 self.index.is_unique()
5116 }
5117
5118 #[must_use]
5119 pub fn has_duplicates(&self) -> bool {
5120 self.index.has_duplicates()
5121 }
5122
5123 #[must_use]
5124 pub fn is_monotonic_increasing(&self) -> bool {
5125 self.index.is_monotonic_increasing()
5126 }
5127
5128 #[must_use]
5129 pub fn is_monotonic(&self) -> bool {
5130 self.index.is_monotonic()
5131 }
5132
5133 #[must_use]
5134 pub fn is_monotonic_decreasing(&self) -> bool {
5135 self.index.is_monotonic_decreasing()
5136 }
5137
5138 #[must_use]
5139 pub fn nunique(&self) -> usize {
5140 self.index.nunique()
5141 }
5142
5143 #[must_use]
5144 pub fn nunique_with_dropna(&self, dropna: bool) -> usize {
5145 self.index.nunique_with_dropna(dropna)
5146 }
5147
5148 #[must_use]
5149 pub fn ndim(&self) -> usize {
5150 self.index.ndim()
5151 }
5152
5153 pub fn item(&self) -> Result<Option<i64>, IndexError> {
5154 match self.index.item()? {
5155 IndexLabel::Timedelta64(nanos) if nanos != Timedelta::NAT => Ok(Some(nanos)),
5156 IndexLabel::Timedelta64(_) => Ok(None),
5157 label => Err(IndexError::InvalidArgument(format!(
5158 "TimedeltaIndex item must be timedelta64, got {label}"
5159 ))),
5160 }
5161 }
5162
5163 #[must_use]
5164 pub fn is_(&self, other: &Self) -> bool {
5165 std::ptr::eq(self, other)
5166 }
5167
5168 #[must_use]
5169 pub fn equals(&self, other: &Self) -> bool {
5170 self.index.equals(&other.index)
5171 }
5172
5173 #[must_use]
5174 pub fn identical(&self, other: &Self) -> bool {
5175 self.index.identical(&other.index)
5176 }
5177
5178 #[must_use]
5179 pub fn holds_integer(&self) -> bool {
5180 false
5181 }
5182
5183 #[must_use]
5184 pub fn inferred_type(&self) -> &'static str {
5185 "timedelta64"
5186 }
5187
5188 #[must_use]
5189 pub fn is_boolean(&self) -> bool {
5190 false
5191 }
5192
5193 #[must_use]
5194 pub fn is_categorical(&self) -> bool {
5195 false
5196 }
5197
5198 #[must_use]
5199 pub fn is_floating(&self) -> bool {
5200 false
5201 }
5202
5203 #[must_use]
5204 pub fn is_integer(&self) -> bool {
5205 false
5206 }
5207
5208 #[must_use]
5209 pub fn is_interval(&self) -> bool {
5210 false
5211 }
5212
5213 #[must_use]
5214 pub fn is_numeric(&self) -> bool {
5215 false
5216 }
5217
5218 #[must_use]
5219 pub fn is_object(&self) -> bool {
5220 false
5221 }
5222
5223 #[must_use]
5224 pub fn nanos(&self) -> Vec<Option<i64>> {
5225 map_timedelta_labels(self.index.labels(), |nanos| nanos)
5226 }
5227
5228 #[must_use]
5229 pub fn values(&self) -> Vec<Option<i64>> {
5230 self.nanos()
5231 }
5232
5233 #[must_use]
5234 pub fn to_list(&self) -> Vec<Option<i64>> {
5235 self.nanos()
5236 }
5237
5238 #[must_use]
5239 pub fn tolist(&self) -> Vec<Option<i64>> {
5240 self.to_list()
5241 }
5242
5243 #[must_use]
5244 pub fn to_numpy(&self) -> Vec<Option<i64>> {
5245 self.nanos()
5246 }
5247
5248 #[must_use]
5249 pub fn array(&self) -> Vec<Option<i64>> {
5250 self.nanos()
5251 }
5252
5253 #[must_use]
5254 pub fn days(&self) -> Vec<Option<i64>> {
5255 map_timedelta_labels(self.index.labels(), |nanos| {
5256 nanos.div_euclid(Timedelta::NANOS_PER_DAY)
5257 })
5258 }
5259
5260 #[must_use]
5261 pub fn seconds(&self) -> Vec<Option<i64>> {
5262 map_timedelta_labels(self.index.labels(), |nanos| {
5263 nanos.rem_euclid(Timedelta::NANOS_PER_DAY) / Timedelta::NANOS_PER_SEC
5264 })
5265 }
5266
5267 #[must_use]
5268 pub fn total_seconds(&self) -> Vec<Option<f64>> {
5269 map_timedelta_labels(self.index.labels(), Timedelta::total_seconds)
5270 }
5271
5272 #[must_use]
5275 pub fn components(&self) -> Vec<Option<TimedeltaComponents>> {
5276 map_timedelta_labels(self.index.labels(), timedelta_components_for_index)
5277 }
5278
5279 #[must_use]
5282 pub fn asi8(&self) -> Vec<i64> {
5283 self.index
5284 .labels()
5285 .iter()
5286 .map(|label| match label {
5287 IndexLabel::Timedelta64(nanos) => *nanos,
5288 IndexLabel::Int64(_)
5289 | IndexLabel::Utf8(_)
5290 | IndexLabel::Datetime64(_)
5291 | IndexLabel::Null(_) => Timedelta::NAT,
5292 })
5293 .collect()
5294 }
5295
5296 #[must_use]
5299 pub fn microseconds(&self) -> Vec<Option<i64>> {
5300 map_timedelta_labels(self.index.labels(), |nanos| {
5301 nanos.rem_euclid(Timedelta::NANOS_PER_SEC) / 1_000
5302 })
5303 }
5304
5305 #[must_use]
5308 pub fn nanoseconds(&self) -> Vec<Option<i64>> {
5309 map_timedelta_labels(self.index.labels(), |nanos| nanos.rem_euclid(1_000))
5310 }
5311
5312 pub fn argmax(&self) -> Result<usize, IndexError> {
5316 let labels = self.index.labels();
5317 let mut best: Option<usize> = None;
5318 for (i, label) in labels.iter().enumerate() {
5319 let nanos = match label {
5320 IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => *n,
5321 _ => continue,
5322 };
5323 best = Some(match best {
5324 Some(b) => match labels[b] {
5325 IndexLabel::Timedelta64(prev) if nanos > prev => i,
5326 _ => b,
5327 },
5328 None => i,
5329 });
5330 }
5331 best.ok_or_else(|| {
5332 IndexError::InvalidArgument("attempt to get argmax of an empty sequence".to_owned())
5333 })
5334 }
5335
5336 pub fn argmin(&self) -> Result<usize, IndexError> {
5338 let labels = self.index.labels();
5339 let mut best: Option<usize> = None;
5340 for (i, label) in labels.iter().enumerate() {
5341 let nanos = match label {
5342 IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => *n,
5343 _ => continue,
5344 };
5345 best = Some(match best {
5346 Some(b) => match labels[b] {
5347 IndexLabel::Timedelta64(prev) if nanos < prev => i,
5348 _ => b,
5349 },
5350 None => i,
5351 });
5352 }
5353 best.ok_or_else(|| {
5354 IndexError::InvalidArgument("attempt to get argmin of an empty sequence".to_owned())
5355 })
5356 }
5357
5358 #[must_use]
5361 pub fn argsort(&self) -> Vec<usize> {
5362 self.index.argsort()
5363 }
5364
5365 pub fn unique(&self) -> Result<Self, IndexError> {
5367 Self::from_index(self.index.unique())
5368 }
5369
5370 pub fn factorize(&self) -> Result<(Vec<isize>, Self), IndexError> {
5373 let (codes, uniques) = self.index.factorize();
5374 Ok((codes, Self::from_index(uniques)?))
5375 }
5376
5377 #[must_use]
5380 pub fn value_counts(&self) -> Vec<(IndexLabel, usize)> {
5381 self.index.value_counts()
5382 }
5383
5384 #[must_use]
5387 pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
5388 self.index.duplicated(keep)
5389 }
5390
5391 pub fn drop_duplicates(&self) -> Result<Self, IndexError> {
5393 Self::from_index(self.index.drop_duplicates())
5394 }
5395
5396 pub fn r#where(&self, cond: &[bool], other: i64) -> Result<Self, IndexError> {
5400 let labels = self.index.labels();
5401 if cond.len() != labels.len() {
5402 return Err(IndexError::LengthMismatch {
5403 expected: labels.len(),
5404 actual: cond.len(),
5405 context: "where: cond length must match index length".to_owned(),
5406 });
5407 }
5408 let nanos: Vec<i64> = labels
5409 .iter()
5410 .zip(cond.iter())
5411 .map(|(label, &keep)| {
5412 if keep {
5413 match label {
5414 IndexLabel::Timedelta64(n) => *n,
5415 _ => Timedelta::NAT,
5416 }
5417 } else {
5418 other
5419 }
5420 })
5421 .collect();
5422 let mut out = Self::new(nanos);
5423 if let Some(name) = self.name() {
5424 out = out.set_name(name);
5425 }
5426 Ok(out)
5427 }
5428
5429 pub fn putmask(&self, mask: &[bool], value: i64) -> Result<Self, IndexError> {
5432 let labels = self.index.labels();
5433 if mask.len() != labels.len() {
5434 return Err(IndexError::LengthMismatch {
5435 expected: labels.len(),
5436 actual: mask.len(),
5437 context: "putmask: mask length must match index length".to_owned(),
5438 });
5439 }
5440 let nanos: Vec<i64> = labels
5441 .iter()
5442 .zip(mask.iter())
5443 .map(|(label, &replace)| {
5444 if replace {
5445 value
5446 } else {
5447 match label {
5448 IndexLabel::Timedelta64(n) => *n,
5449 _ => Timedelta::NAT,
5450 }
5451 }
5452 })
5453 .collect();
5454 let mut out = Self::new(nanos);
5455 if let Some(name) = self.name() {
5456 out = out.set_name(name);
5457 }
5458 Ok(out)
5459 }
5460
5461 pub fn searchsorted(&self, value: i64, side: &str) -> Result<usize, IndexError> {
5465 self.index
5466 .searchsorted(&IndexLabel::Timedelta64(value), side)
5467 }
5468
5469 pub fn insert(&self, loc: usize, value: i64) -> Result<Self, IndexError> {
5473 let labels = self.index.labels();
5474 if loc > labels.len() {
5475 return Err(IndexError::OutOfBounds {
5476 position: loc,
5477 length: labels.len(),
5478 });
5479 }
5480 let mut nanos: Vec<i64> = labels
5481 .iter()
5482 .filter_map(|label| match label {
5483 IndexLabel::Timedelta64(n) => Some(*n),
5484 _ => None,
5485 })
5486 .collect();
5487 nanos.insert(loc, value);
5488 let mut out = Self::new(nanos);
5489 if let Some(name) = self.name() {
5490 out = out.set_name(name);
5491 }
5492 Ok(out)
5493 }
5494
5495 #[must_use]
5499 pub fn format(&self) -> Vec<String> {
5500 self.index
5501 .labels()
5502 .iter()
5503 .map(|label| match label {
5504 IndexLabel::Timedelta64(nanos) if *nanos != Timedelta::NAT => nanos.to_string(),
5505 _ => "NaT".to_owned(),
5506 })
5507 .collect()
5508 }
5509
5510 #[must_use]
5513 pub fn fillna(&self, value: i64) -> Self {
5514 let nanos: Vec<i64> = self
5515 .index
5516 .labels()
5517 .iter()
5518 .map(|label| match label {
5519 IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => *n,
5520 _ => value,
5521 })
5522 .collect();
5523 let mut out = Self::new(nanos);
5524 if let Some(name) = self.name() {
5525 out = out.set_name(name);
5526 }
5527 out
5528 }
5529
5530 #[must_use]
5532 pub fn isnull(&self) -> Vec<bool> {
5533 self.isna()
5534 }
5535
5536 #[must_use]
5538 pub fn notnull(&self) -> Vec<bool> {
5539 self.notna()
5540 }
5541
5542 #[must_use]
5545 pub fn to_pytimedelta(&self) -> Vec<Option<chrono::Duration>> {
5546 self.index
5547 .labels()
5548 .iter()
5549 .map(|label| match label {
5550 IndexLabel::Timedelta64(nanos) if *nanos != Timedelta::NAT => {
5551 Some(chrono::Duration::nanoseconds(*nanos))
5552 }
5553 _ => None,
5554 })
5555 .collect()
5556 }
5557
5558 #[must_use]
5561 pub fn freq(&self) -> Option<String> {
5562 None
5563 }
5564
5565 #[must_use]
5567 pub fn freqstr(&self) -> Option<String> {
5568 self.freq()
5569 }
5570
5571 #[must_use]
5573 pub fn inferred_freq(&self) -> Option<String> {
5574 None
5575 }
5576
5577 pub fn as_unit(&self, unit: &str) -> Result<Self, IndexError> {
5580 match unit {
5581 "ns" => Ok(self.clone()),
5582 other => Err(IndexError::InvalidArgument(format!(
5583 "as_unit: only 'ns' is supported by FrankenPandas's Timedelta64 storage; got {other:?}"
5584 ))),
5585 }
5586 }
5587
5588 #[must_use]
5591 pub fn unit(&self) -> &'static str {
5592 "ns"
5593 }
5594
5595 #[must_use]
5598 pub fn resolution(&self) -> &'static str {
5599 "nanosecond"
5600 }
5601
5602 pub fn get_loc(&self, value: i64) -> Result<usize, IndexError> {
5604 self.index
5608 .position(&IndexLabel::Timedelta64(value))
5609 .ok_or_else(|| {
5610 IndexError::InvalidArgument(format!("get_loc: {value} not in TimedeltaIndex"))
5611 })
5612 }
5613
5614 #[must_use]
5616 pub fn rename(&self, name: &str) -> Self {
5617 self.set_name(name)
5618 }
5619
5620 #[must_use]
5623 pub fn reindex(&self, target: &Self) -> (Self, Vec<isize>) {
5624 let labels: Vec<i64> = target
5625 .index
5626 .labels()
5627 .iter()
5628 .filter_map(|label| match label {
5629 IndexLabel::Timedelta64(n) => Some(*n),
5630 _ => None,
5631 })
5632 .collect();
5633 let indexer = self.get_indexer(&labels);
5634 (target.clone(), indexer)
5635 }
5636
5637 #[must_use]
5640 pub fn get_indexer_non_unique(&self, targets: &[i64]) -> (Vec<isize>, Vec<usize>) {
5641 let labels = self.index.labels();
5642 let mut by_value = FxHashMap::<i64, Vec<usize>>::default();
5643 for (i, label) in labels.iter().enumerate() {
5644 if let IndexLabel::Timedelta64(n) = label {
5645 by_value.entry(*n).or_default().push(i);
5646 }
5647 }
5648 let mut positions = Vec::<isize>::new();
5649 let mut missing = Vec::<usize>::new();
5650 for (idx, target) in targets.iter().enumerate() {
5651 if let Some(matches) = by_value.get(target) {
5652 positions.extend(
5653 matches
5654 .iter()
5655 .map(|p| isize::try_from(*p).unwrap_or(isize::MAX)),
5656 );
5657 } else {
5658 positions.push(-1);
5659 missing.push(idx);
5660 }
5661 }
5662 (positions, missing)
5663 }
5664
5665 #[must_use]
5668 pub fn get_indexer_for(&self, targets: &[i64]) -> Vec<isize> {
5669 self.get_indexer(targets)
5670 }
5671
5672 #[must_use]
5675 pub fn get_indexer(&self, targets: &[i64]) -> Vec<isize> {
5676 let labels = self.index.labels();
5677 let mut positions = FxHashMap::<i64, isize>::default();
5678 for (i, label) in labels.iter().enumerate() {
5679 if let IndexLabel::Timedelta64(n) = label {
5680 positions
5681 .entry(*n)
5682 .or_insert_with(|| isize::try_from(i).unwrap_or(isize::MAX));
5683 }
5684 }
5685 targets
5686 .iter()
5687 .map(|n| positions.get(n).copied().unwrap_or(-1))
5688 .collect()
5689 }
5690
5691 pub fn get_slice_bound(&self, label: i64, side: &str) -> Result<usize, IndexError> {
5695 self.searchsorted(label, side)
5696 }
5697
5698 pub fn slice_indexer(
5701 &self,
5702 start: i64,
5703 end: i64,
5704 ) -> Result<std::ops::Range<usize>, IndexError> {
5705 let (left, right) = self.slice_locs(start, end)?;
5706 Ok(left..right)
5707 }
5708
5709 pub fn slice_locs(&self, start: i64, end: i64) -> Result<(usize, usize), IndexError> {
5713 if !self.is_monotonic_increasing() {
5714 return Err(IndexError::InvalidArgument(
5715 "slice_locs requires a monotonic increasing TimedeltaIndex".to_owned(),
5716 ));
5717 }
5718 let left = self.searchsorted(start, "left")?;
5719 let right = self.searchsorted(end, "right")?;
5720 Ok((left, right))
5721 }
5722
5723 #[must_use]
5726 pub fn to_flat_index(&self) -> Index {
5727 self.index.clone()
5728 }
5729
5730 #[must_use]
5732 pub fn r#str(&self) -> IndexStringAccessor<'_> {
5733 IndexStringAccessor::owned(self.to_flat_index())
5734 }
5735
5736 #[must_use]
5738 pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
5739 self.to_flat_index().to_frame()
5740 }
5741
5742 #[must_use]
5744 pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
5745 self.to_flat_index().to_series()
5746 }
5747
5748 #[must_use]
5750 pub fn any(&self) -> bool {
5751 self.to_flat_index().any()
5752 }
5753
5754 #[must_use]
5756 pub fn all(&self) -> bool {
5757 self.to_flat_index().all()
5758 }
5759
5760 pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
5762 self.to_flat_index().get_level_values(level)
5763 }
5764
5765 pub fn droplevel(&self, level: usize) -> Result<Index, IndexError> {
5767 self.to_flat_index().droplevel(level)
5768 }
5769
5770 #[must_use]
5772 pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
5773 self.to_flat_index().groupby()
5774 }
5775
5776 #[must_use]
5778 pub fn map<F>(&self, func: F) -> Index
5779 where
5780 F: Fn(&IndexLabel) -> IndexLabel,
5781 {
5782 self.to_flat_index().map(func)
5783 }
5784
5785 pub fn astype(&self, dtype: &str) -> Result<Index, IndexError> {
5787 self.to_flat_index().astype(dtype)
5788 }
5789
5790 #[must_use]
5792 pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
5793 self.to_flat_index().asof(key)
5794 }
5795
5796 #[must_use]
5798 pub fn asof_locs(&self, where_index: &Index, mask: Option<&[bool]>) -> Vec<Option<usize>> {
5799 self.to_flat_index().asof_locs(where_index, mask)
5800 }
5801
5802 #[must_use]
5804 pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Index {
5805 self.to_flat_index().drop(labels_to_drop)
5806 }
5807
5808 pub fn join(&self, other: &Index, how: &str) -> Result<Index, IndexError> {
5810 self.to_flat_index().join(other, how)
5811 }
5812
5813 #[must_use]
5815 pub fn sortlevel(&self) -> (Index, Vec<usize>) {
5816 self.to_flat_index().sortlevel()
5817 }
5818
5819 #[must_use]
5821 pub fn view(&self) -> Self {
5822 self.clone()
5823 }
5824
5825 #[must_use]
5828 pub fn transpose(&self) -> Self {
5829 self.clone()
5830 }
5831
5832 #[allow(non_snake_case)]
5834 #[must_use]
5835 pub fn T(&self) -> Self {
5836 self.transpose()
5837 }
5838
5839 #[must_use]
5842 pub fn ravel(&self) -> Vec<Option<i64>> {
5843 self.values()
5844 }
5845
5846 #[must_use]
5848 pub fn nlevels(&self) -> usize {
5849 1
5850 }
5851
5852 #[must_use]
5855 pub fn infer_objects(&self) -> Self {
5856 self.clone()
5857 }
5858
5859 pub fn dropna(&self) -> Self {
5861 let surviving: Vec<i64> = self
5862 .index
5863 .labels()
5864 .iter()
5865 .filter_map(|label| match label {
5866 IndexLabel::Timedelta64(nanos) if *nanos != Timedelta::NAT => Some(*nanos),
5867 _ => None,
5868 })
5869 .collect();
5870 let mut filtered = Self::new(surviving);
5871 if let Some(name) = self.name() {
5872 filtered = filtered.set_name(name);
5873 }
5874 filtered
5875 }
5876
5877 pub fn take(&self, positions: &[usize]) -> Result<Self, IndexError> {
5881 let labels = self.index.labels();
5882 for &p in positions {
5883 if p >= labels.len() {
5884 return Err(IndexError::OutOfBounds {
5885 position: p,
5886 length: labels.len(),
5887 });
5888 }
5889 }
5890 let nanos: Vec<i64> = positions
5891 .iter()
5892 .map(|&p| match labels[p] {
5893 IndexLabel::Timedelta64(n) => n,
5894 _ => Timedelta::NAT,
5895 })
5896 .collect();
5897 let mut out = Self::new(nanos);
5898 if let Some(name) = self.name() {
5899 out = out.set_name(name);
5900 }
5901 Ok(out)
5902 }
5903
5904 #[must_use]
5907 pub fn repeat(&self, repeats: usize) -> Self {
5908 let mut out = Vec::with_capacity(self.len() * repeats);
5909 for label in self.index.labels() {
5910 if let IndexLabel::Timedelta64(n) = label {
5911 for _ in 0..repeats {
5912 out.push(*n);
5913 }
5914 }
5915 }
5916 let mut result = Self::new(out);
5917 if let Some(name) = self.name() {
5918 result = result.set_name(name);
5919 }
5920 result
5921 }
5922
5923 #[must_use]
5927 pub fn isin(&self, values: &[i64]) -> Vec<bool> {
5928 let needle: FxHashSet<i64> = values.iter().copied().collect();
5929 self.index
5930 .labels()
5931 .iter()
5932 .map(|label| match label {
5933 IndexLabel::Timedelta64(n) => needle.contains(n),
5934 _ => false,
5935 })
5936 .collect()
5937 }
5938
5939 #[must_use]
5943 pub fn append(&self, other: &Self) -> Self {
5944 let mut nanos: Vec<i64> = self
5945 .index
5946 .labels()
5947 .iter()
5948 .filter_map(|label| match label {
5949 IndexLabel::Timedelta64(n) => Some(*n),
5950 _ => None,
5951 })
5952 .collect();
5953 nanos.extend(other.index.labels().iter().filter_map(|label| match label {
5954 IndexLabel::Timedelta64(n) => Some(*n),
5955 _ => None,
5956 }));
5957 let mut out = Self::new(nanos);
5958 if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
5959 out = out.set_name(name);
5960 }
5961 out
5962 }
5963
5964 #[must_use]
5966 pub fn min(&self) -> Option<i64> {
5967 self.index
5968 .labels()
5969 .iter()
5970 .filter_map(|label| match label {
5971 IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => Some(*n),
5972 _ => None,
5973 })
5974 .min()
5975 }
5976
5977 #[must_use]
5980 pub fn shift(&self, periods: i64, freq_nanos: i64) -> Self {
5981 let delta = periods.saturating_mul(freq_nanos);
5982 let nanos: Vec<i64> = self
5983 .index
5984 .labels()
5985 .iter()
5986 .map(|label| match label {
5987 IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => n.saturating_add(delta),
5988 _ => Timedelta::NAT,
5989 })
5990 .collect();
5991 let mut out = Self::new(nanos);
5992 if let Some(name) = self.name() {
5993 out = out.set_name(name);
5994 }
5995 out
5996 }
5997
5998 #[must_use]
6002 pub fn diff(&self, periods: i64) -> Self {
6003 let labels = self.index.labels();
6004 optional_diffs_to_timedelta_index(
6005 positional_diff(labels.len(), periods, |current, previous| {
6006 match (&labels[current], &labels[previous]) {
6007 (
6008 IndexLabel::Timedelta64(current_nanos),
6009 IndexLabel::Timedelta64(previous_nanos),
6010 ) if *current_nanos != Timedelta::NAT && *previous_nanos != Timedelta::NAT => {
6011 current_nanos.checked_sub(*previous_nanos)
6012 }
6013 _ => None,
6014 }
6015 }),
6016 self.name(),
6017 )
6018 }
6019
6020 fn round_fixed_freq(&self, freq: &str, mode: TemporalRoundMode) -> Result<Self, IndexError> {
6021 let unit_nanos = parse_fixed_temporal_freq(freq, "TimedeltaIndex rounding")?;
6022 let nanos: Vec<i64> = self
6023 .index
6024 .labels()
6025 .iter()
6026 .map(|label| match label {
6027 IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => {
6028 round_nanos_to_unit(*n, unit_nanos, mode)
6029 }
6030 _ => Timedelta::NAT,
6031 })
6032 .collect();
6033 let mut out = Self::new(nanos);
6034 if let Some(name) = self.name() {
6035 out = out.set_name(name);
6036 }
6037 Ok(out)
6038 }
6039
6040 pub fn floor(&self, freq: &str) -> Result<Self, IndexError> {
6042 self.round_fixed_freq(freq, TemporalRoundMode::Floor)
6043 }
6044
6045 pub fn ceil(&self, freq: &str) -> Result<Self, IndexError> {
6047 self.round_fixed_freq(freq, TemporalRoundMode::Ceil)
6048 }
6049
6050 pub fn round(&self, freq: &str) -> Result<Self, IndexError> {
6052 self.round_fixed_freq(freq, TemporalRoundMode::Round)
6053 }
6054
6055 #[must_use]
6058 pub fn mean(&self) -> Option<i64> {
6059 let mut total: i128 = 0;
6060 let mut count: i128 = 0;
6061 for label in self.index.labels() {
6062 if let IndexLabel::Timedelta64(n) = label
6063 && *n != Timedelta::NAT
6064 {
6065 total += i128::from(*n);
6066 count += 1;
6067 }
6068 }
6069 if count == 0 {
6070 return None;
6071 }
6072 i64::try_from(total / count).ok()
6073 }
6074
6075 #[must_use]
6080 pub fn sum(&self) -> Option<i64> {
6081 let mut total: i128 = 0;
6082 for label in self.index.labels() {
6083 if let IndexLabel::Timedelta64(n) = label
6084 && *n != Timedelta::NAT
6085 {
6086 total += i128::from(*n);
6087 }
6088 }
6089 i64::try_from(total).ok()
6090 }
6091
6092 #[must_use]
6096 pub fn var(&self) -> Option<f64> {
6097 let nanos: Vec<f64> = self
6098 .index
6099 .labels()
6100 .iter()
6101 .filter_map(|label| match label {
6102 IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => Some(*n as f64),
6103 _ => None,
6104 })
6105 .collect();
6106 if nanos.len() < 2 {
6107 return None;
6108 }
6109 let mean = nanos.iter().sum::<f64>() / nanos.len() as f64;
6110 Some(nanos.iter().map(|n| (n - mean).powi(2)).sum::<f64>() / (nanos.len() as f64 - 1.0))
6111 }
6112
6113 #[must_use]
6117 pub fn std(&self) -> Option<i64> {
6118 let nanos: Vec<f64> = self
6119 .index
6120 .labels()
6121 .iter()
6122 .filter_map(|label| match label {
6123 IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => Some(*n as f64),
6124 _ => None,
6125 })
6126 .collect();
6127 if nanos.len() < 2 {
6128 return None;
6129 }
6130 let mean = nanos.iter().sum::<f64>() / nanos.len() as f64;
6131 let var =
6132 nanos.iter().map(|n| (n - mean).powi(2)).sum::<f64>() / (nanos.len() as f64 - 1.0);
6133 Some(var.sqrt() as i64)
6134 }
6135
6136 #[must_use]
6138 pub fn median(&self) -> Option<i64> {
6139 let mut nanos: Vec<i64> = self
6140 .index
6141 .labels()
6142 .iter()
6143 .filter_map(|label| match label {
6144 IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => Some(*n),
6145 _ => None,
6146 })
6147 .collect();
6148 if nanos.is_empty() {
6149 return None;
6150 }
6151 nanos.sort_unstable();
6152 let mid = nanos.len() / 2;
6153 if nanos.len() % 2 == 1 {
6154 Some(nanos[mid])
6155 } else {
6156 let total = i128::from(nanos[mid - 1]) + i128::from(nanos[mid]);
6157 i64::try_from(total / 2).ok()
6158 }
6159 }
6160
6161 #[must_use]
6163 pub fn max(&self) -> Option<i64> {
6164 self.index
6165 .labels()
6166 .iter()
6167 .filter_map(|label| match label {
6168 IndexLabel::Timedelta64(n) if *n != Timedelta::NAT => Some(*n),
6169 _ => None,
6170 })
6171 .max()
6172 }
6173
6174 #[must_use]
6177 pub fn intersection(&self, other: &Self) -> Self {
6178 let other_set: FxHashSet<i64> = other
6179 .index
6180 .labels()
6181 .iter()
6182 .filter_map(|label| match label {
6183 IndexLabel::Timedelta64(n) => Some(*n),
6184 _ => None,
6185 })
6186 .collect();
6187 let mut seen = FxHashSet::<i64>::default();
6188 let nanos: Vec<i64> = self
6189 .index
6190 .labels()
6191 .iter()
6192 .filter_map(|label| match label {
6193 IndexLabel::Timedelta64(n) if other_set.contains(n) && seen.insert(*n) => Some(*n),
6194 _ => None,
6195 })
6196 .collect();
6197 let mut out = Self::new(nanos);
6198 if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
6199 out = out.set_name(name);
6200 }
6201 out
6202 }
6203
6204 #[must_use]
6207 pub fn union(&self, other: &Self) -> Self {
6208 let mut seen = FxHashSet::<i64>::default();
6209 let mut nanos: Vec<i64> = Vec::new();
6210 for label in self
6211 .index
6212 .labels()
6213 .iter()
6214 .chain(other.index.labels().iter())
6215 {
6216 if let IndexLabel::Timedelta64(n) = label
6217 && seen.insert(*n)
6218 {
6219 nanos.push(*n);
6220 }
6221 }
6222 let mut out = Self::new(nanos);
6223 if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
6224 out = out.set_name(name);
6225 }
6226 out
6227 }
6228
6229 #[must_use]
6232 pub fn difference(&self, other: &Self) -> Self {
6233 let other_set: FxHashSet<i64> = other
6234 .index
6235 .labels()
6236 .iter()
6237 .filter_map(|label| match label {
6238 IndexLabel::Timedelta64(n) => Some(*n),
6239 _ => None,
6240 })
6241 .collect();
6242 let mut seen = FxHashSet::<i64>::default();
6243 let nanos: Vec<i64> = self
6244 .index
6245 .labels()
6246 .iter()
6247 .filter_map(|label| match label {
6248 IndexLabel::Timedelta64(n) if !other_set.contains(n) && seen.insert(*n) => Some(*n),
6249 _ => None,
6250 })
6251 .collect();
6252 let mut out = Self::new(nanos);
6253 if let Some(name) = self.name() {
6256 out = out.set_name(name);
6257 }
6258 out
6259 }
6260
6261 #[must_use]
6264 pub fn symmetric_difference(&self, other: &Self) -> Self {
6265 let self_set: FxHashSet<i64> = self
6266 .index
6267 .labels()
6268 .iter()
6269 .filter_map(|label| match label {
6270 IndexLabel::Timedelta64(n) => Some(*n),
6271 _ => None,
6272 })
6273 .collect();
6274 let other_set: FxHashSet<i64> = other
6275 .index
6276 .labels()
6277 .iter()
6278 .filter_map(|label| match label {
6279 IndexLabel::Timedelta64(n) => Some(*n),
6280 _ => None,
6281 })
6282 .collect();
6283 let mut seen = FxHashSet::<i64>::default();
6284 let mut nanos: Vec<i64> = Vec::new();
6285 for label in self.index.labels() {
6286 if let IndexLabel::Timedelta64(n) = label
6287 && !other_set.contains(n)
6288 && seen.insert(*n)
6289 {
6290 nanos.push(*n);
6291 }
6292 }
6293 for label in other.index.labels() {
6294 if let IndexLabel::Timedelta64(n) = label
6295 && !self_set.contains(n)
6296 && seen.insert(*n)
6297 {
6298 nanos.push(*n);
6299 }
6300 }
6301 let mut out = Self::new(nanos);
6302 if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
6303 out = out.set_name(name);
6304 }
6305 out
6306 }
6307
6308 #[must_use]
6312 pub fn sort_values(&self) -> Self {
6313 let mut nanos: Vec<i64> = self
6314 .index
6315 .labels()
6316 .iter()
6317 .filter_map(|label| match label {
6318 IndexLabel::Timedelta64(n) => Some(*n),
6319 _ => None,
6320 })
6321 .collect();
6322 nanos.sort_unstable();
6323 let mut out = Self::new(nanos);
6324 if let Some(name) = self.name() {
6325 out = out.set_name(name);
6326 }
6327 out
6328 }
6329
6330 #[must_use]
6332 pub fn sort(&self) -> Self {
6333 self.sort_values()
6334 }
6335
6336 pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
6339 let labels = self.index.labels();
6340 if loc >= labels.len() {
6341 return Err(IndexError::OutOfBounds {
6342 position: loc,
6343 length: labels.len(),
6344 });
6345 }
6346 let nanos: Vec<i64> = labels
6347 .iter()
6348 .enumerate()
6349 .filter(|(i, _)| *i != loc)
6350 .filter_map(|(_, label)| match label {
6351 IndexLabel::Timedelta64(n) => Some(*n),
6352 _ => None,
6353 })
6354 .collect();
6355 let mut out = Self::new(nanos);
6356 if let Some(name) = self.name() {
6357 out = out.set_name(name);
6358 }
6359 Ok(out)
6360 }
6361}
6362
6363#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
6369pub struct PeriodIndex {
6370 values: Vec<Period>,
6371 name: Option<String>,
6372}
6373
6374impl PeriodIndex {
6375 #[must_use]
6376 pub fn new(values: Vec<Period>) -> Self {
6377 Self { values, name: None }
6378 }
6379
6380 #[must_use]
6383 pub fn from_ordinals(ordinals: &[i64], freq: PeriodFreq) -> Self {
6384 let values: Vec<Period> = ordinals
6385 .iter()
6386 .map(|&ordinal| Period::new(ordinal, freq))
6387 .collect();
6388 Self { values, name: None }
6389 }
6390
6391 pub fn from_fields(fields: PeriodFields<'_>) -> Result<Self, IndexError> {
6392 validate_period_fields(&fields)?;
6393 let freq = period_fields_freq(&fields)?;
6394 let values = (0..fields.year.len())
6395 .map(|position| period_from_fields_at(&fields, freq, position))
6396 .collect::<Result<Vec<_>, _>>()?;
6397 Ok(Self { values, name: None })
6398 }
6399
6400 #[must_use]
6401 pub fn from_range(start: Period, periods: usize) -> Self {
6402 Self::new(fp_types::period_range(start, periods))
6403 }
6404
6405 #[must_use]
6406 pub fn values(&self) -> &[Period] {
6407 &self.values
6408 }
6409
6410 #[must_use]
6411 pub fn len(&self) -> usize {
6412 self.values.len()
6413 }
6414
6415 #[must_use]
6416 pub fn is_empty(&self) -> bool {
6417 self.values.is_empty()
6418 }
6419
6420 #[must_use]
6421 pub fn name(&self) -> Option<&str> {
6422 self.name.as_deref()
6423 }
6424
6425 #[must_use]
6426 pub fn set_name(&self, name: &str) -> Self {
6427 let mut out = self.clone();
6428 out.name = Some(name.to_owned());
6429 out
6430 }
6431
6432 #[must_use]
6433 pub fn set_names(&self, name: Option<&str>) -> Self {
6434 let mut out = self.clone();
6435 out.name = name.map(str::to_owned);
6436 out
6437 }
6438
6439 #[must_use]
6440 pub fn rename_index(&self, name: Option<&str>) -> Self {
6441 self.set_names(name)
6442 }
6443
6444 #[must_use]
6445 pub fn names(&self) -> Vec<Option<String>> {
6446 vec![self.name.clone()]
6447 }
6448
6449 #[must_use]
6450 pub fn copy(&self) -> Self {
6451 self.clone()
6452 }
6453
6454 #[must_use]
6455 pub fn shape(&self) -> (usize,) {
6456 (self.len(),)
6457 }
6458
6459 #[must_use]
6460 pub fn size(&self) -> usize {
6461 self.len()
6462 }
6463
6464 #[must_use]
6465 pub fn empty(&self) -> bool {
6466 self.is_empty()
6467 }
6468
6469 #[must_use]
6470 pub fn dtype(&self) -> String {
6471 self.freq().map_or_else(
6472 || "period[unknown]".to_owned(),
6473 |freq| format!("period[{freq}]"),
6474 )
6475 }
6476
6477 #[must_use]
6478 pub fn dtypes(&self) -> Vec<String> {
6479 vec![self.dtype()]
6480 }
6481
6482 #[must_use]
6487 pub fn hasnans(&self) -> bool {
6488 false
6489 }
6490
6491 #[must_use]
6493 pub fn isna(&self) -> Vec<bool> {
6494 vec![false; self.len()]
6495 }
6496
6497 #[must_use]
6499 pub fn isnull(&self) -> Vec<bool> {
6500 self.isna()
6501 }
6502
6503 #[must_use]
6505 pub fn notna(&self) -> Vec<bool> {
6506 vec![true; self.len()]
6507 }
6508
6509 #[must_use]
6511 pub fn notnull(&self) -> Vec<bool> {
6512 self.notna()
6513 }
6514
6515 #[must_use]
6519 pub fn dropna(&self) -> Self {
6520 self.clone()
6521 }
6522
6523 #[must_use]
6524 pub fn memory_usage(&self, deep: bool) -> usize {
6525 let name_bytes = if deep {
6526 self.name.as_ref().map_or(0, String::len)
6527 } else {
6528 0
6529 };
6530 self.values.len() * std::mem::size_of::<Period>() + name_bytes
6531 }
6532
6533 #[must_use]
6534 pub fn nbytes(&self) -> usize {
6535 self.memory_usage(false)
6536 }
6537
6538 fn compare_periods(left: &Period, right: &Period) -> std::cmp::Ordering {
6539 left.cmp_same_freq(right).unwrap_or_else(|| {
6540 left.freq
6541 .cmp(&right.freq)
6542 .then(left.ordinal.cmp(&right.ordinal))
6543 })
6544 }
6545
6546 #[must_use]
6547 pub fn is_unique(&self) -> bool {
6548 let unique: FxHashSet<&Period> = self.values.iter().collect();
6549 unique.len() == self.values.len()
6550 }
6551
6552 #[must_use]
6553 pub fn has_duplicates(&self) -> bool {
6554 !self.is_unique()
6555 }
6556
6557 #[must_use]
6558 pub fn is_monotonic_increasing(&self) -> bool {
6559 self.values
6560 .windows(2)
6561 .all(|window| Self::compare_periods(&window[0], &window[1]).is_le())
6562 }
6563
6564 #[must_use]
6565 pub fn is_monotonic(&self) -> bool {
6566 self.is_monotonic_increasing()
6567 }
6568
6569 #[must_use]
6570 pub fn is_monotonic_decreasing(&self) -> bool {
6571 self.values
6572 .windows(2)
6573 .all(|window| Self::compare_periods(&window[0], &window[1]).is_ge())
6574 }
6575
6576 #[must_use]
6577 pub fn nunique(&self) -> usize {
6578 self.values.iter().collect::<FxHashSet<_>>().len()
6579 }
6580
6581 #[must_use]
6582 pub fn ndim(&self) -> usize {
6583 1
6584 }
6585
6586 pub fn item(&self) -> Result<Period, IndexError> {
6587 if self.values.len() == 1 {
6588 Ok(self.values[0])
6589 } else {
6590 Err(IndexError::InvalidArgument(format!(
6591 "item requires exactly one label, got {}",
6592 self.values.len()
6593 )))
6594 }
6595 }
6596
6597 #[must_use]
6598 pub fn is_(&self, other: &Self) -> bool {
6599 std::ptr::eq(self, other)
6600 }
6601
6602 #[must_use]
6603 pub fn equals(&self, other: &Self) -> bool {
6604 self.values == other.values
6605 }
6606
6607 #[must_use]
6608 pub fn identical(&self, other: &Self) -> bool {
6609 self.equals(other) && self.name == other.name
6610 }
6611
6612 #[must_use]
6613 pub fn holds_integer(&self) -> bool {
6614 false
6615 }
6616
6617 #[must_use]
6618 pub fn inferred_type(&self) -> &'static str {
6619 "period"
6620 }
6621
6622 #[must_use]
6623 pub fn is_boolean(&self) -> bool {
6624 false
6625 }
6626
6627 #[must_use]
6628 pub fn is_categorical(&self) -> bool {
6629 false
6630 }
6631
6632 #[must_use]
6633 pub fn is_floating(&self) -> bool {
6634 false
6635 }
6636
6637 #[must_use]
6638 pub fn is_integer(&self) -> bool {
6639 false
6640 }
6641
6642 #[must_use]
6643 pub fn is_interval(&self) -> bool {
6644 false
6645 }
6646
6647 #[must_use]
6648 pub fn is_numeric(&self) -> bool {
6649 false
6650 }
6651
6652 #[must_use]
6653 pub fn is_object(&self) -> bool {
6654 false
6655 }
6656
6657 #[must_use]
6658 pub fn freq(&self) -> Option<PeriodFreq> {
6659 self.values.first().map(|period| period.freq)
6660 }
6661
6662 #[must_use]
6664 pub fn asi8(&self) -> Vec<i64> {
6665 self.values.iter().map(|period| period.ordinal).collect()
6666 }
6667
6668 #[must_use]
6669 pub fn to_list(&self) -> Vec<Period> {
6670 self.values.clone()
6671 }
6672
6673 #[must_use]
6674 pub fn tolist(&self) -> Vec<Period> {
6675 self.to_list()
6676 }
6677
6678 #[must_use]
6679 pub fn to_numpy(&self) -> Vec<Period> {
6680 self.values.clone()
6681 }
6682
6683 #[must_use]
6684 pub fn array(&self) -> Vec<Period> {
6685 self.values.clone()
6686 }
6687
6688 #[must_use]
6689 pub fn to_index(&self) -> Index {
6690 Index::from_utf8(self.values.iter().map(Period::to_string).collect())
6691 .set_names(self.name.as_deref())
6692 }
6693
6694 #[must_use]
6697 pub fn unique(&self) -> Self {
6698 let mut seen = FxHashSet::<&Period>::default();
6699 let mut uniques = Vec::<Period>::new();
6700 for period in &self.values {
6701 if seen.insert(period) {
6702 uniques.push(*period);
6703 }
6704 }
6705 Self {
6706 values: uniques,
6707 name: self.name.clone(),
6708 }
6709 }
6710
6711 #[must_use]
6714 pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
6715 let mut result = vec![false; self.values.len()];
6716 match keep {
6717 DuplicateKeep::First => {
6718 let mut seen = FxHashSet::<&Period>::default();
6719 for (i, period) in self.values.iter().enumerate() {
6720 if !seen.insert(period) {
6721 result[i] = true;
6722 }
6723 }
6724 }
6725 DuplicateKeep::Last => {
6726 let mut seen = FxHashSet::<&Period>::default();
6727 for (i, period) in self.values.iter().enumerate().rev() {
6728 if !seen.insert(period) {
6729 result[i] = true;
6730 }
6731 }
6732 }
6733 DuplicateKeep::None => {
6734 let mut counts = FxHashMap::<&Period, usize>::default();
6735 for period in &self.values {
6736 *counts.entry(period).or_insert(0) += 1;
6737 }
6738 for (i, period) in self.values.iter().enumerate() {
6739 if counts.get(period).copied().unwrap_or(0) > 1 {
6740 result[i] = true;
6741 }
6742 }
6743 }
6744 }
6745 result
6746 }
6747
6748 #[must_use]
6751 pub fn drop_duplicates(&self) -> Self {
6752 self.unique()
6753 }
6754
6755 #[must_use]
6758 pub fn value_counts(&self) -> Vec<(Period, usize)> {
6759 let mut order = Vec::<&Period>::new();
6760 let mut counts = FxHashMap::<&Period, usize>::default();
6761 for period in &self.values {
6762 let entry = counts.entry(period).or_insert_with(|| {
6763 order.push(period);
6764 0
6765 });
6766 *entry += 1;
6767 }
6768 let mut pairs: Vec<(Period, usize)> = order.iter().map(|p| (**p, counts[*p])).collect();
6769 pairs.sort_by_key(|entry| std::cmp::Reverse(entry.1));
6770 pairs
6771 }
6772
6773 pub fn take(&self, positions: &[usize]) -> Result<Self, IndexError> {
6777 for &p in positions {
6778 if p >= self.values.len() {
6779 return Err(IndexError::OutOfBounds {
6780 position: p,
6781 length: self.values.len(),
6782 });
6783 }
6784 }
6785 let taken: Vec<Period> = positions.iter().map(|&p| self.values[p]).collect();
6786 Ok(Self {
6787 values: taken,
6788 name: self.name.clone(),
6789 })
6790 }
6791
6792 #[must_use]
6795 pub fn repeat(&self, repeats: usize) -> Self {
6796 let mut out = Vec::with_capacity(self.values.len() * repeats);
6797 for &period in &self.values {
6798 for _ in 0..repeats {
6799 out.push(period);
6800 }
6801 }
6802 Self {
6803 values: out,
6804 name: self.name.clone(),
6805 }
6806 }
6807
6808 #[must_use]
6815 pub fn diff(&self, periods: i64) -> Vec<Option<i64>> {
6816 positional_diff(self.values.len(), periods, |current, previous| {
6817 self.values[current].diff(&self.values[previous])
6818 })
6819 }
6820
6821 pub fn asfreq(&self, freq: &str) -> Result<Self, IndexError> {
6825 self.asfreq_with_how(freq, "end")
6826 }
6827
6828 pub fn asfreq_with_how(&self, freq: &str, how: &str) -> Result<Self, IndexError> {
6833 let target_freq = PeriodFreq::parse(freq).ok_or_else(|| {
6834 IndexError::InvalidArgument(format!("asfreq: unsupported frequency '{freq}'"))
6835 })?;
6836 let boundary = parse_period_boundary_how(how, "asfreq")?;
6837 let values = self
6838 .values
6839 .iter()
6840 .copied()
6841 .map(|period| {
6842 let nanos = period_boundary_nanos(period, boundary)?;
6843 datetime_period_ordinal_at_boundary(nanos, target_freq, boundary)
6844 .map(|ordinal| Period::new(ordinal, target_freq))
6845 })
6846 .collect::<Result<Vec<_>, _>>()?;
6847 Ok(Self {
6848 values,
6849 name: self.name.clone(),
6850 })
6851 }
6852
6853 fn to_timestamp_boundary(&self, boundary: PeriodBoundary) -> Result<DatetimeIndex, IndexError> {
6854 let nanos = self
6855 .values
6856 .iter()
6857 .copied()
6858 .map(|period| period_boundary_nanos(period, boundary))
6859 .collect::<Result<Vec<_>, _>>()?;
6860 let mut out = DatetimeIndex::new(nanos);
6861 if let Some(name) = self.name() {
6862 out = out.set_name(name);
6863 }
6864 Ok(out)
6865 }
6866
6867 pub fn start_time(&self) -> Result<DatetimeIndex, IndexError> {
6871 self.to_timestamp_boundary(PeriodBoundary::Start)
6872 }
6873
6874 pub fn end_time(&self) -> Result<DatetimeIndex, IndexError> {
6878 self.to_timestamp_boundary(PeriodBoundary::End)
6879 }
6880
6881 pub fn to_timestamp(&self, how: &str) -> Result<DatetimeIndex, IndexError> {
6886 match how.trim().to_ascii_lowercase().as_str() {
6887 "" | "s" | "start" | "begin" | "b" => self.start_time(),
6888 "e" | "end" | "finish" => self.end_time(),
6889 other => Err(IndexError::InvalidArgument(format!(
6890 "to_timestamp how must be 'start' or 'end', got {other:?}"
6891 ))),
6892 }
6893 }
6894
6895 pub fn qyear(&self) -> Result<Vec<i32>, IndexError> {
6900 self.values
6901 .iter()
6902 .copied()
6903 .map(period_qyear)
6904 .collect::<Result<Vec<_>, _>>()
6905 }
6906
6907 pub fn year(&self) -> Result<Vec<Option<i32>>, IndexError> {
6911 Ok(self.start_time()?.year())
6912 }
6913
6914 pub fn month(&self) -> Result<Vec<Option<u32>>, IndexError> {
6916 Ok(self.start_time()?.month())
6917 }
6918
6919 pub fn day(&self) -> Result<Vec<Option<u32>>, IndexError> {
6921 Ok(self.start_time()?.day())
6922 }
6923
6924 pub fn hour(&self) -> Result<Vec<Option<u32>>, IndexError> {
6926 Ok(self.start_time()?.hour())
6927 }
6928
6929 pub fn minute(&self) -> Result<Vec<Option<u32>>, IndexError> {
6931 Ok(self.start_time()?.minute())
6932 }
6933
6934 pub fn second(&self) -> Result<Vec<Option<u32>>, IndexError> {
6936 Ok(self.start_time()?.second())
6937 }
6938
6939 pub fn quarter(&self) -> Result<Vec<Option<u32>>, IndexError> {
6941 Ok(self.start_time()?.quarter())
6942 }
6943
6944 pub fn weekday(&self) -> Result<Vec<Option<u32>>, IndexError> {
6946 Ok(self.start_time()?.weekday())
6947 }
6948
6949 pub fn dayofweek(&self) -> Result<Vec<Option<u32>>, IndexError> {
6952 self.weekday()
6953 }
6954
6955 pub fn day_of_week(&self) -> Result<Vec<Option<u32>>, IndexError> {
6958 self.weekday()
6959 }
6960
6961 pub fn dayofyear(&self) -> Result<Vec<Option<u32>>, IndexError> {
6963 Ok(self.start_time()?.dayofyear())
6964 }
6965
6966 pub fn day_of_year(&self) -> Result<Vec<Option<u32>>, IndexError> {
6969 self.dayofyear()
6970 }
6971
6972 pub fn days_in_month(&self) -> Result<Vec<Option<u32>>, IndexError> {
6974 Ok(self.start_time()?.daysinmonth())
6975 }
6976
6977 pub fn daysinmonth(&self) -> Result<Vec<Option<u32>>, IndexError> {
6980 self.days_in_month()
6981 }
6982
6983 pub fn week(&self) -> Result<Vec<Option<u32>>, IndexError> {
6985 Ok(self.start_time()?.week())
6986 }
6987
6988 pub fn weekofyear(&self) -> Result<Vec<Option<u32>>, IndexError> {
6991 self.week()
6992 }
6993
6994 pub fn is_leap_year(&self) -> Result<Vec<Option<bool>>, IndexError> {
6996 Ok(self.start_time()?.is_leap_year())
6997 }
6998
6999 #[must_use]
7001 pub fn resolution(&self) -> Option<&'static str> {
7002 self.values.first().map(|p| p.freq.resolution())
7003 }
7004
7005 pub fn strftime(&self, fmt: &str) -> Result<Vec<Option<String>>, IndexError> {
7007 Ok(self.start_time()?.strftime(fmt))
7008 }
7009
7010 fn ensure_homogeneous_freq(&self) -> Result<Option<PeriodFreq>, IndexError> {
7011 let mut iter = self.values.iter();
7012 let Some(first) = iter.next() else {
7013 return Ok(None);
7014 };
7015 for period in iter {
7016 if period.freq != first.freq {
7017 return Err(IndexError::InvalidArgument(format!(
7018 "PeriodIndex has mixed frequencies: {:?} and {:?}",
7019 first.freq, period.freq
7020 )));
7021 }
7022 }
7023 Ok(Some(first.freq))
7024 }
7025
7026 fn ensure_compatible_freq(&self, other: &Self) -> Result<(), IndexError> {
7027 if let (Some(left), Some(right)) = (self.values.first(), other.values.first())
7028 && left.freq != right.freq
7029 {
7030 return Err(IndexError::InvalidArgument(format!(
7031 "set operation: incompatible frequencies {:?} vs {:?}",
7032 left.freq, right.freq
7033 )));
7034 }
7035 self.ensure_homogeneous_freq()?;
7036 other.ensure_homogeneous_freq()?;
7037 Ok(())
7038 }
7039
7040 pub fn intersection(&self, other: &Self) -> Result<Self, IndexError> {
7043 self.ensure_compatible_freq(other)?;
7044 let other_set: FxHashSet<&Period> = other.values.iter().collect();
7045 let mut seen = FxHashSet::<&Period>::default();
7046 let values: Vec<Period> = self
7047 .values
7048 .iter()
7049 .filter(|p| other_set.contains(p) && seen.insert(p))
7050 .copied()
7051 .collect();
7052 Ok(Self {
7053 values,
7054 name: if self.name == other.name {
7055 self.name.clone()
7056 } else {
7057 None
7058 },
7059 })
7060 }
7061
7062 pub fn union(&self, other: &Self) -> Result<Self, IndexError> {
7065 self.ensure_compatible_freq(other)?;
7066 let mut seen = FxHashSet::<Period>::default();
7067 let values: Vec<Period> = self
7068 .values
7069 .iter()
7070 .chain(other.values.iter())
7071 .filter(|p| seen.insert(**p))
7072 .copied()
7073 .collect();
7074 Ok(Self {
7075 values,
7076 name: if self.name == other.name {
7077 self.name.clone()
7078 } else {
7079 None
7080 },
7081 })
7082 }
7083
7084 pub fn difference(&self, other: &Self) -> Result<Self, IndexError> {
7087 self.ensure_compatible_freq(other)?;
7088 let other_set: FxHashSet<&Period> = other.values.iter().collect();
7089 let mut seen = FxHashSet::<&Period>::default();
7090 let values: Vec<Period> = self
7091 .values
7092 .iter()
7093 .filter(|p| !other_set.contains(p) && seen.insert(p))
7094 .copied()
7095 .collect();
7096 Ok(Self {
7097 values,
7098 name: if self.name == other.name {
7099 self.name.clone()
7100 } else {
7101 None
7102 },
7103 })
7104 }
7105
7106 pub fn symmetric_difference(&self, other: &Self) -> Result<Self, IndexError> {
7109 self.ensure_compatible_freq(other)?;
7110 let self_set: FxHashSet<&Period> = self.values.iter().collect();
7111 let other_set: FxHashSet<&Period> = other.values.iter().collect();
7112 let mut seen = FxHashSet::<Period>::default();
7113 let mut values = Vec::<Period>::new();
7114 for p in &self.values {
7115 if !other_set.contains(p) && seen.insert(*p) {
7116 values.push(*p);
7117 }
7118 }
7119 for p in &other.values {
7120 if !self_set.contains(p) && seen.insert(*p) {
7121 values.push(*p);
7122 }
7123 }
7124 Ok(Self {
7125 values,
7126 name: if self.name == other.name {
7127 self.name.clone()
7128 } else {
7129 None
7130 },
7131 })
7132 }
7133
7134 pub fn sort_values(&self) -> Result<Self, IndexError> {
7137 self.ensure_homogeneous_freq()?;
7138 let mut periods = self.values.clone();
7139 periods.sort_by_key(|period| period.ordinal);
7140 Ok(Self {
7141 values: periods,
7142 name: self.name.clone(),
7143 })
7144 }
7145
7146 pub fn sort(&self) -> Result<Self, IndexError> {
7148 self.sort_values()
7149 }
7150
7151 pub fn argmax(&self) -> Result<usize, IndexError> {
7156 self.ensure_homogeneous_freq()?;
7157 if self.values.is_empty() {
7158 return Err(IndexError::InvalidArgument(
7159 "attempt to get argmax of an empty sequence".to_owned(),
7160 ));
7161 }
7162 let mut best = 0;
7163 for (i, period) in self.values.iter().enumerate().skip(1) {
7164 if period.ordinal > self.values[best].ordinal {
7165 best = i;
7166 }
7167 }
7168 Ok(best)
7169 }
7170
7171 pub fn argmin(&self) -> Result<usize, IndexError> {
7176 self.ensure_homogeneous_freq()?;
7177 if self.values.is_empty() {
7178 return Err(IndexError::InvalidArgument(
7179 "attempt to get argmin of an empty sequence".to_owned(),
7180 ));
7181 }
7182 let mut best = 0;
7183 for (i, period) in self.values.iter().enumerate().skip(1) {
7184 if period.ordinal < self.values[best].ordinal {
7185 best = i;
7186 }
7187 }
7188 Ok(best)
7189 }
7190
7191 pub fn argsort(&self) -> Result<Vec<usize>, IndexError> {
7194 self.ensure_homogeneous_freq()?;
7195 let mut positions: Vec<usize> = (0..self.values.len()).collect();
7196 positions.sort_by_key(|&i| self.values[i].ordinal);
7197 Ok(positions)
7198 }
7199
7200 pub fn mean(&self) -> Result<Option<Period>, IndexError> {
7203 let freq = match self.ensure_homogeneous_freq()? {
7204 Some(f) => f,
7205 None => return Ok(None),
7206 };
7207 let total: i128 = self.values.iter().map(|p| i128::from(p.ordinal)).sum();
7208 let count = self.values.len() as i128;
7209 let avg = i64::try_from(total / count)
7210 .map_err(|_| IndexError::InvalidArgument("mean: ordinal overflow".to_owned()))?;
7211 Ok(Some(Period::new(avg, freq)))
7212 }
7213
7214 pub fn median(&self) -> Result<Option<Period>, IndexError> {
7218 let freq = match self.ensure_homogeneous_freq()? {
7219 Some(f) => f,
7220 None => return Ok(None),
7221 };
7222 let mut ordinals: Vec<i64> = self.values.iter().map(|p| p.ordinal).collect();
7223 ordinals.sort_unstable();
7224 let mid = ordinals.len() / 2;
7225 let median = if ordinals.len() % 2 == 1 {
7226 ordinals[mid]
7227 } else {
7228 let total = i128::from(ordinals[mid - 1]) + i128::from(ordinals[mid]);
7229 i64::try_from(total / 2)
7230 .map_err(|_| IndexError::InvalidArgument("median: ordinal overflow".to_owned()))?
7231 };
7232 Ok(Some(Period::new(median, freq)))
7233 }
7234
7235 pub fn min(&self) -> Result<Option<Period>, IndexError> {
7239 self.ensure_homogeneous_freq()?;
7240 Ok(self
7241 .values
7242 .iter()
7243 .copied()
7244 .min_by_key(|period| period.ordinal))
7245 }
7246
7247 pub fn max(&self) -> Result<Option<Period>, IndexError> {
7249 self.ensure_homogeneous_freq()?;
7250 Ok(self
7251 .values
7252 .iter()
7253 .copied()
7254 .max_by_key(|period| period.ordinal))
7255 }
7256
7257 pub fn searchsorted(&self, value: Period, side: &str) -> Result<usize, IndexError> {
7262 if side != "left" && side != "right" {
7263 return Err(IndexError::InvalidArgument(format!(
7264 "searchsorted: side must be 'left' or 'right', got {side:?}"
7265 )));
7266 }
7267 if let Some(first) = self.values.first()
7268 && first.freq != value.freq
7269 {
7270 return Err(IndexError::InvalidArgument(format!(
7271 "searchsorted: needle frequency {:?} does not match index frequency {:?}",
7272 value.freq, first.freq
7273 )));
7274 }
7275 let mut lo = 0usize;
7276 let mut hi = self.values.len();
7277 while lo < hi {
7278 let mid = lo + (hi - lo) / 2;
7279 let cmp = self.values[mid].ordinal.cmp(&value.ordinal);
7280 use std::cmp::Ordering;
7281 let go_right = matches!(
7282 (cmp, side),
7283 (Ordering::Less, _) | (Ordering::Equal, "right")
7284 );
7285 if go_right {
7286 lo = mid + 1;
7287 } else {
7288 hi = mid;
7289 }
7290 }
7291 Ok(lo)
7292 }
7293
7294 pub fn slice_indexer(
7297 &self,
7298 start: Period,
7299 end: Period,
7300 ) -> Result<std::ops::Range<usize>, IndexError> {
7301 let (left, right) = self.slice_locs(start, end)?;
7302 Ok(left..right)
7303 }
7304
7305 pub fn slice_locs(&self, start: Period, end: Period) -> Result<(usize, usize), IndexError> {
7310 if !self.is_monotonic_increasing() {
7311 return Err(IndexError::InvalidArgument(
7312 "slice_locs requires a monotonic increasing PeriodIndex".to_owned(),
7313 ));
7314 }
7315 let left = self.searchsorted(start, "left")?;
7316 let right = self.searchsorted(end, "right")?;
7317 Ok((left, right))
7318 }
7319
7320 pub fn get_loc(&self, period: Period) -> Result<usize, IndexError> {
7323 self.values
7324 .iter()
7325 .position(|p| *p == period)
7326 .ok_or_else(|| {
7327 IndexError::InvalidArgument(format!("get_loc: period {period} not in PeriodIndex"))
7328 })
7329 }
7330
7331 #[must_use]
7333 pub fn rename(&self, name: &str) -> Self {
7334 self.set_name(name)
7335 }
7336
7337 #[must_use]
7340 pub fn reindex(&self, target: &Self) -> (Self, Vec<isize>) {
7341 let indexer = self.get_indexer(target.values());
7342 (target.clone(), indexer)
7343 }
7344
7345 #[must_use]
7348 pub fn get_indexer_non_unique(&self, targets: &[Period]) -> (Vec<isize>, Vec<usize>) {
7349 let mut by_value = FxHashMap::<Period, Vec<usize>>::default();
7350 for (i, period) in self.values.iter().enumerate() {
7351 by_value.entry(*period).or_default().push(i);
7352 }
7353 let mut positions = Vec::<isize>::new();
7354 let mut missing = Vec::<usize>::new();
7355 for (idx, target) in targets.iter().enumerate() {
7356 if let Some(matches) = by_value.get(target) {
7357 positions.extend(
7358 matches
7359 .iter()
7360 .map(|p| isize::try_from(*p).unwrap_or(isize::MAX)),
7361 );
7362 } else {
7363 positions.push(-1);
7364 missing.push(idx);
7365 }
7366 }
7367 (positions, missing)
7368 }
7369
7370 #[must_use]
7373 pub fn get_indexer_for(&self, targets: &[Period]) -> Vec<isize> {
7374 self.get_indexer(targets)
7375 }
7376
7377 #[must_use]
7381 pub fn get_indexer(&self, targets: &[Period]) -> Vec<isize> {
7382 let mut positions = FxHashMap::<Period, isize>::default();
7383 for (i, period) in self.values.iter().enumerate() {
7384 positions
7385 .entry(*period)
7386 .or_insert_with(|| isize::try_from(i).unwrap_or(isize::MAX));
7387 }
7388 targets
7389 .iter()
7390 .map(|p| positions.get(p).copied().unwrap_or(-1))
7391 .collect()
7392 }
7393
7394 pub fn r#where(&self, cond: &[bool], other: Period) -> Result<Self, IndexError> {
7398 if cond.len() != self.values.len() {
7399 return Err(IndexError::LengthMismatch {
7400 expected: self.values.len(),
7401 actual: cond.len(),
7402 context: "where: cond length must match index length".to_owned(),
7403 });
7404 }
7405 if let Some(first) = self.values.first()
7406 && first.freq != other.freq
7407 {
7408 return Err(IndexError::InvalidArgument(format!(
7409 "where: replacement frequency {:?} does not match index frequency {:?}",
7410 other.freq, first.freq
7411 )));
7412 }
7413 let values: Vec<Period> = self
7414 .values
7415 .iter()
7416 .zip(cond.iter())
7417 .map(|(period, &keep)| if keep { *period } else { other })
7418 .collect();
7419 Ok(Self {
7420 values,
7421 name: self.name.clone(),
7422 })
7423 }
7424
7425 pub fn putmask(&self, mask: &[bool], value: Period) -> Result<Self, IndexError> {
7428 if mask.len() != self.values.len() {
7429 return Err(IndexError::LengthMismatch {
7430 expected: self.values.len(),
7431 actual: mask.len(),
7432 context: "putmask: mask length must match index length".to_owned(),
7433 });
7434 }
7435 if let Some(first) = self.values.first()
7436 && first.freq != value.freq
7437 {
7438 return Err(IndexError::InvalidArgument(format!(
7439 "putmask: replacement frequency {:?} does not match index frequency {:?}",
7440 value.freq, first.freq
7441 )));
7442 }
7443 let values: Vec<Period> = self
7444 .values
7445 .iter()
7446 .zip(mask.iter())
7447 .map(|(period, &replace)| if replace { value } else { *period })
7448 .collect();
7449 Ok(Self {
7450 values,
7451 name: self.name.clone(),
7452 })
7453 }
7454
7455 pub fn insert(&self, loc: usize, period: Period) -> Result<Self, IndexError> {
7458 if loc > self.values.len() {
7459 return Err(IndexError::OutOfBounds {
7460 position: loc,
7461 length: self.values.len(),
7462 });
7463 }
7464 let mut periods = self.values.clone();
7465 periods.insert(loc, period);
7466 Ok(Self {
7467 values: periods,
7468 name: self.name.clone(),
7469 })
7470 }
7471
7472 pub fn shift(&self, n: i64) -> Result<Self, IndexError> {
7476 self.ensure_homogeneous_freq()?;
7477 let values: Vec<Period> = self.values.iter().map(|p| p.shift(n)).collect();
7478 Ok(Self {
7479 values,
7480 name: self.name.clone(),
7481 })
7482 }
7483
7484 #[must_use]
7486 pub fn round(&self, _freq: &str) -> Self {
7487 self.clone()
7488 }
7489
7490 #[must_use]
7495 pub fn is_full(&self) -> bool {
7496 if self.values.len() <= 1 {
7497 return true;
7498 }
7499 let first_freq = self.values[0].freq;
7501 if self.values.iter().any(|p| p.freq != first_freq) {
7502 return false;
7503 }
7504 let mut sorted: Vec<i64> = self.values.iter().map(|p| p.ordinal).collect();
7505 sorted.sort_unstable();
7506 sorted.windows(2).all(|w| w[1] - w[0] == 1)
7507 }
7508
7509 #[must_use]
7511 pub fn format(&self) -> Vec<String> {
7512 self.values.iter().map(Period::to_string).collect()
7513 }
7514
7515 #[must_use]
7518 pub fn freqstr(&self) -> Option<String> {
7519 self.freq().map(|f| f.to_string())
7520 }
7521
7522 #[must_use]
7527 pub fn inferred_freq(&self) -> Option<String> {
7528 match self.ensure_homogeneous_freq() {
7529 Ok(Some(freq)) => Some(freq.to_string()),
7530 Ok(None) | Err(_) => None,
7531 }
7532 }
7533
7534 #[must_use]
7537 pub fn to_flat_index(&self) -> Index {
7538 self.to_index()
7539 }
7540
7541 #[must_use]
7543 pub fn r#str(&self) -> IndexStringAccessor<'_> {
7544 IndexStringAccessor::owned(self.to_flat_index())
7545 }
7546
7547 #[must_use]
7549 pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
7550 self.to_flat_index().to_frame()
7551 }
7552
7553 #[must_use]
7555 pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
7556 self.to_flat_index().to_series()
7557 }
7558
7559 #[must_use]
7561 pub fn any(&self) -> bool {
7562 self.to_flat_index().any()
7563 }
7564
7565 #[must_use]
7567 pub fn all(&self) -> bool {
7568 self.to_flat_index().all()
7569 }
7570
7571 pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
7573 self.to_flat_index().get_level_values(level)
7574 }
7575
7576 pub fn droplevel(&self, level: usize) -> Result<Index, IndexError> {
7578 self.to_flat_index().droplevel(level)
7579 }
7580
7581 #[must_use]
7583 pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
7584 self.to_flat_index().groupby()
7585 }
7586
7587 #[must_use]
7589 pub fn map<F>(&self, func: F) -> Index
7590 where
7591 F: Fn(&IndexLabel) -> IndexLabel,
7592 {
7593 self.to_flat_index().map(func)
7594 }
7595
7596 pub fn astype(&self, dtype: &str) -> Result<Index, IndexError> {
7598 match dtype {
7599 "int" | "int64" => Ok(Index::from_i64(
7600 self.values.iter().map(|period| period.ordinal).collect(),
7601 )
7602 .set_names(self.name())),
7603 "datetime64[ns]" => Ok(Index::from_datetime64(
7604 self.values
7605 .iter()
7606 .copied()
7607 .map(period_start_nanos)
7608 .collect::<Result<Vec<_>, _>>()?,
7609 )
7610 .set_names(self.name())),
7611 _ => self.to_flat_index().astype(dtype),
7612 }
7613 }
7614
7615 #[must_use]
7617 pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
7618 self.to_flat_index().asof(key)
7619 }
7620
7621 #[must_use]
7623 pub fn asof_locs(&self, where_index: &Index, mask: Option<&[bool]>) -> Vec<Option<usize>> {
7624 self.to_flat_index().asof_locs(where_index, mask)
7625 }
7626
7627 #[must_use]
7629 pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Index {
7630 self.to_flat_index().drop(labels_to_drop)
7631 }
7632
7633 pub fn join(&self, other: &Index, how: &str) -> Result<Index, IndexError> {
7635 self.to_flat_index().join(other, how)
7636 }
7637
7638 #[must_use]
7640 pub fn sortlevel(&self) -> (Index, Vec<usize>) {
7641 self.to_flat_index().sortlevel()
7642 }
7643
7644 #[must_use]
7646 pub fn view(&self) -> Self {
7647 self.clone()
7648 }
7649
7650 #[must_use]
7653 pub fn transpose(&self) -> Self {
7654 self.clone()
7655 }
7656
7657 #[allow(non_snake_case)]
7659 #[must_use]
7660 pub fn T(&self) -> Self {
7661 self.transpose()
7662 }
7663
7664 #[must_use]
7667 pub fn ravel(&self) -> Vec<Period> {
7668 self.values.clone()
7669 }
7670
7671 #[must_use]
7673 pub fn nlevels(&self) -> usize {
7674 1
7675 }
7676
7677 #[must_use]
7680 pub fn infer_objects(&self) -> Self {
7681 self.clone()
7682 }
7683
7684 #[must_use]
7686 pub fn isin(&self, values: &[Period]) -> Vec<bool> {
7687 let needle: FxHashSet<Period> = values.iter().copied().collect();
7688 self.values.iter().map(|p| needle.contains(p)).collect()
7689 }
7690
7691 #[must_use]
7695 pub fn append(&self, other: &Self) -> Self {
7696 let mut periods = self.values.clone();
7697 periods.extend_from_slice(&other.values);
7698 let name = if self.name == other.name {
7699 self.name.clone()
7700 } else {
7701 None
7702 };
7703 Self {
7704 values: periods,
7705 name,
7706 }
7707 }
7708
7709 pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
7712 if loc >= self.values.len() {
7713 return Err(IndexError::OutOfBounds {
7714 position: loc,
7715 length: self.values.len(),
7716 });
7717 }
7718 let mut periods = self.values.clone();
7719 periods.remove(loc);
7720 Ok(Self {
7721 values: periods,
7722 name: self.name.clone(),
7723 })
7724 }
7725
7726 #[must_use]
7730 pub fn factorize(&self) -> (Vec<isize>, Self) {
7731 let mut positions = FxHashMap::<&Period, isize>::default();
7732 let mut uniques = Vec::<Period>::new();
7733 let mut codes = Vec::with_capacity(self.values.len());
7734 for period in &self.values {
7735 if let Some(code) = positions.get(period) {
7736 codes.push(*code);
7737 } else {
7738 let code = isize::try_from(uniques.len()).unwrap_or(isize::MAX);
7739 positions.insert(period, code);
7740 uniques.push(*period);
7741 codes.push(code);
7742 }
7743 }
7744 (
7745 codes,
7746 Self {
7747 values: uniques,
7748 name: self.name.clone(),
7749 },
7750 )
7751 }
7752}
7753
7754#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
7756pub struct RangeIndex {
7757 start: i64,
7758 stop: i64,
7759 step: i64,
7760 name: Option<String>,
7761}
7762
7763impl RangeIndex {
7764 pub fn new(start: i64, stop: i64, step: i64) -> Result<Self, IndexError> {
7765 if step == 0 {
7766 return Err(IndexError::InvalidArgument(
7767 "RangeIndex step must be non-zero".to_owned(),
7768 ));
7769 }
7770 Ok(Self {
7771 start,
7772 stop,
7773 step,
7774 name: None,
7775 })
7776 }
7777
7778 pub fn from_len(len: usize) -> Result<Self, IndexError> {
7779 let stop = i64::try_from(len).map_err(|_| {
7780 IndexError::InvalidArgument("RangeIndex length exceeds i64 range".to_owned())
7781 })?;
7782 Self::new(0, stop, 1)
7783 }
7784
7785 #[must_use]
7786 pub const fn start(&self) -> i64 {
7787 self.start
7788 }
7789
7790 #[must_use]
7791 pub const fn stop(&self) -> i64 {
7792 self.stop
7793 }
7794
7795 #[must_use]
7796 pub const fn step(&self) -> i64 {
7797 self.step
7798 }
7799
7800 #[must_use]
7801 pub fn len(&self) -> usize {
7802 let start = i128::from(self.start);
7803 let stop = i128::from(self.stop);
7804 let step = i128::from(self.step);
7805 let len = if step > 0 {
7806 if start >= stop {
7807 0
7808 } else {
7809 (stop - start + step - 1) / step
7810 }
7811 } else if start <= stop {
7812 0
7813 } else {
7814 let positive_step = -step;
7815 (start - stop + positive_step - 1) / positive_step
7816 };
7817 usize::try_from(len).unwrap_or(usize::MAX)
7818 }
7819
7820 #[must_use]
7821 pub fn is_empty(&self) -> bool {
7822 self.len() == 0
7823 }
7824
7825 #[must_use]
7826 pub fn name(&self) -> Option<&str> {
7827 self.name.as_deref()
7828 }
7829
7830 #[must_use]
7831 pub fn set_name(&self, name: &str) -> Self {
7832 let mut out = self.clone();
7833 out.name = Some(name.to_owned());
7834 out
7835 }
7836
7837 #[must_use]
7838 pub fn set_names(&self, name: Option<&str>) -> Self {
7839 let mut out = self.clone();
7840 out.name = name.map(str::to_owned);
7841 out
7842 }
7843
7844 #[must_use]
7845 pub fn rename_index(&self, name: Option<&str>) -> Self {
7846 self.set_names(name)
7847 }
7848
7849 #[must_use]
7850 pub fn names(&self) -> Vec<Option<String>> {
7851 vec![self.name.clone()]
7852 }
7853
7854 #[must_use]
7855 pub fn copy(&self) -> Self {
7856 self.clone()
7857 }
7858
7859 #[must_use]
7860 pub fn shape(&self) -> (usize,) {
7861 (self.len(),)
7862 }
7863
7864 #[must_use]
7865 pub fn size(&self) -> usize {
7866 self.len()
7867 }
7868
7869 #[must_use]
7870 pub fn empty(&self) -> bool {
7871 self.is_empty()
7872 }
7873
7874 #[must_use]
7875 pub fn dtype(&self) -> &'static str {
7876 "int64"
7877 }
7878
7879 #[must_use]
7880 pub fn dtypes(&self) -> Vec<&'static str> {
7881 vec![self.dtype()]
7882 }
7883
7884 #[must_use]
7885 pub fn memory_usage(&self, _deep: bool) -> usize {
7886 self.len() * std::mem::size_of::<i64>()
7887 }
7888
7889 #[must_use]
7890 pub fn nbytes(&self) -> usize {
7891 self.memory_usage(false)
7892 }
7893
7894 #[must_use]
7895 pub fn is_unique(&self) -> bool {
7896 true
7897 }
7898
7899 #[must_use]
7900 pub fn has_duplicates(&self) -> bool {
7901 false
7902 }
7903
7904 #[must_use]
7905 pub fn is_monotonic_increasing(&self) -> bool {
7906 self.len() <= 1 || self.step > 0
7907 }
7908
7909 #[must_use]
7910 pub fn is_monotonic(&self) -> bool {
7911 self.is_monotonic_increasing()
7912 }
7913
7914 #[must_use]
7915 pub fn is_monotonic_decreasing(&self) -> bool {
7916 self.len() <= 1 || self.step < 0
7917 }
7918
7919 #[must_use]
7920 pub fn nunique(&self) -> usize {
7921 self.len()
7922 }
7923
7924 #[must_use]
7925 pub fn ndim(&self) -> usize {
7926 1
7927 }
7928
7929 pub fn item(&self) -> Result<i64, IndexError> {
7930 if self.len() == 1 {
7931 Ok(self.start)
7932 } else {
7933 Err(IndexError::InvalidArgument(format!(
7934 "item requires exactly one label, got {}",
7935 self.len()
7936 )))
7937 }
7938 }
7939
7940 #[must_use]
7941 pub fn is_(&self, other: &Self) -> bool {
7942 std::ptr::eq(self, other)
7943 }
7944
7945 #[must_use]
7946 pub fn equals(&self, other: &Self) -> bool {
7947 self.values() == other.values()
7948 }
7949
7950 #[must_use]
7951 pub fn identical(&self, other: &Self) -> bool {
7952 self.equals(other) && self.name == other.name
7953 }
7954
7955 #[must_use]
7956 pub fn holds_integer(&self) -> bool {
7957 true
7958 }
7959
7960 #[must_use]
7961 pub fn inferred_type(&self) -> &'static str {
7962 if self.is_empty() { "empty" } else { "integer" }
7963 }
7964
7965 #[must_use]
7966 pub fn is_boolean(&self) -> bool {
7967 false
7968 }
7969
7970 #[must_use]
7971 pub fn is_categorical(&self) -> bool {
7972 false
7973 }
7974
7975 #[must_use]
7976 pub fn is_floating(&self) -> bool {
7977 false
7978 }
7979
7980 #[must_use]
7981 pub fn is_integer(&self) -> bool {
7982 true
7983 }
7984
7985 #[must_use]
7986 pub fn is_interval(&self) -> bool {
7987 false
7988 }
7989
7990 #[must_use]
7991 pub fn is_numeric(&self) -> bool {
7992 true
7993 }
7994
7995 #[must_use]
7996 pub fn is_object(&self) -> bool {
7997 false
7998 }
7999
8000 #[must_use]
8001 pub fn to_index(&self) -> Index {
8002 Index::from_range(self.start, self.stop, self.step).set_names(self.name.as_deref())
8003 }
8004
8005 #[must_use]
8006 pub fn values(&self) -> Vec<i64> {
8007 self.to_index()
8008 .labels()
8009 .iter()
8010 .filter_map(|label| match label {
8011 IndexLabel::Int64(value) => Some(*value),
8012 IndexLabel::Utf8(_)
8013 | IndexLabel::Timedelta64(_)
8014 | IndexLabel::Datetime64(_)
8015 | IndexLabel::Null(_) => None,
8016 })
8017 .collect()
8018 }
8019
8020 #[must_use]
8022 pub fn diff(&self, periods: i64) -> Vec<Option<i64>> {
8023 let values = self.values();
8024 positional_diff(values.len(), periods, |current, previous| {
8025 values[current].checked_sub(values[previous])
8026 })
8027 }
8028
8029 #[must_use]
8030 pub fn to_list(&self) -> Vec<i64> {
8031 self.values()
8032 }
8033
8034 #[must_use]
8035 pub fn tolist(&self) -> Vec<i64> {
8036 self.values()
8037 }
8038
8039 #[must_use]
8040 pub fn to_numpy(&self) -> Vec<i64> {
8041 self.values()
8042 }
8043
8044 #[must_use]
8045 pub fn array(&self) -> Vec<i64> {
8046 self.values()
8047 }
8048
8049 pub fn argmax(&self) -> Result<usize, IndexError> {
8055 if self.is_empty() {
8056 return Err(IndexError::InvalidArgument(
8057 "attempt to get argmax of an empty sequence".to_owned(),
8058 ));
8059 }
8060 if self.step > 0 {
8061 Ok(self.len() - 1)
8062 } else {
8063 Ok(0)
8064 }
8065 }
8066
8067 pub fn argmin(&self) -> Result<usize, IndexError> {
8069 if self.is_empty() {
8070 return Err(IndexError::InvalidArgument(
8071 "attempt to get argmin of an empty sequence".to_owned(),
8072 ));
8073 }
8074 if self.step > 0 {
8075 Ok(0)
8076 } else {
8077 Ok(self.len() - 1)
8078 }
8079 }
8080
8081 #[must_use]
8084 pub fn argsort(&self) -> Vec<usize> {
8085 let len = self.len();
8086 if self.step >= 0 {
8087 (0..len).collect()
8088 } else {
8089 (0..len).rev().collect()
8090 }
8091 }
8092
8093 #[must_use]
8096 pub fn duplicated(&self, _keep: DuplicateKeep) -> Vec<bool> {
8097 vec![false; self.len()]
8098 }
8099
8100 #[must_use]
8103 pub fn drop_duplicates(&self) -> Self {
8104 self.clone()
8105 }
8106
8107 #[must_use]
8110 pub fn isna(&self) -> Vec<bool> {
8111 vec![false; self.len()]
8112 }
8113
8114 #[must_use]
8116 pub fn isnull(&self) -> Vec<bool> {
8117 self.isna()
8118 }
8119
8120 #[must_use]
8122 pub fn notna(&self) -> Vec<bool> {
8123 vec![true; self.len()]
8124 }
8125
8126 #[must_use]
8128 pub fn notnull(&self) -> Vec<bool> {
8129 self.notna()
8130 }
8131
8132 #[must_use]
8134 pub fn hasnans(&self) -> bool {
8135 false
8136 }
8137
8138 #[must_use]
8141 pub fn dropna(&self) -> Self {
8142 self.clone()
8143 }
8144
8145 #[must_use]
8148 pub fn fillna(&self, _value: i64) -> Self {
8149 self.clone()
8150 }
8151
8152 #[must_use]
8154 pub fn format(&self) -> Vec<String> {
8155 self.values().into_iter().map(|v| v.to_string()).collect()
8156 }
8157
8158 #[must_use]
8162 pub fn factorize(&self) -> (Vec<usize>, Self) {
8163 ((0..self.len()).collect(), self.clone())
8164 }
8165
8166 pub fn take(&self, positions: &[usize]) -> Result<Index, IndexError> {
8170 let values = self.values();
8171 for &p in positions {
8172 if p >= values.len() {
8173 return Err(IndexError::OutOfBounds {
8174 position: p,
8175 length: values.len(),
8176 });
8177 }
8178 }
8179 let labels: Vec<IndexLabel> = positions
8180 .iter()
8181 .map(|&p| IndexLabel::Int64(values[p]))
8182 .collect();
8183 let mut idx = Index::new(labels);
8184 if let Some(name) = self.name() {
8185 idx = idx.set_name(name);
8186 }
8187 Ok(idx)
8188 }
8189
8190 #[must_use]
8194 pub fn repeat(&self, repeats: usize) -> Index {
8195 let mut labels = Vec::with_capacity(self.len() * repeats);
8196 for value in self.values() {
8197 for _ in 0..repeats {
8198 labels.push(IndexLabel::Int64(value));
8199 }
8200 }
8201 let mut idx = Index::new(labels);
8202 if let Some(name) = self.name() {
8203 idx = idx.set_name(name);
8204 }
8205 idx
8206 }
8207
8208 fn first_last(&self) -> Option<(i64, i64)> {
8211 let len = self.len();
8212 if len == 0 {
8213 return None;
8214 }
8215 let last = self.start + (len as i64 - 1) * self.step;
8216 Some((self.start, last))
8217 }
8218
8219 #[must_use]
8224 pub fn sort_values(&self) -> Self {
8225 if self.is_empty() || self.step >= 0 {
8226 return self.clone();
8227 }
8228 let len = self.len();
8229 let last = self.start + (len as i64 - 1) * self.step;
8230 let new_step = -self.step;
8231 let new_stop = last + (len as i64) * new_step;
8232 Self {
8233 start: last,
8234 stop: new_stop,
8235 step: new_step,
8236 name: self.name.clone(),
8237 }
8238 }
8239
8240 #[must_use]
8242 pub fn sort(&self) -> Self {
8243 self.sort_values()
8244 }
8245
8246 #[must_use]
8249 pub fn min(&self) -> Option<i64> {
8250 let (first, last) = self.first_last()?;
8251 Some(first.min(last))
8252 }
8253
8254 #[must_use]
8256 pub fn max(&self) -> Option<i64> {
8257 let (first, last) = self.first_last()?;
8258 Some(first.max(last))
8259 }
8260
8261 #[must_use]
8265 pub fn median(&self) -> Option<f64> {
8266 let len = self.len();
8267 if len == 0 {
8268 return None;
8269 }
8270 let values = self.values();
8271 let mid = len / 2;
8272 if len % 2 == 1 {
8273 Some(values[mid] as f64)
8274 } else {
8275 Some((values[mid - 1] as f64 + values[mid] as f64) / 2.0)
8276 }
8277 }
8278
8279 #[must_use]
8282 pub fn var(&self) -> Option<f64> {
8283 let values: Vec<f64> = self.values().into_iter().map(|v| v as f64).collect();
8284 if values.len() < 2 {
8285 return None;
8286 }
8287 let mean = values.iter().sum::<f64>() / values.len() as f64;
8288 Some(values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / (values.len() as f64 - 1.0))
8289 }
8290
8291 #[must_use]
8293 pub fn std(&self) -> Option<f64> {
8294 self.var().map(f64::sqrt)
8295 }
8296
8297 #[must_use]
8300 pub fn prod(&self) -> i64 {
8301 let mut total: i128 = 1;
8302 for v in self.values() {
8303 total = total.saturating_mul(i128::from(v));
8304 }
8305 i64::try_from(total).unwrap_or(if total > 0 { i64::MAX } else { i64::MIN })
8306 }
8307
8308 #[must_use]
8312 pub fn sum(&self) -> i64 {
8313 let len = self.len();
8314 if len == 0 {
8315 return 0;
8316 }
8317 let Some((first, last)) = self.first_last() else {
8318 return 0;
8319 };
8320 let n = i128::from(len as i64);
8321 let total = (i128::from(first) + i128::from(last)) * n / 2;
8322 i64::try_from(total).unwrap_or(i64::MAX)
8323 }
8324
8325 #[must_use]
8328 pub fn mean(&self) -> Option<f64> {
8329 let len = self.len();
8330 if len == 0 {
8331 return None;
8332 }
8333 let (first, last) = self.first_last()?;
8334 Some((first as f64 + last as f64) / 2.0)
8335 }
8336
8337 pub fn searchsorted(&self, value: i64, side: &str) -> Result<usize, IndexError> {
8342 if side != "left" && side != "right" {
8343 return Err(IndexError::InvalidArgument(format!(
8344 "searchsorted: side must be 'left' or 'right', got {side:?}"
8345 )));
8346 }
8347 if self.step < 0 {
8348 return Err(IndexError::InvalidArgument(
8349 "searchsorted requires a monotonically-increasing RangeIndex".to_owned(),
8350 ));
8351 }
8352 let values = self.values();
8353 let mut lo = 0usize;
8354 let mut hi = values.len();
8355 while lo < hi {
8356 let mid = lo + (hi - lo) / 2;
8357 let cmp = values[mid].cmp(&value);
8358 use std::cmp::Ordering;
8359 let go_right = matches!(
8360 (cmp, side),
8361 (Ordering::Less, _) | (Ordering::Equal, "right")
8362 );
8363 if go_right {
8364 lo = mid + 1;
8365 } else {
8366 hi = mid;
8367 }
8368 }
8369 Ok(lo)
8370 }
8371
8372 #[must_use]
8375 pub fn to_flat_index(&self) -> Index {
8376 let labels: Vec<IndexLabel> = self.values().into_iter().map(IndexLabel::Int64).collect();
8377 let mut idx = Index::new(labels);
8378 if let Some(name) = self.name() {
8379 idx = idx.set_name(name);
8380 }
8381 idx
8382 }
8383
8384 #[must_use]
8386 pub fn r#str(&self) -> IndexStringAccessor<'_> {
8387 IndexStringAccessor::owned(self.to_flat_index())
8388 }
8389
8390 #[must_use]
8392 pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
8393 self.to_flat_index().to_frame()
8394 }
8395
8396 #[must_use]
8398 pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
8399 self.to_flat_index().to_series()
8400 }
8401
8402 #[must_use]
8404 pub fn any(&self) -> bool {
8405 self.to_flat_index().any()
8406 }
8407
8408 #[must_use]
8410 pub fn all(&self) -> bool {
8411 self.to_flat_index().all()
8412 }
8413
8414 pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
8416 self.to_flat_index().get_level_values(level)
8417 }
8418
8419 pub fn droplevel(&self, level: usize) -> Result<Index, IndexError> {
8421 self.to_flat_index().droplevel(level)
8422 }
8423
8424 #[must_use]
8426 pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
8427 self.to_flat_index().groupby()
8428 }
8429
8430 #[must_use]
8432 pub fn map<F>(&self, func: F) -> Index
8433 where
8434 F: Fn(&IndexLabel) -> IndexLabel,
8435 {
8436 self.to_flat_index().map(func)
8437 }
8438
8439 pub fn astype(&self, dtype: &str) -> Result<Index, IndexError> {
8441 self.to_flat_index().astype(dtype)
8442 }
8443
8444 #[must_use]
8446 pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
8447 self.to_flat_index().asof(key)
8448 }
8449
8450 #[must_use]
8452 pub fn asof_locs(&self, where_index: &Index, mask: Option<&[bool]>) -> Vec<Option<usize>> {
8453 self.to_flat_index().asof_locs(where_index, mask)
8454 }
8455
8456 #[must_use]
8458 pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Index {
8459 self.to_flat_index().drop(labels_to_drop)
8460 }
8461
8462 pub fn join(&self, other: &Index, how: &str) -> Result<Index, IndexError> {
8464 self.to_flat_index().join(other, how)
8465 }
8466
8467 #[must_use]
8469 pub fn sortlevel(&self) -> (Index, Vec<usize>) {
8470 self.to_flat_index().sortlevel()
8471 }
8472
8473 #[must_use]
8475 pub fn view(&self) -> Self {
8476 self.clone()
8477 }
8478
8479 #[must_use]
8482 pub fn transpose(&self) -> Self {
8483 self.clone()
8484 }
8485
8486 #[allow(non_snake_case)]
8488 #[must_use]
8489 pub fn T(&self) -> Self {
8490 self.transpose()
8491 }
8492
8493 #[must_use]
8495 pub fn ravel(&self) -> Vec<i64> {
8496 self.values()
8497 }
8498
8499 #[must_use]
8501 pub fn nlevels(&self) -> usize {
8502 1
8503 }
8504
8505 #[must_use]
8507 pub fn infer_objects(&self) -> Self {
8508 self.clone()
8509 }
8510
8511 #[must_use]
8513 pub fn isin(&self, values: &[i64]) -> Vec<bool> {
8514 let needle: FxHashSet<i64> = values.iter().copied().collect();
8515 self.values().iter().map(|v| needle.contains(v)).collect()
8516 }
8517
8518 pub fn slice_indexer(
8521 &self,
8522 start: i64,
8523 end: i64,
8524 ) -> Result<std::ops::Range<usize>, IndexError> {
8525 let (left, right) = self.slice_locs(start, end)?;
8526 Ok(left..right)
8527 }
8528
8529 pub fn slice_locs(&self, start: i64, end: i64) -> Result<(usize, usize), IndexError> {
8533 if self.step < 0 {
8534 return Err(IndexError::InvalidArgument(
8535 "slice_locs requires a monotonic increasing RangeIndex".to_owned(),
8536 ));
8537 }
8538 let left = self.searchsorted(start, "left")?;
8539 let right = self.searchsorted(end, "right")?;
8540 Ok((left, right))
8541 }
8542
8543 pub fn get_loc(&self, value: i64) -> Result<usize, IndexError> {
8546 if self.step == 0 {
8547 return Err(IndexError::InvalidArgument(
8548 "get_loc: zero-step RangeIndex is invalid".to_owned(),
8549 ));
8550 }
8551 let offset = value - self.start;
8552 if offset.checked_rem_euclid(self.step) != Some(0) {
8553 return Err(IndexError::InvalidArgument(format!(
8554 "get_loc: {value} not in RangeIndex"
8555 )));
8556 }
8557 let pos = offset / self.step;
8558 if pos < 0 || (pos as usize) >= self.len() {
8559 return Err(IndexError::InvalidArgument(format!(
8560 "get_loc: {value} not in RangeIndex"
8561 )));
8562 }
8563 Ok(pos as usize)
8564 }
8565
8566 #[must_use]
8568 pub fn rename(&self, name: &str) -> Self {
8569 self.set_name(name)
8570 }
8571
8572 #[must_use]
8575 pub fn reindex(&self, target: &Self) -> (Self, Vec<isize>) {
8576 let indexer = self.get_indexer(&target.values());
8577 (target.clone(), indexer)
8578 }
8579
8580 #[must_use]
8585 pub fn get_indexer_non_unique(&self, targets: &[i64]) -> (Vec<isize>, Vec<usize>) {
8586 let mut positions = Vec::<isize>::new();
8587 let mut missing = Vec::<usize>::new();
8588 for (idx, target) in targets.iter().enumerate() {
8589 match self.get_loc(*target) {
8590 Ok(p) => positions.push(p as isize),
8591 Err(_) => {
8592 positions.push(-1);
8593 missing.push(idx);
8594 }
8595 }
8596 }
8597 (positions, missing)
8598 }
8599
8600 #[must_use]
8603 pub fn get_indexer_for(&self, targets: &[i64]) -> Vec<isize> {
8604 self.get_indexer(targets)
8605 }
8606
8607 #[must_use]
8610 pub fn get_indexer(&self, targets: &[i64]) -> Vec<isize> {
8611 targets
8612 .iter()
8613 .map(|&v| self.get_loc(v).map(|p| p as isize).unwrap_or(-1))
8614 .collect()
8615 }
8616
8617 pub fn r#where(&self, cond: &[bool], other: i64) -> Result<Index, IndexError> {
8621 let values = self.values();
8622 if cond.len() != values.len() {
8623 return Err(IndexError::LengthMismatch {
8624 expected: values.len(),
8625 actual: cond.len(),
8626 context: "where: cond length must match index length".to_owned(),
8627 });
8628 }
8629 let labels: Vec<IndexLabel> = values
8630 .into_iter()
8631 .zip(cond.iter())
8632 .map(|(v, &keep)| IndexLabel::Int64(if keep { v } else { other }))
8633 .collect();
8634 let mut out = Index::new(labels);
8635 if let Some(name) = self.name() {
8636 out = out.set_name(name);
8637 }
8638 Ok(out)
8639 }
8640
8641 pub fn putmask(&self, mask: &[bool], value: i64) -> Result<Index, IndexError> {
8644 let values = self.values();
8645 if mask.len() != values.len() {
8646 return Err(IndexError::LengthMismatch {
8647 expected: values.len(),
8648 actual: mask.len(),
8649 context: "putmask: mask length must match index length".to_owned(),
8650 });
8651 }
8652 let labels: Vec<IndexLabel> = values
8653 .into_iter()
8654 .zip(mask.iter())
8655 .map(|(v, &replace)| IndexLabel::Int64(if replace { value } else { v }))
8656 .collect();
8657 let mut out = Index::new(labels);
8658 if let Some(name) = self.name() {
8659 out = out.set_name(name);
8660 }
8661 Ok(out)
8662 }
8663
8664 fn set_op_via_int<F>(&self, other: &Self, op: F) -> Index
8665 where
8666 F: FnOnce(Vec<i64>, Vec<i64>) -> Vec<i64>,
8667 {
8668 let values = op(self.values(), other.values());
8669 let labels: Vec<IndexLabel> = values.into_iter().map(IndexLabel::Int64).collect();
8670 let mut idx = Index::new(labels);
8671 if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
8672 idx = idx.set_name(name);
8673 }
8674 idx
8675 }
8676
8677 #[must_use]
8681 pub fn intersection(&self, other: &Self) -> Index {
8682 self.set_op_via_int(other, |left, right| {
8683 let right_set: FxHashSet<i64> = right.into_iter().collect();
8684 let mut seen = FxHashSet::<i64>::default();
8685 left.into_iter()
8686 .filter(|v| right_set.contains(v) && seen.insert(*v))
8687 .collect()
8688 })
8689 }
8690
8691 #[must_use]
8694 pub fn union(&self, other: &Self) -> Index {
8695 self.set_op_via_int(other, |left, right| {
8696 let mut seen = FxHashSet::<i64>::default();
8697 left.into_iter()
8698 .chain(right)
8699 .filter(|v| seen.insert(*v))
8700 .collect()
8701 })
8702 }
8703
8704 #[must_use]
8707 pub fn difference(&self, other: &Self) -> Index {
8708 let right_set: FxHashSet<i64> = other.values().into_iter().collect();
8712 let mut seen = FxHashSet::<i64>::default();
8713 let labels: Vec<IndexLabel> = self
8714 .values()
8715 .into_iter()
8716 .filter(|v| !right_set.contains(v) && seen.insert(*v))
8717 .map(IndexLabel::Int64)
8718 .collect();
8719 let mut idx = Index::new(labels);
8720 if let Some(name) = self.name() {
8721 idx = idx.set_name(name);
8722 }
8723 idx
8724 }
8725
8726 #[must_use]
8729 pub fn symmetric_difference(&self, other: &Self) -> Index {
8730 self.set_op_via_int(other, |left, right| {
8731 let left_set: FxHashSet<i64> = left.iter().copied().collect();
8732 let right_set: FxHashSet<i64> = right.iter().copied().collect();
8733 let mut seen = FxHashSet::<i64>::default();
8734 let mut out = Vec::new();
8735 for v in left {
8736 if !right_set.contains(&v) && seen.insert(v) {
8737 out.push(v);
8738 }
8739 }
8740 for v in right {
8741 if !left_set.contains(&v) && seen.insert(v) {
8742 out.push(v);
8743 }
8744 }
8745 out
8746 })
8747 }
8748
8749 pub fn insert(&self, loc: usize, value: i64) -> Result<Index, IndexError> {
8753 let values = self.values();
8754 if loc > values.len() {
8755 return Err(IndexError::OutOfBounds {
8756 position: loc,
8757 length: values.len(),
8758 });
8759 }
8760 let mut labels: Vec<IndexLabel> = values.into_iter().map(IndexLabel::Int64).collect();
8761 labels.insert(loc, IndexLabel::Int64(value));
8762 let mut out = Index::new(labels);
8763 if let Some(name) = self.name() {
8764 out = out.set_name(name);
8765 }
8766 Ok(out)
8767 }
8768
8769 #[must_use]
8774 pub fn append(&self, other: &Self) -> Index {
8775 let mut labels: Vec<IndexLabel> =
8776 self.values().into_iter().map(IndexLabel::Int64).collect();
8777 labels.extend(other.values().into_iter().map(IndexLabel::Int64));
8778 let mut out = Index::new(labels);
8779 if let Some(name) = self.name().filter(|_| self.name() == other.name()) {
8780 out = out.set_name(name);
8781 }
8782 out
8783 }
8784
8785 pub fn delete(&self, loc: usize) -> Result<Index, IndexError> {
8789 let values = self.values();
8790 if loc >= values.len() {
8791 return Err(IndexError::OutOfBounds {
8792 position: loc,
8793 length: values.len(),
8794 });
8795 }
8796 let labels: Vec<IndexLabel> = values
8797 .into_iter()
8798 .enumerate()
8799 .filter(|(i, _)| *i != loc)
8800 .map(|(_, v)| IndexLabel::Int64(v))
8801 .collect();
8802 let mut out = Index::new(labels);
8803 if let Some(name) = self.name() {
8804 out = out.set_name(name);
8805 }
8806 Ok(out)
8807 }
8808}
8809
8810#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
8812pub struct CategoricalIndex {
8813 labels: Vec<String>,
8814 categories: Vec<String>,
8815 ordered: bool,
8816 name: Option<String>,
8817}
8818
8819impl CategoricalIndex {
8820 #[must_use]
8821 pub fn from_values(labels: Vec<String>, ordered: bool) -> Self {
8822 let mut categories = Vec::<String>::new();
8826 let mut seen: FxHashSet<&str> = FxHashSet::default();
8827 for label in &labels {
8828 if seen.insert(label.as_str()) {
8829 categories.push(label.clone());
8830 }
8831 }
8832 Self {
8833 labels,
8834 categories,
8835 ordered,
8836 name: None,
8837 }
8838 }
8839
8840 pub fn with_categories(
8841 labels: Vec<String>,
8842 categories: Vec<String>,
8843 ordered: bool,
8844 ) -> Result<Self, IndexError> {
8845 let category_set: FxHashSet<&str> = categories.iter().map(String::as_str).collect();
8848 for label in &labels {
8849 if !category_set.contains(label.as_str()) {
8850 return Err(IndexError::InvalidArgument(format!(
8851 "CategoricalIndex label {label:?} is not present in categories"
8852 )));
8853 }
8854 }
8855 Ok(Self {
8856 labels,
8857 categories,
8858 ordered,
8859 name: None,
8860 })
8861 }
8862
8863 #[must_use]
8864 pub fn labels(&self) -> &[String] {
8865 &self.labels
8866 }
8867
8868 #[must_use]
8869 pub fn categories(&self) -> &[String] {
8870 &self.categories
8871 }
8872
8873 #[must_use]
8874 pub fn ordered(&self) -> bool {
8875 self.ordered
8876 }
8877
8878 #[must_use]
8879 pub fn len(&self) -> usize {
8880 self.labels.len()
8881 }
8882
8883 #[must_use]
8884 pub fn is_empty(&self) -> bool {
8885 self.labels.is_empty()
8886 }
8887
8888 #[must_use]
8889 pub fn name(&self) -> Option<&str> {
8890 self.name.as_deref()
8891 }
8892
8893 #[must_use]
8894 pub fn set_name(&self, name: &str) -> Self {
8895 let mut out = self.clone();
8896 out.name = Some(name.to_owned());
8897 out
8898 }
8899
8900 #[must_use]
8901 pub fn set_names(&self, name: Option<&str>) -> Self {
8902 let mut out = self.clone();
8903 out.name = name.map(str::to_owned);
8904 out
8905 }
8906
8907 #[must_use]
8908 pub fn rename_index(&self, name: Option<&str>) -> Self {
8909 self.set_names(name)
8910 }
8911
8912 #[must_use]
8913 pub fn names(&self) -> Vec<Option<String>> {
8914 vec![self.name.clone()]
8915 }
8916
8917 #[must_use]
8918 pub fn copy(&self) -> Self {
8919 self.clone()
8920 }
8921
8922 #[must_use]
8923 pub fn shape(&self) -> (usize,) {
8924 (self.len(),)
8925 }
8926
8927 #[must_use]
8928 pub fn size(&self) -> usize {
8929 self.len()
8930 }
8931
8932 #[must_use]
8933 pub fn empty(&self) -> bool {
8934 self.is_empty()
8935 }
8936
8937 #[must_use]
8938 pub fn dtype(&self) -> &'static str {
8939 "category"
8940 }
8941
8942 #[must_use]
8943 pub fn dtypes(&self) -> Vec<&'static str> {
8944 vec![self.dtype()]
8945 }
8946
8947 #[must_use]
8948 pub fn memory_usage(&self, deep: bool) -> usize {
8949 let fixed = (self.labels.len() + self.categories.len()) * std::mem::size_of::<String>();
8950 if deep {
8951 fixed
8952 + self.labels.iter().map(String::len).sum::<usize>()
8953 + self.categories.iter().map(String::len).sum::<usize>()
8954 + self.name.as_ref().map_or(0, String::len)
8955 } else {
8956 fixed
8957 }
8958 }
8959
8960 #[must_use]
8961 pub fn nbytes(&self) -> usize {
8962 self.memory_usage(false)
8963 }
8964
8965 #[must_use]
8966 pub fn isna(&self) -> Vec<bool> {
8967 vec![false; self.len()]
8968 }
8969
8970 #[must_use]
8971 pub fn notna(&self) -> Vec<bool> {
8972 vec![true; self.len()]
8973 }
8974
8975 pub fn diff(&self, _periods: i64) -> Result<Vec<Option<i64>>, IndexError> {
8978 Err(IndexError::InvalidArgument(
8979 "Categorical has no 'diff' method; convert to a suitable dtype before calling diff"
8980 .to_owned(),
8981 ))
8982 }
8983
8984 #[must_use]
8985 pub fn is_unique(&self) -> bool {
8986 let unique: FxHashSet<&String> = self.labels.iter().collect();
8987 unique.len() == self.labels.len()
8988 }
8989
8990 #[must_use]
8991 pub fn has_duplicates(&self) -> bool {
8992 !self.is_unique()
8993 }
8994
8995 #[must_use]
8996 pub fn is_monotonic_increasing(&self) -> bool {
8997 let codes = self.codes();
8998 codes.windows(2).all(|window| window[0] <= window[1])
8999 }
9000
9001 #[must_use]
9002 pub fn is_monotonic(&self) -> bool {
9003 self.is_monotonic_increasing()
9004 }
9005
9006 #[must_use]
9007 pub fn is_monotonic_decreasing(&self) -> bool {
9008 let codes = self.codes();
9009 codes.windows(2).all(|window| window[0] >= window[1])
9010 }
9011
9012 #[must_use]
9013 pub fn nunique(&self) -> usize {
9014 self.labels.iter().collect::<FxHashSet<_>>().len()
9015 }
9016
9017 #[must_use]
9018 pub fn ndim(&self) -> usize {
9019 1
9020 }
9021
9022 pub fn item(&self) -> Result<String, IndexError> {
9023 if self.labels.len() == 1 {
9024 Ok(self.labels[0].clone())
9025 } else {
9026 Err(IndexError::InvalidArgument(format!(
9027 "item requires exactly one label, got {}",
9028 self.labels.len()
9029 )))
9030 }
9031 }
9032
9033 #[must_use]
9034 pub fn is_(&self, other: &Self) -> bool {
9035 std::ptr::eq(self, other)
9036 }
9037
9038 #[must_use]
9039 pub fn equals(&self, other: &Self) -> bool {
9040 self.labels == other.labels
9041 && self.categories == other.categories
9042 && self.ordered == other.ordered
9043 }
9044
9045 #[must_use]
9046 pub fn identical(&self, other: &Self) -> bool {
9047 self.equals(other) && self.name == other.name
9048 }
9049
9050 #[must_use]
9051 pub fn holds_integer(&self) -> bool {
9052 false
9053 }
9054
9055 #[must_use]
9056 pub fn inferred_type(&self) -> &'static str {
9057 "categorical"
9058 }
9059
9060 #[must_use]
9061 pub fn is_boolean(&self) -> bool {
9062 false
9063 }
9064
9065 #[must_use]
9066 pub fn is_categorical(&self) -> bool {
9067 true
9068 }
9069
9070 #[must_use]
9071 pub fn is_floating(&self) -> bool {
9072 false
9073 }
9074
9075 #[must_use]
9076 pub fn is_integer(&self) -> bool {
9077 false
9078 }
9079
9080 #[must_use]
9081 pub fn is_interval(&self) -> bool {
9082 false
9083 }
9084
9085 #[must_use]
9086 pub fn is_numeric(&self) -> bool {
9087 false
9088 }
9089
9090 #[must_use]
9091 pub fn is_object(&self) -> bool {
9092 false
9093 }
9094
9095 fn category_index_map(&self) -> FxHashMap<&str, usize> {
9099 let mut map: FxHashMap<&str, usize> = FxHashMap::default();
9100 for (i, cat) in self.categories.iter().enumerate() {
9101 map.entry(cat.as_str()).or_insert(i);
9102 }
9103 map
9104 }
9105
9106 #[must_use]
9107 pub fn codes(&self) -> Vec<Option<usize>> {
9108 let map = self.category_index_map();
9112 self.labels
9113 .iter()
9114 .map(|label| map.get(label.as_str()).copied())
9115 .collect()
9116 }
9117
9118 #[must_use]
9119 pub fn values(&self) -> Vec<String> {
9120 self.labels.clone()
9121 }
9122
9123 #[must_use]
9124 pub fn to_list(&self) -> Vec<String> {
9125 self.labels.clone()
9126 }
9127
9128 #[must_use]
9129 pub fn tolist(&self) -> Vec<String> {
9130 self.to_list()
9131 }
9132
9133 #[must_use]
9134 pub fn to_numpy(&self) -> Vec<String> {
9135 self.labels.clone()
9136 }
9137
9138 #[must_use]
9139 pub fn array(&self) -> Vec<String> {
9140 self.labels.clone()
9141 }
9142
9143 #[must_use]
9144 pub fn to_index(&self) -> Index {
9145 Index::from_utf8(self.labels.clone()).set_names(self.name.as_deref())
9146 }
9147
9148 #[must_use]
9151 pub fn format(&self) -> Vec<String> {
9152 self.labels.clone()
9153 }
9154
9155 pub fn r#where(&self, cond: &[bool], other: &str) -> Result<Self, IndexError> {
9159 if cond.len() != self.labels.len() {
9160 return Err(IndexError::LengthMismatch {
9161 expected: self.labels.len(),
9162 actual: cond.len(),
9163 context: "where: cond length must match index length".to_owned(),
9164 });
9165 }
9166 if !self.categories.iter().any(|cat| cat == other) {
9167 return Err(IndexError::InvalidArgument(format!(
9168 "where: replacement {other:?} is not a category"
9169 )));
9170 }
9171 let labels: Vec<String> = self
9172 .labels
9173 .iter()
9174 .zip(cond.iter())
9175 .map(|(label, &keep)| {
9176 if keep {
9177 label.clone()
9178 } else {
9179 other.to_owned()
9180 }
9181 })
9182 .collect();
9183 Ok(Self {
9184 labels,
9185 categories: self.categories.clone(),
9186 ordered: self.ordered,
9187 name: self.name.clone(),
9188 })
9189 }
9190
9191 pub fn putmask(&self, mask: &[bool], value: &str) -> Result<Self, IndexError> {
9194 if mask.len() != self.labels.len() {
9195 return Err(IndexError::LengthMismatch {
9196 expected: self.labels.len(),
9197 actual: mask.len(),
9198 context: "putmask: mask length must match index length".to_owned(),
9199 });
9200 }
9201 if !self.categories.iter().any(|cat| cat == value) {
9202 return Err(IndexError::InvalidArgument(format!(
9203 "putmask: replacement {value:?} is not a category"
9204 )));
9205 }
9206 let labels: Vec<String> = self
9207 .labels
9208 .iter()
9209 .zip(mask.iter())
9210 .map(|(label, &replace)| {
9211 if replace {
9212 value.to_owned()
9213 } else {
9214 label.clone()
9215 }
9216 })
9217 .collect();
9218 Ok(Self {
9219 labels,
9220 categories: self.categories.clone(),
9221 ordered: self.ordered,
9222 name: self.name.clone(),
9223 })
9224 }
9225
9226 #[must_use]
9228 pub fn isnull(&self) -> Vec<bool> {
9229 self.isna()
9230 }
9231
9232 #[must_use]
9234 pub fn notnull(&self) -> Vec<bool> {
9235 self.notna()
9236 }
9237
9238 #[must_use]
9242 pub fn hasnans(&self) -> bool {
9243 false
9244 }
9245
9246 #[must_use]
9249 pub fn dropna(&self) -> Self {
9250 self.clone()
9251 }
9252
9253 #[must_use]
9257 pub fn fillna(&self, _value: &str) -> Self {
9258 self.clone()
9259 }
9260
9261 #[must_use]
9264 pub fn as_ordered(&self) -> Self {
9265 let mut out = self.clone();
9266 out.ordered = true;
9267 out
9268 }
9269
9270 #[must_use]
9273 pub fn as_unordered(&self) -> Self {
9274 let mut out = self.clone();
9275 out.ordered = false;
9276 out
9277 }
9278
9279 pub fn add_categories(&self, new: Vec<String>) -> Result<Self, IndexError> {
9283 let existing: FxHashSet<&str> = self.categories.iter().map(String::as_str).collect();
9287 for cat in &new {
9288 if existing.contains(cat.as_str()) {
9289 return Err(IndexError::InvalidArgument(format!(
9290 "add_categories: {cat:?} is already a category"
9291 )));
9292 }
9293 }
9294 let mut categories = self.categories.clone();
9295 categories.extend(new);
9296 Ok(Self {
9297 labels: self.labels.clone(),
9298 categories,
9299 ordered: self.ordered,
9300 name: self.name.clone(),
9301 })
9302 }
9303
9304 pub fn remove_categories(&self, removals: &[String]) -> Result<Self, IndexError> {
9309 let category_set: FxHashSet<&str> = self.categories.iter().map(String::as_str).collect();
9315 let label_set: FxHashSet<&str> = self.labels.iter().map(String::as_str).collect();
9316 for cat in removals {
9317 if !category_set.contains(cat.as_str()) {
9318 return Err(IndexError::InvalidArgument(format!(
9319 "remove_categories: {cat:?} is not a category"
9320 )));
9321 }
9322 if label_set.contains(cat.as_str()) {
9323 return Err(IndexError::InvalidArgument(format!(
9324 "remove_categories: {cat:?} is still in use by labels"
9325 )));
9326 }
9327 }
9328 let removals_set: FxHashSet<&String> = removals.iter().collect();
9329 let categories: Vec<String> = self
9330 .categories
9331 .iter()
9332 .filter(|cat| !removals_set.contains(cat))
9333 .cloned()
9334 .collect();
9335 Ok(Self {
9336 labels: self.labels.clone(),
9337 categories,
9338 ordered: self.ordered,
9339 name: self.name.clone(),
9340 })
9341 }
9342
9343 #[must_use]
9346 pub fn remove_unused_categories(&self) -> Self {
9347 let used: FxHashSet<&String> = self.labels.iter().collect();
9348 let categories: Vec<String> = self
9349 .categories
9350 .iter()
9351 .filter(|cat| used.contains(cat))
9352 .cloned()
9353 .collect();
9354 Self {
9355 labels: self.labels.clone(),
9356 categories,
9357 ordered: self.ordered,
9358 name: self.name.clone(),
9359 }
9360 }
9361
9362 pub fn set_categories(&self, new_categories: Vec<String>) -> Result<Self, IndexError> {
9366 let new_set: FxHashSet<&str> = new_categories.iter().map(String::as_str).collect();
9370 for label in &self.labels {
9371 if !new_set.contains(label.as_str()) {
9372 return Err(IndexError::InvalidArgument(format!(
9373 "set_categories: label {label:?} is not in the new categories"
9374 )));
9375 }
9376 }
9377 Ok(Self {
9378 labels: self.labels.clone(),
9379 categories: new_categories,
9380 ordered: self.ordered,
9381 name: self.name.clone(),
9382 })
9383 }
9384
9385 pub fn rename_categories(&self, new: Vec<String>) -> Result<Self, IndexError> {
9389 if new.len() != self.categories.len() {
9390 return Err(IndexError::InvalidArgument(format!(
9391 "rename_categories: expected {} new names, got {}",
9392 self.categories.len(),
9393 new.len()
9394 )));
9395 }
9396 let mapping: std::collections::HashMap<&String, &String> =
9397 self.categories.iter().zip(new.iter()).collect();
9398 let labels: Vec<String> = self
9399 .labels
9400 .iter()
9401 .map(|label| (*mapping.get(label).expect("label is a category")).clone())
9402 .collect();
9403 Ok(Self {
9404 labels,
9405 categories: new,
9406 ordered: self.ordered,
9407 name: self.name.clone(),
9408 })
9409 }
9410
9411 pub fn reorder_categories(&self, new: Vec<String>, ordered: bool) -> Result<Self, IndexError> {
9415 if new.len() != self.categories.len() {
9416 return Err(IndexError::InvalidArgument(format!(
9417 "reorder_categories: expected {} categories, got {}",
9418 self.categories.len(),
9419 new.len()
9420 )));
9421 }
9422 let existing: FxHashSet<&String> = self.categories.iter().collect();
9423 for cat in &new {
9424 if !existing.contains(cat) {
9425 return Err(IndexError::InvalidArgument(format!(
9426 "reorder_categories: {cat:?} is not an existing category"
9427 )));
9428 }
9429 }
9430 let new_set: FxHashSet<&String> = new.iter().collect();
9431 if new_set.len() != new.len() {
9432 return Err(IndexError::InvalidArgument(
9433 "reorder_categories: new categories contain duplicates".to_owned(),
9434 ));
9435 }
9436 Ok(Self {
9437 labels: self.labels.clone(),
9438 categories: new,
9439 ordered,
9440 name: self.name.clone(),
9441 })
9442 }
9443
9444 #[must_use]
9447 pub fn to_flat_index(&self) -> Index {
9448 self.to_index()
9449 }
9450
9451 #[must_use]
9453 pub fn r#str(&self) -> IndexStringAccessor<'_> {
9454 IndexStringAccessor::owned(self.to_flat_index())
9455 }
9456
9457 #[must_use]
9459 pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
9460 self.to_flat_index().to_frame()
9461 }
9462
9463 #[must_use]
9465 pub fn to_series(&self) -> Vec<(IndexLabel, IndexLabel)> {
9466 self.to_flat_index().to_series()
9467 }
9468
9469 #[must_use]
9471 pub fn any(&self) -> bool {
9472 self.to_flat_index().any()
9473 }
9474
9475 #[must_use]
9477 pub fn all(&self) -> bool {
9478 self.to_flat_index().all()
9479 }
9480
9481 pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
9483 self.to_flat_index().get_level_values(level)
9484 }
9485
9486 pub fn droplevel(&self, level: usize) -> Result<Index, IndexError> {
9488 self.to_flat_index().droplevel(level)
9489 }
9490
9491 #[must_use]
9493 pub fn groupby(&self) -> HashMap<IndexLabel, Vec<usize>> {
9494 self.to_flat_index().groupby()
9495 }
9496
9497 #[must_use]
9499 pub fn map<F>(&self, func: F) -> Index
9500 where
9501 F: Fn(&IndexLabel) -> IndexLabel,
9502 {
9503 self.to_flat_index().map(func)
9504 }
9505
9506 pub fn astype(&self, dtype: &str) -> Result<Index, IndexError> {
9508 self.to_flat_index().astype(dtype)
9509 }
9510
9511 #[must_use]
9513 pub fn asof(&self, key: &IndexLabel) -> Option<IndexLabel> {
9514 self.to_flat_index().asof(key)
9515 }
9516
9517 #[must_use]
9519 pub fn asof_locs(&self, where_index: &Index, mask: Option<&[bool]>) -> Vec<Option<usize>> {
9520 self.to_flat_index().asof_locs(where_index, mask)
9521 }
9522
9523 #[must_use]
9525 pub fn drop(&self, labels_to_drop: &[IndexLabel]) -> Index {
9526 self.to_flat_index().drop(labels_to_drop)
9527 }
9528
9529 pub fn join(&self, other: &Index, how: &str) -> Result<Index, IndexError> {
9531 self.to_flat_index().join(other, how)
9532 }
9533
9534 #[must_use]
9536 pub fn sortlevel(&self) -> (Index, Vec<usize>) {
9537 self.to_flat_index().sortlevel()
9538 }
9539
9540 #[must_use]
9542 pub fn rename(&self, name: &str) -> Self {
9543 self.set_name(name)
9544 }
9545
9546 #[must_use]
9548 pub fn view(&self) -> Self {
9549 self.clone()
9550 }
9551
9552 #[must_use]
9555 pub fn transpose(&self) -> Self {
9556 self.clone()
9557 }
9558
9559 #[allow(non_snake_case)]
9561 #[must_use]
9562 pub fn T(&self) -> Self {
9563 self.transpose()
9564 }
9565
9566 #[must_use]
9569 pub fn ravel(&self) -> Vec<String> {
9570 self.labels.clone()
9571 }
9572
9573 #[must_use]
9575 pub fn nlevels(&self) -> usize {
9576 1
9577 }
9578
9579 #[must_use]
9582 pub fn infer_objects(&self) -> Self {
9583 self.clone()
9584 }
9585
9586 pub fn searchsorted(&self, value: &str, side: &str) -> Result<usize, IndexError> {
9590 self.to_index()
9591 .searchsorted(&IndexLabel::Utf8(value.to_owned()), side)
9592 }
9593
9594 pub fn slice_locs(&self, start: &str, end: &str) -> Result<(usize, usize), IndexError> {
9599 let labels_sorted = self.labels.windows(2).all(|w| w[0] <= w[1]);
9600 if !labels_sorted {
9601 return Err(IndexError::InvalidArgument(
9602 "slice_locs requires a CategoricalIndex with labels sorted lexicographically"
9603 .to_owned(),
9604 ));
9605 }
9606 let left = self.searchsorted(start, "left")?;
9607 let right = self.searchsorted(end, "right")?;
9608 Ok((left, right))
9609 }
9610
9611 pub fn slice_indexer(
9614 &self,
9615 start: &str,
9616 end: &str,
9617 ) -> Result<std::ops::Range<usize>, IndexError> {
9618 let (l, r) = self.slice_locs(start, end)?;
9619 Ok(l..r)
9620 }
9621
9622 fn set_op_via_string<F>(&self, other: &Self, op: F) -> Self
9623 where
9624 F: FnOnce(Vec<&String>, Vec<&String>) -> Vec<String>,
9625 {
9626 let labels = op(self.labels.iter().collect(), other.labels.iter().collect());
9627 let mut categories: Vec<String> = self.categories.clone();
9634 let mut seen: FxHashSet<&String> = self.categories.iter().collect();
9635 for label in &labels {
9636 if seen.insert(label) {
9637 categories.push(label.clone());
9638 }
9639 }
9640 Self {
9641 labels,
9642 categories,
9643 ordered: self.ordered,
9644 name: if self.name == other.name {
9645 self.name.clone()
9646 } else {
9647 None
9648 },
9649 }
9650 }
9651
9652 #[must_use]
9655 pub fn intersection(&self, other: &Self) -> Self {
9656 self.set_op_via_string(other, |left, right| {
9657 let right_set: FxHashSet<&&String> = right.iter().collect();
9658 let mut seen = FxHashSet::<&String>::default();
9659 left.into_iter()
9660 .filter(|label| right_set.contains(label) && seen.insert(label))
9661 .cloned()
9662 .collect()
9663 })
9664 }
9665
9666 #[must_use]
9669 pub fn union(&self, other: &Self) -> Self {
9670 self.set_op_via_string(other, |left, right| {
9671 let mut seen = FxHashSet::<&String>::default();
9672 left.into_iter()
9673 .chain(right)
9674 .filter(|label| seen.insert(label))
9675 .cloned()
9676 .collect()
9677 })
9678 }
9679
9680 #[must_use]
9683 pub fn symmetric_difference(&self, other: &Self) -> Self {
9684 self.set_op_via_string(other, |left, right| {
9685 let left_set: FxHashSet<&&String> = left.iter().collect();
9686 let right_set: FxHashSet<&&String> = right.iter().collect();
9687 let mut seen = FxHashSet::<&String>::default();
9688 let mut out = Vec::<String>::new();
9689 for label in &left {
9690 if !right_set.contains(label) && seen.insert(*label) {
9691 out.push((*label).clone());
9692 }
9693 }
9694 for label in &right {
9695 if !left_set.contains(label) && seen.insert(*label) {
9696 out.push((*label).clone());
9697 }
9698 }
9699 out
9700 })
9701 }
9702
9703 #[must_use]
9706 pub fn difference(&self, other: &Self) -> Self {
9707 let mut out = self.set_op_via_string(other, |left, right| {
9710 let right_set: FxHashSet<&&String> = right.iter().collect();
9711 let mut seen = FxHashSet::<&String>::default();
9712 left.into_iter()
9713 .filter(|label| !right_set.contains(label) && seen.insert(label))
9714 .cloned()
9715 .collect()
9716 });
9717 out.name = self.name.clone();
9718 out
9719 }
9720
9721 #[must_use]
9725 pub fn sort_values(&self) -> Self {
9726 let positions = self.argsort();
9727 let labels: Vec<String> = positions.iter().map(|&p| self.labels[p].clone()).collect();
9728 Self {
9729 labels,
9730 categories: self.categories.clone(),
9731 ordered: self.ordered,
9732 name: self.name.clone(),
9733 }
9734 }
9735
9736 #[must_use]
9738 pub fn sort(&self) -> Self {
9739 self.sort_values()
9740 }
9741
9742 #[must_use]
9755 pub fn argsort(&self) -> Vec<usize> {
9756 let map = self.category_index_map();
9757 let mut positions: Vec<usize> = (0..self.labels.len()).collect();
9758 positions.sort_by_key(|&i| {
9759 map.get(self.labels[i].as_str())
9760 .copied()
9761 .unwrap_or(usize::MAX)
9762 });
9763 positions
9764 }
9765
9766 #[must_use]
9771 pub fn append(&self, other: &Self) -> Self {
9772 let mut labels = self.labels.clone();
9773 labels.extend_from_slice(&other.labels);
9774 let mut categories = self.categories.clone();
9778 let mut seen: FxHashSet<&String> = self.categories.iter().collect();
9779 for cat in &other.categories {
9780 if seen.insert(cat) {
9781 categories.push(cat.clone());
9782 }
9783 }
9784 let name = if self.name == other.name {
9785 self.name.clone()
9786 } else {
9787 None
9788 };
9789 Self {
9790 labels,
9791 categories,
9792 ordered: self.ordered && other.ordered,
9793 name,
9794 }
9795 }
9796
9797 pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
9800 if loc >= self.labels.len() {
9801 return Err(IndexError::OutOfBounds {
9802 position: loc,
9803 length: self.labels.len(),
9804 });
9805 }
9806 let mut labels = self.labels.clone();
9807 labels.remove(loc);
9808 Ok(Self {
9809 labels,
9810 categories: self.categories.clone(),
9811 ordered: self.ordered,
9812 name: self.name.clone(),
9813 })
9814 }
9815
9816 pub fn insert(&self, loc: usize, value: &str) -> Result<Self, IndexError> {
9820 if loc > self.labels.len() {
9821 return Err(IndexError::OutOfBounds {
9822 position: loc,
9823 length: self.labels.len(),
9824 });
9825 }
9826 if !self.categories.iter().any(|cat| cat == value) {
9827 return Err(IndexError::InvalidArgument(format!(
9828 "insert: {value:?} is not a category"
9829 )));
9830 }
9831 let mut labels = self.labels.clone();
9832 labels.insert(loc, value.to_owned());
9833 Ok(Self {
9834 labels,
9835 categories: self.categories.clone(),
9836 ordered: self.ordered,
9837 name: self.name.clone(),
9838 })
9839 }
9840
9841 #[must_use]
9844 pub fn repeat(&self, repeats: usize) -> Self {
9845 let mut labels = Vec::with_capacity(self.labels.len() * repeats);
9846 for label in &self.labels {
9847 for _ in 0..repeats {
9848 labels.push(label.clone());
9849 }
9850 }
9851 Self {
9852 labels,
9853 categories: self.categories.clone(),
9854 ordered: self.ordered,
9855 name: self.name.clone(),
9856 }
9857 }
9858
9859 pub fn take(&self, positions: &[usize]) -> Result<Self, IndexError> {
9863 for &p in positions {
9864 if p >= self.labels.len() {
9865 return Err(IndexError::OutOfBounds {
9866 position: p,
9867 length: self.labels.len(),
9868 });
9869 }
9870 }
9871 let labels: Vec<String> = positions.iter().map(|&p| self.labels[p].clone()).collect();
9872 Ok(Self {
9873 labels,
9874 categories: self.categories.clone(),
9875 ordered: self.ordered,
9876 name: self.name.clone(),
9877 })
9878 }
9879
9880 #[must_use]
9883 pub fn isin(&self, values: &[String]) -> Vec<bool> {
9884 let needle: FxHashSet<&String> = values.iter().collect();
9885 self.labels.iter().map(|l| needle.contains(l)).collect()
9886 }
9887
9888 #[must_use]
9891 pub fn get_indexer_non_unique(&self, targets: &[String]) -> (Vec<isize>, Vec<usize>) {
9892 let mut by_value = FxHashMap::<&String, Vec<usize>>::default();
9893 for (i, label) in self.labels.iter().enumerate() {
9894 by_value.entry(label).or_default().push(i);
9895 }
9896 let mut positions = Vec::<isize>::new();
9897 let mut missing = Vec::<usize>::new();
9898 for (idx, target) in targets.iter().enumerate() {
9899 if let Some(matches) = by_value.get(target) {
9900 positions.extend(
9901 matches
9902 .iter()
9903 .map(|p| isize::try_from(*p).unwrap_or(isize::MAX)),
9904 );
9905 } else {
9906 positions.push(-1);
9907 missing.push(idx);
9908 }
9909 }
9910 (positions, missing)
9911 }
9912
9913 #[must_use]
9916 pub fn get_indexer(&self, targets: &[String]) -> Vec<isize> {
9917 let mut positions = FxHashMap::<&String, isize>::default();
9918 for (i, label) in self.labels.iter().enumerate() {
9919 positions
9920 .entry(label)
9921 .or_insert_with(|| isize::try_from(i).unwrap_or(isize::MAX));
9922 }
9923 targets
9924 .iter()
9925 .map(|t| positions.get(t).copied().unwrap_or(-1))
9926 .collect()
9927 }
9928
9929 #[must_use]
9932 pub fn get_indexer_for(&self, targets: &[String]) -> Vec<isize> {
9933 self.get_indexer(targets)
9934 }
9935
9936 pub fn get_loc(&self, value: &str) -> Result<usize, IndexError> {
9939 self.labels.iter().position(|l| l == value).ok_or_else(|| {
9940 IndexError::InvalidArgument(format!("get_loc: {value:?} not in CategoricalIndex"))
9941 })
9942 }
9943
9944 pub fn argmax(&self) -> Result<usize, IndexError> {
9949 if self.labels.is_empty() {
9950 return Err(IndexError::InvalidArgument(
9951 "attempt to get argmax of an empty sequence".to_owned(),
9952 ));
9953 }
9954 let mut best = 0;
9955 if self.ordered {
9956 let map = self.category_index_map();
9957 let position = |label: &String| map.get(label.as_str()).copied().unwrap_or(0);
9958 for i in 1..self.labels.len() {
9959 if position(&self.labels[i]) > position(&self.labels[best]) {
9960 best = i;
9961 }
9962 }
9963 } else {
9964 for i in 1..self.labels.len() {
9965 if self.labels[i] > self.labels[best] {
9966 best = i;
9967 }
9968 }
9969 }
9970 Ok(best)
9971 }
9972
9973 pub fn argmin(&self) -> Result<usize, IndexError> {
9976 if self.labels.is_empty() {
9977 return Err(IndexError::InvalidArgument(
9978 "attempt to get argmin of an empty sequence".to_owned(),
9979 ));
9980 }
9981 let mut best = 0;
9982 if self.ordered {
9983 let map = self.category_index_map();
9984 let position = |label: &String| map.get(label.as_str()).copied().unwrap_or(usize::MAX);
9985 for i in 1..self.labels.len() {
9986 if position(&self.labels[i]) < position(&self.labels[best]) {
9987 best = i;
9988 }
9989 }
9990 } else {
9991 for i in 1..self.labels.len() {
9992 if self.labels[i] < self.labels[best] {
9993 best = i;
9994 }
9995 }
9996 }
9997 Ok(best)
9998 }
9999
10000 #[must_use]
10004 pub fn min(&self) -> Option<&str> {
10005 if self.labels.is_empty() {
10006 return None;
10007 }
10008 if self.ordered {
10009 let map = self.category_index_map();
10011 let position = |label: &String| map.get(label.as_str()).copied().unwrap_or(usize::MAX);
10012 self.labels
10013 .iter()
10014 .min_by_key(|label| position(label))
10015 .map(String::as_str)
10016 } else {
10017 self.labels.iter().min().map(String::as_str)
10018 }
10019 }
10020
10021 #[must_use]
10023 pub fn max(&self) -> Option<&str> {
10024 if self.labels.is_empty() {
10025 return None;
10026 }
10027 if self.ordered {
10028 let map = self.category_index_map();
10029 let position = |label: &String| map.get(label.as_str()).copied().unwrap_or(0);
10030 self.labels
10031 .iter()
10032 .max_by_key(|label| position(label))
10033 .map(String::as_str)
10034 } else {
10035 self.labels.iter().max().map(String::as_str)
10036 }
10037 }
10038
10039 #[must_use]
10043 pub fn unique(&self) -> Self {
10044 let mut seen = FxHashSet::<&String>::default();
10045 let mut uniques = Vec::<String>::new();
10046 for label in &self.labels {
10047 if seen.insert(label) {
10048 uniques.push(label.clone());
10049 }
10050 }
10051 Self {
10052 labels: uniques,
10053 categories: self.categories.clone(),
10054 ordered: self.ordered,
10055 name: self.name.clone(),
10056 }
10057 }
10058
10059 #[must_use]
10062 pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
10063 self.to_index().duplicated(keep)
10064 }
10065
10066 #[must_use]
10070 pub fn drop_duplicates(&self) -> Self {
10071 self.unique()
10072 }
10073
10074 #[must_use]
10077 pub fn value_counts(&self) -> Vec<(String, usize)> {
10078 let mut order = Vec::<&String>::new();
10079 let mut counts = FxHashMap::<&String, usize>::default();
10080 for label in &self.labels {
10081 let entry = counts.entry(label).or_insert_with(|| {
10082 order.push(label);
10083 0
10084 });
10085 *entry += 1;
10086 }
10087 let mut pairs: Vec<(String, usize)> =
10088 order.iter().map(|s| ((*s).clone(), counts[*s])).collect();
10089 pairs.sort_by_key(|entry| std::cmp::Reverse(entry.1));
10091 pairs
10092 }
10093
10094 #[must_use]
10098 pub fn factorize(&self) -> (Vec<isize>, Self) {
10099 let mut positions = FxHashMap::<&String, isize>::default();
10100 let mut uniques = Vec::<String>::new();
10101 let mut codes = Vec::with_capacity(self.labels.len());
10102 for label in &self.labels {
10103 if let Some(code) = positions.get(label) {
10104 codes.push(*code);
10105 } else {
10106 let code = isize::try_from(uniques.len()).unwrap_or(isize::MAX);
10107 positions.insert(label, code);
10108 uniques.push(label.clone());
10109 codes.push(code);
10110 }
10111 }
10112 let unique_index = Self {
10113 labels: uniques,
10114 categories: self.categories.clone(),
10115 ordered: self.ordered,
10116 name: self.name.clone(),
10117 };
10118 (codes, unique_index)
10119 }
10120}
10121
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
/// Result of aligning two indexes: the output index plus, for every output
/// row, where that row comes from on each side.
///
/// `validate_alignment_plan` treats a plan as well-formed when all three
/// members have the same length.
pub struct AlignmentPlan {
    /// Labels of the aligned output, one per output row.
    pub union_index: Index,
    /// For each output row, the source position in the left index, or `None`
    /// when the row has no left-hand source.
    pub left_positions: Vec<Option<usize>>,
    /// For each output row, the source position in the right index, or `None`
    /// when the row has no right-hand source.
    pub right_positions: Vec<Option<usize>>,
}
10128
#[derive(Debug, Error, Clone, PartialEq, Eq)]
#[non_exhaustive]
/// Errors reported by index operations.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
pub enum IndexError {
    /// The position vectors and output index of an alignment plan do not all
    /// have the same length.
    #[error("alignment vectors must have equal lengths")]
    InvalidAlignmentVectors,
    /// `position` is not a valid position for a sequence of `length` items.
    #[error("position {position} out of bounds for length {length}")]
    OutOfBounds { position: usize, length: usize },
    /// A length of `actual` was supplied where `expected` was required;
    /// `context` is free text included in the message.
    #[error("length mismatch: expected {expected}, got {actual} ({context})")]
    LengthMismatch {
        expected: usize,
        actual: usize,
        context: String,
    },
    /// An argument was rejected; the payload is the human-readable reason.
    #[error("invalid argument: {0}")]
    InvalidArgument(String),
}
10145
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Selects which labels survive when two indexes are aligned.
pub enum AlignMode {
    /// Keep only labels present on both sides.
    Inner,
    /// Keep every left label; right positions are `None` where unmatched.
    Left,
    /// Keep every right label; left positions are `None` where unmatched.
    Right,
    /// Keep every label from either side.
    Outer,
}
10158
10159fn index_position_groups(index: &Index) -> FxHashMap<IndexLabel, Vec<usize>> {
10160 let mut groups: FxHashMap<IndexLabel, Vec<usize>> = FxHashMap::default();
10161 for (pos, label) in index.labels().iter().enumerate() {
10162 groups.entry(label.clone()).or_default().push(pos);
10163 }
10164 groups
10165}
10166
10167fn align_non_unique(left: &Index, right: &Index, mode: AlignMode) -> AlignmentPlan {
10168 let left_groups = index_position_groups(left);
10169 let right_groups = index_position_groups(right);
10170
10171 let mut out_labels = Vec::new();
10172 let mut left_positions = Vec::new();
10173 let mut right_positions = Vec::new();
10174
10175 match mode {
10176 AlignMode::Inner => {
10177 for (left_pos, label) in left.labels().iter().enumerate() {
10178 if let Some(right_hits) = right_groups.get(label) {
10179 for &right_pos in right_hits {
10180 out_labels.push(label.clone());
10181 left_positions.push(Some(left_pos));
10182 right_positions.push(Some(right_pos));
10183 }
10184 }
10185 }
10186 }
10187 AlignMode::Left => {
10188 for (left_pos, label) in left.labels().iter().enumerate() {
10189 match right_groups.get(label) {
10190 Some(right_hits) if !right_hits.is_empty() => {
10191 for &right_pos in right_hits {
10192 out_labels.push(label.clone());
10193 left_positions.push(Some(left_pos));
10194 right_positions.push(Some(right_pos));
10195 }
10196 }
10197 _ => {
10198 out_labels.push(label.clone());
10199 left_positions.push(Some(left_pos));
10200 right_positions.push(None);
10201 }
10202 }
10203 }
10204 }
10205 AlignMode::Right => {
10206 for (right_pos, label) in right.labels().iter().enumerate() {
10207 match left_groups.get(label) {
10208 Some(left_hits) if !left_hits.is_empty() => {
10209 for &left_pos in left_hits {
10210 out_labels.push(label.clone());
10211 left_positions.push(Some(left_pos));
10212 right_positions.push(Some(right_pos));
10213 }
10214 }
10215 _ => {
10216 out_labels.push(label.clone());
10217 left_positions.push(None);
10218 right_positions.push(Some(right_pos));
10219 }
10220 }
10221 }
10222 }
10223 AlignMode::Outer => {
10224 for (left_pos, label) in left.labels().iter().enumerate() {
10225 match right_groups.get(label) {
10226 Some(right_hits) if !right_hits.is_empty() => {
10227 for &right_pos in right_hits {
10228 out_labels.push(label.clone());
10229 left_positions.push(Some(left_pos));
10230 right_positions.push(Some(right_pos));
10231 }
10232 }
10233 _ => {
10234 out_labels.push(label.clone());
10235 left_positions.push(Some(left_pos));
10236 right_positions.push(None);
10237 }
10238 }
10239 }
10240
10241 for (right_pos, label) in right.labels().iter().enumerate() {
10242 if !left_groups.contains_key(label) {
10243 out_labels.push(label.clone());
10244 left_positions.push(None);
10245 right_positions.push(Some(right_pos));
10246 }
10247 }
10248 }
10249 }
10250
10251 let mut union_index = Index::new(out_labels);
10252 match mode {
10253 AlignMode::Left => {
10254 union_index.name = left.name.clone();
10255 }
10256 AlignMode::Right => {
10257 union_index.name = right.name.clone();
10258 }
10259 AlignMode::Inner | AlignMode::Outer => {}
10260 }
10261
10262 AlignmentPlan {
10263 union_index,
10264 left_positions,
10265 right_positions,
10266 }
10267}
10268
10269pub fn align(left: &Index, right: &Index, mode: AlignMode) -> AlignmentPlan {
10274 if left.has_duplicates() || right.has_duplicates() {
10275 return align_non_unique(left, right, mode);
10276 }
10277
10278 match mode {
10279 AlignMode::Inner => align_inner(left, right),
10280 AlignMode::Left => align_left(left, right),
10281 AlignMode::Right => {
10282 let plan = align_left(right, left);
10283 AlignmentPlan {
10284 union_index: plan.union_index,
10285 left_positions: plan.right_positions,
10286 right_positions: plan.left_positions,
10287 }
10288 }
10289 AlignMode::Outer => align_union(left, right),
10290 }
10291}
10292
10293pub fn align_inner(left: &Index, right: &Index) -> AlignmentPlan {
10297 if left.has_duplicates() || right.has_duplicates() {
10298 return align_non_unique(left, right, AlignMode::Inner);
10299 }
10300
10301 let right_map = right.position_map_first_ref();
10302
10303 let mut output_labels = Vec::new();
10304 let mut left_positions = Vec::new();
10305 let mut right_positions = Vec::new();
10306
10307 for (left_pos, label) in left.labels.iter().enumerate() {
10308 if let Some(&right_pos) = right_map.get(label) {
10309 output_labels.push(label.clone());
10310 left_positions.push(Some(left_pos));
10311 right_positions.push(Some(right_pos));
10312 }
10313 }
10314
10315 let shared_name = if left.name() == right.name() {
10319 left.name().map(str::to_owned)
10320 } else {
10321 None
10322 };
10323 let mut union_index = Index::new(output_labels);
10324 union_index.name = shared_name;
10325 AlignmentPlan {
10326 union_index,
10327 left_positions,
10328 right_positions,
10329 }
10330}
10331
10332pub fn align_left(left: &Index, right: &Index) -> AlignmentPlan {
10334 if left.has_duplicates() || right.has_duplicates() {
10335 return align_non_unique(left, right, AlignMode::Left);
10336 }
10337
10338 let right_map = right.position_map_first_ref();
10339
10340 let mut left_positions = Vec::with_capacity(left.len());
10341 let mut right_positions = Vec::with_capacity(left.len());
10342
10343 for (left_pos, label) in left.labels.iter().enumerate() {
10344 left_positions.push(Some(left_pos));
10345 right_positions.push(right_map.get(label).copied());
10346 }
10347
10348 AlignmentPlan {
10349 union_index: left.clone(),
10350 left_positions,
10351 right_positions,
10352 }
10353}
10354
10355pub fn align_union(left: &Index, right: &Index) -> AlignmentPlan {
10356 if left.has_duplicates() || right.has_duplicates() {
10357 return align_non_unique(left, right, AlignMode::Outer);
10358 }
10359
10360 let left_positions_map = left.position_map_first_ref();
10361 let right_positions_map = right.position_map_first_ref();
10362
10363 let mut union_labels = Vec::with_capacity(left.labels.len() + right.labels.len());
10364 union_labels.extend(left.labels.iter().cloned());
10365 for label in &right.labels {
10366 if !left_positions_map.contains_key(&label) {
10367 union_labels.push(label.clone());
10368 }
10369 }
10370
10371 let left_positions = union_labels
10372 .iter()
10373 .map(|label| left_positions_map.get(&label).copied())
10374 .collect();
10375
10376 let right_positions = union_labels
10377 .iter()
10378 .map(|label| right_positions_map.get(&label).copied())
10379 .collect();
10380
10381 let shared_name = if left.name() == right.name() {
10384 left.name().map(str::to_owned)
10385 } else {
10386 None
10387 };
10388 let mut union_index = Index::new(union_labels);
10389 union_index.name = shared_name;
10390 AlignmentPlan {
10391 union_index,
10392 left_positions,
10393 right_positions,
10394 }
10395}
10396
10397pub fn validate_alignment_plan(plan: &AlignmentPlan) -> Result<(), IndexError> {
10398 if plan.left_positions.len() != plan.right_positions.len()
10399 || plan.left_positions.len() != plan.union_index.len()
10400 {
10401 return Err(IndexError::InvalidAlignmentVectors);
10402 }
10403
10404 Ok(())
10405}
10406
#[derive(Debug, Clone, PartialEq, Eq)]
/// Result of aligning any number of indexes at once.
pub struct MultiAlignmentPlan {
    /// Labels of the aligned output, one per output row.
    pub union_index: Index,
    /// One vector per input index; entry `i` of a vector is the position in
    /// that input of output row `i`, or `None` when the input lacks the label.
    pub positions: Vec<Vec<Option<usize>>>,
}
10415
10416pub fn leapfrog_union(indexes: &[&Index]) -> Index {
10422 if indexes.is_empty() {
10423 return Index::new(Vec::new());
10424 }
10425 if indexes.len() == 1 {
10426 return indexes[0].unique().sort_values();
10427 }
10428
10429 let sorted: Vec<Vec<&IndexLabel>> = indexes
10431 .iter()
10432 .map(|idx| {
10433 let mut labels: Vec<&IndexLabel> = idx.labels().iter().collect();
10434 labels.sort();
10435 labels.dedup();
10436 labels
10437 })
10438 .collect();
10439
10440 let mut heap = std::collections::BinaryHeap::new();
10442 for (i, iter) in sorted.iter().enumerate() {
10443 if !iter.is_empty() {
10444 heap.push(std::cmp::Reverse((iter[0].clone(), i, 0_usize)));
10445 }
10446 }
10447
10448 let total: usize = sorted.iter().map(|s| s.len()).sum();
10449 let mut result = Vec::with_capacity(total);
10450
10451 while let Some(std::cmp::Reverse((label, iter_idx, pos))) = heap.pop() {
10452 if result.last() != Some(&label) {
10454 result.push(label);
10455 }
10456
10457 let next_pos = pos + 1;
10458 if next_pos < sorted[iter_idx].len() {
10459 heap.push(std::cmp::Reverse((
10460 sorted[iter_idx][next_pos].clone(),
10461 iter_idx,
10462 next_pos,
10463 )));
10464 }
10465 }
10466
10467 Index::new(result)
10468}
10469
10470pub fn leapfrog_intersection(indexes: &[&Index]) -> Index {
10476 if indexes.is_empty() {
10477 return Index::new(Vec::new());
10478 }
10479 if indexes.len() == 1 {
10480 return indexes[0].unique().sort_values();
10481 }
10482
10483 let sorted: Vec<Vec<&IndexLabel>> = indexes
10485 .iter()
10486 .map(|idx| {
10487 let mut labels: Vec<&IndexLabel> = idx.labels().iter().collect();
10488 labels.sort();
10489 labels.dedup();
10490 labels
10491 })
10492 .collect();
10493
10494 let k = sorted.len();
10496 let mut cursors: Vec<usize> = vec![0; k];
10497 let mut result = Vec::new();
10498
10499 'outer: loop {
10500 for i in 0..k {
10502 if cursors[i] >= sorted[i].len() {
10503 break 'outer;
10504 }
10505 }
10506
10507 let mut max_label = sorted[0][cursors[0]];
10509 for i in 1..k {
10510 if sorted[i][cursors[i]] > max_label {
10511 max_label = sorted[i][cursors[i]];
10512 }
10513 }
10514
10515 let mut all_equal = true;
10517 for i in 0..k {
10518 let remaining = &sorted[i][cursors[i]..];
10520 match remaining.binary_search(&max_label) {
10521 Ok(offset) => {
10522 cursors[i] += offset;
10523 }
10524 Err(offset) => {
10525 cursors[i] += offset;
10526 all_equal = false;
10527 }
10528 }
10529 if cursors[i] >= sorted[i].len() {
10530 break 'outer;
10531 }
10532 }
10533
10534 if all_equal {
10535 result.push(max_label.clone());
10537 for cursor in &mut cursors {
10538 *cursor += 1;
10539 }
10540 }
10541 }
10543
10544 Index::new(result)
10545}
10546
10547pub fn multi_way_align(indexes: &[&Index]) -> MultiAlignmentPlan {
10553 if indexes.is_empty() {
10554 return MultiAlignmentPlan {
10555 union_index: Index::new(Vec::new()),
10556 positions: Vec::new(),
10557 };
10558 }
10559
10560 let mut seen: FxHashSet<&IndexLabel> = FxHashSet::with_capacity_and_hasher(
10570 indexes.iter().map(|idx| idx.labels().len()).sum(),
10571 Default::default(),
10572 );
10573 let mut union_labels: Vec<IndexLabel> = Vec::new();
10574 for idx in indexes {
10575 for label in idx.labels() {
10576 if seen.insert(label) {
10577 union_labels.push(label.clone());
10578 }
10579 }
10580 }
10581 let first_name = indexes
10584 .first()
10585 .and_then(|idx| idx.name())
10586 .map(str::to_owned);
10587 let shared_name = if indexes
10588 .iter()
10589 .all(|idx| idx.name() == first_name.as_deref())
10590 {
10591 first_name
10592 } else {
10593 None
10594 };
10595 let mut union = Index::new(union_labels);
10596 union.name = shared_name;
10597
10598 let maps: Vec<FxHashMap<&IndexLabel, usize>> = indexes
10600 .iter()
10601 .map(|idx| idx.position_map_first_ref())
10602 .collect();
10603
10604 let positions: Vec<Vec<Option<usize>>> = maps
10605 .iter()
10606 .map(|map| {
10607 union
10608 .labels
10609 .iter()
10610 .map(|label| map.get(label).copied())
10611 .collect()
10612 })
10613 .collect();
10614
10615 MultiAlignmentPlan {
10616 union_index: union,
10617 positions,
10618 }
10619}
10620
#[derive(Debug, Clone, Error)]
/// Errors reported by `timedelta_range`.
pub enum TimedeltaRangeError {
    /// Fewer than two of `start`, `end` and `periods` were supplied.
    #[error("must specify at least two of start, end, periods")]
    InsufficientParams,
    /// All three of `start`, `end` and `periods` were supplied.
    #[error("must specify no more than two of start, end, periods")]
    TooManyParams,
    /// `freq` was zero or negative.
    #[error("freq must be positive")]
    NonPositiveFreq,
    /// The requested range cannot be computed, e.g. `end` precedes `start`.
    #[error("cannot compute range: end < start with positive freq")]
    InvalidRange,
}
10635
10636pub fn timedelta_range(
10656 start: Option<i64>,
10657 end: Option<i64>,
10658 periods: Option<usize>,
10659 freq: i64,
10660 name: Option<&str>,
10661) -> Result<Index, TimedeltaRangeError> {
10662 if freq <= 0 {
10663 return Err(TimedeltaRangeError::NonPositiveFreq);
10664 }
10665
10666 let (start_ns, count) = match (start, end, periods) {
10667 (Some(s), Some(e), None) => {
10668 if e < s {
10669 return Err(TimedeltaRangeError::InvalidRange);
10670 }
10671 let n = ((e - s) / freq + 1) as usize;
10672 (s, n)
10673 }
10674 (Some(s), None, Some(p)) => (s, p),
10675 (None, Some(e), Some(p)) => {
10676 let s = e - (p.saturating_sub(1) as i64) * freq;
10677 (s, p)
10678 }
10679 (Some(_), Some(_), Some(_)) => return Err(TimedeltaRangeError::TooManyParams),
10680 _ => return Err(TimedeltaRangeError::InsufficientParams),
10681 };
10682
10683 let nanos: Vec<i64> = (0..count).map(|i| start_ns + (i as i64) * freq).collect();
10684 let mut idx = Index::from_timedelta64(nanos);
10685 if let Some(n) = name {
10686 idx = idx.set_name(n);
10687 }
10688 Ok(idx)
10689}
10690
#[derive(Debug, Clone, Error)]
/// Errors reported by the date-range, date-offset and frequency-inference
/// functions.
pub enum DateRangeError {
    /// Fewer than two of `start`, `end` and `periods` were supplied.
    #[error("must specify at least two of start, end, periods")]
    InsufficientParams,
    /// Frequency inference was given fewer than three values.
    #[error("need at least 3 dates to infer frequency")]
    InsufficientDates,
    /// All three of `start`, `end` and `periods` were supplied.
    #[error("must specify no more than two of start, end, periods")]
    TooManyParams,
    /// `freq` was zero or negative.
    #[error("freq must be positive")]
    NonPositiveFreq,
    /// The requested range or date cannot be computed; also used for values
    /// that overflow or fall outside the representable date range.
    #[error("cannot compute range: end < start with positive freq")]
    InvalidRange,
    /// Input could not be interpreted as a datetime; the payload is the
    /// offending text or a description of the problem.
    #[error("invalid datetime string: {0}")]
    ParseError(String),
}
10709
10710fn parse_datetime_to_nanos(s: &str) -> Result<i64, DateRangeError> {
10712 use chrono::NaiveDateTime;
10713
10714 let trimmed = s.trim();
10715
10716 if let Ok(dt) = NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%d %H:%M:%S") {
10718 return datetime_to_nanos(dt);
10719 }
10720 if let Ok(dt) = NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%dT%H:%M:%S") {
10721 return datetime_to_nanos(dt);
10722 }
10723
10724 if let Ok(date) = chrono::NaiveDate::parse_from_str(trimmed, "%Y-%m-%d") {
10726 let dt = date
10727 .and_hms_opt(0, 0, 0)
10728 .ok_or(DateRangeError::InvalidRange)?;
10729 return datetime_to_nanos(dt);
10730 }
10731
10732 Err(DateRangeError::ParseError(trimmed.to_owned()))
10733}
10734
10735fn datetime_to_nanos(dt: chrono::NaiveDateTime) -> Result<i64, DateRangeError> {
10736 dt.and_utc()
10737 .timestamp_nanos_opt()
10738 .ok_or(DateRangeError::InvalidRange)
10739}
10740
10741fn datetime_nanos_to_date(nanos: i64) -> Result<chrono::NaiveDate, DateRangeError> {
10742 let (date, _) = split_datetime_nanos(nanos)?;
10743 Ok(date)
10744}
10745
10746fn split_datetime_nanos(nanos: i64) -> Result<(chrono::NaiveDate, i64), DateRangeError> {
10747 let days = nanos.div_euclid(Timedelta::NANOS_PER_DAY);
10748 let time_nanos = nanos.rem_euclid(Timedelta::NANOS_PER_DAY);
10749 let epoch = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).ok_or(DateRangeError::InvalidRange)?;
10750 let date = epoch
10751 .checked_add_signed(chrono::Duration::days(days))
10752 .ok_or(DateRangeError::InvalidRange)?;
10753 Ok((date, time_nanos))
10754}
10755
10756fn date_to_midnight_nanos(date: chrono::NaiveDate) -> Result<i64, DateRangeError> {
10757 let dt = date
10758 .and_hms_opt(0, 0, 0)
10759 .ok_or(DateRangeError::InvalidRange)?;
10760 dt.and_utc()
10761 .timestamp_nanos_opt()
10762 .ok_or(DateRangeError::InvalidRange)
10763}
10764
10765fn date_and_time_to_nanos(date: chrono::NaiveDate, time_nanos: i64) -> Result<i64, DateRangeError> {
10766 date_to_midnight_nanos(date)?
10767 .checked_add(time_nanos)
10768 .ok_or(DateRangeError::InvalidRange)
10769}
10770
10771fn checked_day_step(
10772 date: chrono::NaiveDate,
10773 days: i64,
10774) -> Result<chrono::NaiveDate, DateRangeError> {
10775 date.checked_add_signed(chrono::Duration::days(days))
10776 .ok_or(DateRangeError::InvalidRange)
10777}
10778
10779fn is_business_day(date: chrono::NaiveDate) -> bool {
10780 use chrono::{Datelike, Weekday};
10781
10782 !matches!(date.weekday(), Weekday::Sat | Weekday::Sun)
10783}
10784
10785fn next_business_day(mut date: chrono::NaiveDate) -> Result<chrono::NaiveDate, DateRangeError> {
10786 while !is_business_day(date) {
10787 date = checked_day_step(date, 1)?;
10788 }
10789 Ok(date)
10790}
10791
10792fn previous_business_day(mut date: chrono::NaiveDate) -> Result<chrono::NaiveDate, DateRangeError> {
10793 while !is_business_day(date) {
10794 date = checked_day_step(date, -1)?;
10795 }
10796 Ok(date)
10797}
10798
10799fn collect_business_days_from_start(
10800 start: chrono::NaiveDate,
10801 periods: usize,
10802) -> Result<Vec<i64>, DateRangeError> {
10803 let mut values = Vec::with_capacity(periods);
10804 let mut date = next_business_day(start)?;
10805 while values.len() < periods {
10806 values.push(date_to_midnight_nanos(date)?);
10807 date = next_business_day(checked_day_step(date, 1)?)?;
10808 }
10809 Ok(values)
10810}
10811
10812fn collect_business_days_through_end(
10813 end: chrono::NaiveDate,
10814 periods: usize,
10815) -> Result<Vec<i64>, DateRangeError> {
10816 let mut values = Vec::with_capacity(periods);
10817 let mut date = previous_business_day(end)?;
10818 while values.len() < periods {
10819 values.push(date_to_midnight_nanos(date)?);
10820 date = previous_business_day(checked_day_step(date, -1)?)?;
10821 }
10822 values.reverse();
10823 Ok(values)
10824}
10825
10826fn collect_business_days_between(
10827 start: chrono::NaiveDate,
10828 end: chrono::NaiveDate,
10829) -> Result<Vec<i64>, DateRangeError> {
10830 if end < start {
10831 return Err(DateRangeError::InvalidRange);
10832 }
10833
10834 let mut values = Vec::new();
10835 let mut date = next_business_day(start)?;
10836 while date <= end {
10837 values.push(date_to_midnight_nanos(date)?);
10838 date = next_business_day(checked_day_step(date, 1)?)?;
10839 }
10840 Ok(values)
10841}
10842
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// A calendar offset that can be applied to a timestamp. The payload is the
/// signed number of units to move.
pub enum DateOffset {
    /// Move by whole days.
    Day(i32),
    /// Move by business days (Monday to Friday).
    BusinessDay(i32),
    /// Move to a month end, counted in months.
    MonthEnd(i32),
}
10850
10851pub fn apply_date_offset(timestamp: &str, offset: DateOffset) -> Result<i64, DateRangeError> {
10856 let nanos = parse_datetime_to_nanos(timestamp)?;
10857 apply_date_offset_to_nanos(nanos, offset)
10858}
10859
10860pub fn apply_date_offset_to_nanos(nanos: i64, offset: DateOffset) -> Result<i64, DateRangeError> {
10862 match offset {
10863 DateOffset::Day(days) => nanos
10864 .checked_add(
10865 i64::from(days)
10866 .checked_mul(Timedelta::NANOS_PER_DAY)
10867 .ok_or(DateRangeError::InvalidRange)?,
10868 )
10869 .ok_or(DateRangeError::InvalidRange),
10870 DateOffset::BusinessDay(days) => {
10871 let (date, time_nanos) = split_datetime_nanos(nanos)?;
10872 let shifted = apply_business_day_offset(date, days)?;
10873 date_and_time_to_nanos(shifted, time_nanos)
10874 }
10875 DateOffset::MonthEnd(months) => {
10876 let (date, time_nanos) = split_datetime_nanos(nanos)?;
10877 let shifted = apply_month_end_offset(date, months)?;
10878 date_and_time_to_nanos(shifted, time_nanos)
10879 }
10880 }
10881}
10882
10883fn apply_business_day_offset(
10884 date: chrono::NaiveDate,
10885 days: i32,
10886) -> Result<chrono::NaiveDate, DateRangeError> {
10887 if days == 0 {
10888 return next_business_day(date);
10889 }
10890
10891 let mut shifted = date;
10892 if days > 0 {
10893 for _ in 0..days.unsigned_abs() {
10894 shifted = next_business_day(checked_day_step(shifted, 1)?)?;
10895 }
10896 } else {
10897 for _ in 0..days.unsigned_abs() {
10898 shifted = previous_business_day(checked_day_step(shifted, -1)?)?;
10899 }
10900 }
10901 Ok(shifted)
10902}
10903
10904fn last_day_of_month(year: i32, month: u32) -> Result<chrono::NaiveDate, DateRangeError> {
10905 let (next_year, next_month) = if month == 12 {
10906 (year.checked_add(1).ok_or(DateRangeError::InvalidRange)?, 1)
10907 } else {
10908 (year, month + 1)
10909 };
10910 let first_next_month = chrono::NaiveDate::from_ymd_opt(next_year, next_month, 1)
10911 .ok_or(DateRangeError::InvalidRange)?;
10912 checked_day_step(first_next_month, -1)
10913}
10914
10915fn add_months_to_month_end(
10916 date: chrono::NaiveDate,
10917 months: i32,
10918) -> Result<chrono::NaiveDate, DateRangeError> {
10919 use chrono::Datelike;
10920
10921 let month_index = i64::from(date.year())
10922 .checked_mul(12)
10923 .and_then(|value| value.checked_add(i64::from(date.month()) - 1))
10924 .and_then(|value| value.checked_add(i64::from(months)))
10925 .ok_or(DateRangeError::InvalidRange)?;
10926 let year =
10927 i32::try_from(month_index.div_euclid(12)).map_err(|_| DateRangeError::InvalidRange)?;
10928 let month =
10929 u32::try_from(month_index.rem_euclid(12) + 1).map_err(|_| DateRangeError::InvalidRange)?;
10930 last_day_of_month(year, month)
10931}
10932
10933fn month_ordinal(date: chrono::NaiveDate) -> i64 {
10934 use chrono::Datelike;
10935
10936 i64::from(date.year()) * 12 + i64::from(date.month()) - 1
10937}
10938
10939fn apply_month_end_offset(
10940 date: chrono::NaiveDate,
10941 months: i32,
10942) -> Result<chrono::NaiveDate, DateRangeError> {
10943 use chrono::Datelike;
10944
10945 let current_month_end = last_day_of_month(date.year(), date.month())?;
10946 if months == 0 {
10947 return if date == current_month_end {
10948 Ok(date)
10949 } else {
10950 Ok(current_month_end)
10951 };
10952 }
10953
10954 let month_steps = if months > 0 && date != current_month_end {
10955 months - 1
10956 } else {
10957 months
10958 };
10959 add_months_to_month_end(current_month_end, month_steps)
10960}
10961
10962fn fixed_frequency_name(diff: i64) -> Option<String> {
10963 if diff <= 0 {
10964 return None;
10965 }
10966
10967 let units = [
10968 (Timedelta::NANOS_PER_DAY, "D"),
10969 (Timedelta::NANOS_PER_HOUR, "h"),
10970 (Timedelta::NANOS_PER_MIN, "min"),
10971 (Timedelta::NANOS_PER_SEC, "s"),
10972 (Timedelta::NANOS_PER_MILLI, "ms"),
10973 (Timedelta::NANOS_PER_MICRO, "us"),
10974 (1, "ns"),
10975 ];
10976 for (unit_nanos, suffix) in units {
10977 if diff % unit_nanos == 0 {
10978 let count = diff / unit_nanos;
10979 return if count == 1 {
10980 Some(suffix.to_owned())
10981 } else {
10982 Some(format!("{count}{suffix}"))
10983 };
10984 }
10985 }
10986 None
10987}
10988
10989fn infer_business_day_freq(dates: &[(chrono::NaiveDate, i64)]) -> Option<String> {
10990 if dates.iter().any(|(date, _)| !is_business_day(*date)) {
10991 return None;
10992 }
10993 let first_time = dates[0].1;
10994 if dates.iter().any(|(_, time)| *time != first_time) {
10995 return None;
10996 }
10997 for window in dates.windows(2) {
10998 let expected = next_business_day(checked_day_step(window[0].0, 1).ok()?).ok()?;
10999 if window[1].0 != expected {
11000 return None;
11001 }
11002 }
11003 Some("B".to_owned())
11004}
11005
11006fn infer_month_end_freq(dates: &[(chrono::NaiveDate, i64)]) -> Option<String> {
11007 use chrono::Datelike;
11008
11009 let first_time = dates[0].1;
11010 if dates.iter().any(|(_, time)| *time != first_time) {
11011 return None;
11012 }
11013 for (date, _) in dates {
11014 if *date != last_day_of_month(date.year(), date.month()).ok()? {
11015 return None;
11016 }
11017 }
11018
11019 let step = month_ordinal(dates[1].0) - month_ordinal(dates[0].0);
11020 if step <= 0 {
11021 return None;
11022 }
11023 if dates
11024 .windows(2)
11025 .all(|window| month_ordinal(window[1].0) - month_ordinal(window[0].0) == step)
11026 {
11027 if step == 1 {
11028 Some("ME".to_owned())
11029 } else {
11030 Some(format!("{step}ME"))
11031 }
11032 } else {
11033 None
11034 }
11035}
11036
11037pub fn infer_freq(index: &Index) -> Result<Option<String>, DateRangeError> {
11042 let mut values = Vec::with_capacity(index.len());
11043 for label in index.labels() {
11044 match label {
11045 IndexLabel::Datetime64(value) if *value != i64::MIN => values.push(*value),
11046 IndexLabel::Datetime64(_) => return Ok(None),
11047 _ => {
11048 return Err(DateRangeError::ParseError(
11049 "expected datetime64 index".to_owned(),
11050 ));
11051 }
11052 }
11053 }
11054 infer_freq_from_nanos(&values)
11055}
11056
11057pub fn infer_freq_from_timestamps(timestamps: &[&str]) -> Result<Option<String>, DateRangeError> {
11059 let values: Vec<i64> = timestamps
11060 .iter()
11061 .map(|timestamp| parse_datetime_to_nanos(timestamp))
11062 .collect::<Result<_, _>>()?;
11063 infer_freq_from_nanos(&values)
11064}
11065
11066pub fn infer_freq_from_nanos(values: &[i64]) -> Result<Option<String>, DateRangeError> {
11068 if values.len() < 3 {
11069 return Err(DateRangeError::InsufficientDates);
11070 }
11071 if values.windows(2).any(|window| window[1] <= window[0]) {
11072 return Ok(None);
11073 }
11074
11075 let first_diff = values[1] - values[0];
11076 if values
11077 .windows(2)
11078 .all(|window| window[1] - window[0] == first_diff)
11079 {
11080 return Ok(fixed_frequency_name(first_diff));
11081 }
11082
11083 let dates: Vec<(chrono::NaiveDate, i64)> = values
11084 .iter()
11085 .map(|value| split_datetime_nanos(*value))
11086 .collect::<Result<_, _>>()?;
11087 if let Some(freq) = infer_business_day_freq(&dates) {
11088 return Ok(Some(freq));
11089 }
11090 if let Some(freq) = infer_month_end_freq(&dates) {
11091 return Ok(Some(freq));
11092 }
11093
11094 Ok(None)
11095}
11096
11097pub fn date_range(
11117 start: Option<&str>,
11118 end: Option<&str>,
11119 periods: Option<usize>,
11120 freq: i64,
11121 name: Option<&str>,
11122) -> Result<Index, DateRangeError> {
11123 if freq <= 0 {
11124 return Err(DateRangeError::NonPositiveFreq);
11125 }
11126
11127 let start_ns = start.map(parse_datetime_to_nanos).transpose()?;
11128 let end_ns = end.map(parse_datetime_to_nanos).transpose()?;
11129
11130 let (start_val, count) = match (start_ns, end_ns, periods) {
11131 (Some(s), Some(e), None) => {
11132 if e < s {
11133 return Err(DateRangeError::InvalidRange);
11134 }
11135 let span = e.checked_sub(s).ok_or(DateRangeError::InvalidRange)?;
11136 let n = (span / freq + 1) as usize;
11137 (s, n)
11138 }
11139 (Some(s), None, Some(p)) => (s, p),
11140 (None, Some(e), Some(p)) => {
11141 let offset = checked_date_range_offset(p.saturating_sub(1), freq)?;
11142 let s = e.checked_sub(offset).ok_or(DateRangeError::InvalidRange)?;
11143 (s, p)
11144 }
11145 (Some(_), Some(_), Some(_)) => return Err(DateRangeError::TooManyParams),
11146 _ => return Err(DateRangeError::InsufficientParams),
11147 };
11148
11149 let last_offset = checked_date_range_offset(count.saturating_sub(1), freq)?;
11150 start_val
11151 .checked_add(last_offset)
11152 .ok_or(DateRangeError::InvalidRange)?;
11153
11154 let nanos: Vec<i64> = (0..count)
11155 .map(|i| {
11156 let offset = checked_date_range_offset(i, freq)?;
11157 start_val
11158 .checked_add(offset)
11159 .ok_or(DateRangeError::InvalidRange)
11160 })
11161 .collect::<Result<_, _>>()?;
11162 let mut idx = Index::from_datetime64(nanos);
11163 if let Some(n) = name {
11164 idx = idx.set_name(n);
11165 }
11166 Ok(idx)
11167}
11168
11169fn checked_date_range_offset(steps: usize, freq: i64) -> Result<i64, DateRangeError> {
11170 let steps = i64::try_from(steps).map_err(|_| DateRangeError::InvalidRange)?;
11171 steps.checked_mul(freq).ok_or(DateRangeError::InvalidRange)
11172}
11173
11174pub fn bdate_range(
11179 start: Option<&str>,
11180 end: Option<&str>,
11181 periods: Option<usize>,
11182 name: Option<&str>,
11183) -> Result<Index, DateRangeError> {
11184 let start_date = start
11185 .map(parse_datetime_to_nanos)
11186 .transpose()?
11187 .map(datetime_nanos_to_date)
11188 .transpose()?;
11189 let end_date = end
11190 .map(parse_datetime_to_nanos)
11191 .transpose()?
11192 .map(datetime_nanos_to_date)
11193 .transpose()?;
11194
11195 let nanos = match (start_date, end_date, periods) {
11196 (Some(start), Some(end), None) => collect_business_days_between(start, end)?,
11197 (Some(start), None, Some(periods)) => collect_business_days_from_start(start, periods)?,
11198 (None, Some(end), Some(periods)) => collect_business_days_through_end(end, periods)?,
11199 (Some(_), Some(_), Some(_)) => return Err(DateRangeError::TooManyParams),
11200 _ => return Err(DateRangeError::InsufficientParams),
11201 };
11202
11203 let mut idx = Index::from_datetime64(nanos);
11204 if let Some(n) = name {
11205 idx = idx.set_name(n);
11206 }
11207 Ok(idx)
11208}
11209
/// Multi-level index whose rows are tuples holding one label per level.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MultiIndex {
    /// Per-level label columns: `levels[i][row]` is the label of `row` at
    /// level `i`. The row count is taken from the first level.
    levels: Vec<Vec<IndexLabel>>,
    /// Optional name of each level, indexed by level position.
    names: Vec<Option<String>>,
}
11228
11229impl MultiIndex {
11230 #[must_use]
11232 pub fn nlevels(&self) -> usize {
11233 self.levels.len()
11234 }
11235
11236 #[must_use]
11238 pub fn len(&self) -> usize {
11239 self.levels.first().map_or(0, Vec::len)
11240 }
11241
11242 #[must_use]
11244 pub fn is_empty(&self) -> bool {
11245 self.len() == 0
11246 }
11247
11248 fn row_cmp(&self, a: usize, b: usize) -> std::cmp::Ordering {
11253 for level in 0..self.nlevels() {
11254 let ord = self.levels[level][a].cmp(&self.levels[level][b]);
11255 if ord != std::cmp::Ordering::Equal {
11256 return ord;
11257 }
11258 }
11259 std::cmp::Ordering::Equal
11260 }
11261
11262 #[must_use]
11273 pub fn is_monotonic_increasing(&self) -> bool {
11274 if self.len() <= 1 {
11275 return true;
11276 }
11277 (0..self.len() - 1).all(|i| self.row_cmp(i, i + 1) != std::cmp::Ordering::Greater)
11278 }
11279
11280 #[must_use]
11285 pub fn is_monotonic_decreasing(&self) -> bool {
11286 if self.len() <= 1 {
11287 return true;
11288 }
11289 (0..self.len() - 1).all(|i| self.row_cmp(i, i + 1) != std::cmp::Ordering::Less)
11290 }
11291
11292 #[must_use]
11296 pub fn is_lexsorted(&self) -> bool {
11297 self.is_monotonic_increasing()
11298 }
11299
11300 #[must_use]
11302 pub fn names(&self) -> &[Option<String>] {
11303 &self.names
11304 }
11305
11306 #[must_use]
11308 pub fn name(&self) -> Option<&str> {
11309 None
11310 }
11311
11312 #[must_use]
11314 pub fn size(&self) -> usize {
11315 self.len()
11316 }
11317
11318 #[must_use]
11320 pub fn shape(&self) -> (usize,) {
11321 (self.len(),)
11322 }
11323
11324 #[must_use]
11326 pub fn ndim(&self) -> usize {
11327 1
11328 }
11329
11330 #[must_use]
11332 pub fn empty(&self) -> bool {
11333 self.is_empty()
11334 }
11335
11336 fn shift_unsupported_error() -> IndexError {
11337 IndexError::InvalidArgument(
11338 "This method is only implemented for DatetimeIndex, PeriodIndex and TimedeltaIndex; Got type MultiIndex"
11339 .to_owned(),
11340 )
11341 }
11342
11343 pub fn shift(&self, _periods: i64, _freq: Option<&str>) -> Result<Self, IndexError> {
11345 Err(Self::shift_unsupported_error())
11346 }
11347
11348 fn astype_categorical_error() -> IndexError {
11349 IndexError::InvalidArgument(
11350 "> 1 ndim Categorical are not supported at this time".to_owned(),
11351 )
11352 }
11353
11354 fn astype_unsupported_dtype_error(dtype: &str) -> IndexError {
11355 IndexError::InvalidArgument(format!(
11356 "Setting a MultiIndex dtype to anything other than object is not supported; got {dtype}"
11357 ))
11358 }
11359
11360 pub fn astype(&self, dtype: &str) -> Result<Self, IndexError> {
11366 match dtype {
11367 "object" | "O" => Ok(self.clone()),
11368 "category" => Err(Self::astype_categorical_error()),
11369 other => Err(Self::astype_unsupported_dtype_error(other)),
11370 }
11371 }
11372
11373 fn diff_unsupported_error() -> IndexError {
11374 IndexError::InvalidArgument(
11375 "cannot perform __sub__ with this index type: MultiIndex".to_owned(),
11376 )
11377 }
11378
11379 pub fn diff(&self, _periods: i64) -> Result<Self, IndexError> {
11384 Err(Self::diff_unsupported_error())
11385 }
11386
11387 fn round_unsupported_error() -> IndexError {
11388 IndexError::InvalidArgument(
11389 "loop of ufunc does not support argument 0 of type tuple which has no callable rint method"
11390 .to_owned(),
11391 )
11392 }
11393
11394 pub fn round(&self, _decimals: i32) -> Result<Self, IndexError> {
11399 Err(Self::round_unsupported_error())
11400 }
11401
11402 fn string_accessor_error() -> IndexError {
11403 IndexError::InvalidArgument(
11404 "Can only use .str accessor with Index, not MultiIndex".to_owned(),
11405 )
11406 }
11407
11408 pub fn r#str(&self) -> Result<(), IndexError> {
11410 Err(Self::string_accessor_error())
11411 }
11412
11413 fn asof_comparison_type_name(&self) -> &'static str {
11414 match self.levels.first().and_then(|level| level.first()) {
11415 Some(IndexLabel::Int64(_)) => "int",
11416 Some(IndexLabel::Utf8(_)) => "str",
11417 Some(IndexLabel::Timedelta64(_)) => "Timedelta",
11418 Some(IndexLabel::Datetime64(_)) => "Timestamp",
11419 Some(IndexLabel::Null(fp_types::NullKind::Null)) => "NoneType",
11420 Some(IndexLabel::Null(fp_types::NullKind::NaN)) => "float",
11421 Some(IndexLabel::Null(fp_types::NullKind::NaT)) => "NaTType",
11422 None => "object",
11423 }
11424 }
11425
11426 fn asof_unsupported_error(&self) -> IndexError {
11427 IndexError::InvalidArgument(format!(
11428 "'<' not supported between instances of 'tuple' and '{}'",
11429 self.asof_comparison_type_name()
11430 ))
11431 }
11432
11433 pub fn asof(&self, _key: &[IndexLabel]) -> Result<Option<Vec<IndexLabel>>, IndexError> {
11435 if self.is_empty() {
11436 return Ok(None);
11437 }
11438 Err(self.asof_unsupported_error())
11439 }
11440
11441 fn asof_locs_no_mask_error() -> IndexError {
11442 IndexError::InvalidArgument("object too deep for desired array".to_owned())
11443 }
11444
11445 fn asof_locs_empty_mask_error() -> IndexError {
11446 IndexError::InvalidArgument("attempt to get argmax of an empty sequence".to_owned())
11447 }
11448
11449 fn asof_locs_empty_take_error() -> IndexError {
11450 IndexError::InvalidArgument("cannot do a non-empty take from an empty axes.".to_owned())
11451 }
11452
11453 fn asof_locs_mask_length_error(expected: usize, actual: usize) -> IndexError {
11454 IndexError::InvalidArgument(format!(
11455 "boolean index did not match indexed array along axis 0; size of axis is {expected} but size of corresponding boolean axis is {actual}"
11456 ))
11457 }
11458
11459 fn asof_locs_broadcast_error(where_len: usize) -> IndexError {
11460 IndexError::InvalidArgument(format!(
11461 "operands could not be broadcast together with shapes ({where_len},) (2,)"
11462 ))
11463 }
11464
11465 pub fn asof_locs(
11467 &self,
11468 where_index: &Self,
11469 mask: Option<&[bool]>,
11470 ) -> Result<Vec<Option<usize>>, IndexError> {
11471 let Some(mask) = mask else {
11472 return Err(Self::asof_locs_no_mask_error());
11473 };
11474 if mask.len() != self.len() {
11475 return Err(Self::asof_locs_mask_length_error(self.len(), mask.len()));
11476 }
11477 if mask.is_empty() && self.is_empty() && where_index.is_empty() {
11478 return Err(Self::asof_locs_empty_mask_error());
11479 }
11480 if mask.iter().all(|include| !*include) && !where_index.is_empty() {
11481 return Err(Self::asof_locs_empty_take_error());
11482 }
11483 Err(Self::asof_locs_broadcast_error(where_index.len()))
11484 }
11485
11486 #[must_use]
11488 pub fn set_names(mut self, names: Vec<Option<String>>) -> Self {
11489 self.names = names;
11491 self.names.resize(self.nlevels(), None);
11492 self
11493 }
11494
11495 pub fn rename(&self, names: Vec<Option<String>>) -> Result<Self, IndexError> {
11500 if names.len() != self.nlevels() {
11501 return Err(IndexError::LengthMismatch {
11502 expected: self.nlevels(),
11503 actual: names.len(),
11504 context: "MultiIndex.rename names length".to_owned(),
11505 });
11506 }
11507 Ok(Self {
11508 levels: self.levels.clone(),
11509 names,
11510 })
11511 }
11512
11513 pub fn rename_level(&self, name: Option<String>, level: usize) -> Result<Self, IndexError> {
11515 if level >= self.nlevels() {
11516 return Err(IndexError::OutOfBounds {
11517 position: level,
11518 length: self.nlevels(),
11519 });
11520 }
11521 let mut names = self.names.clone();
11522 names[level] = name;
11523 Ok(Self {
11524 levels: self.levels.clone(),
11525 names,
11526 })
11527 }
11528
11529 fn shared_names(&self, other: &Self) -> Vec<Option<String>> {
11530 self.names
11531 .iter()
11532 .zip(&other.names)
11533 .map(
11534 |(left, right)| {
11535 if left == right { left.clone() } else { None }
11536 },
11537 )
11538 .collect()
11539 }
11540
11541 fn ensure_same_nlevels(&self, other: &Self) -> Result<(), IndexError> {
11542 if self.nlevels() != other.nlevels() {
11543 return Err(IndexError::LengthMismatch {
11544 expected: self.nlevels(),
11545 actual: other.nlevels(),
11546 context: "MultiIndex level count mismatch".to_owned(),
11547 });
11548 }
11549 Ok(())
11550 }
11551
11552 fn tuple_at(&self, row: usize) -> Vec<IndexLabel> {
11553 self.levels.iter().map(|level| level[row].clone()).collect()
11554 }
11555
11556 fn take_existing_positions(&self, positions: &[usize]) -> Self {
11557 let levels = self
11558 .levels
11559 .iter()
11560 .map(|level| {
11561 positions
11562 .iter()
11563 .map(|&position| level[position].clone())
11564 .collect()
11565 })
11566 .collect();
11567 Self {
11568 levels,
11569 names: self.names.clone(),
11570 }
11571 }
11572
11573 fn missing_label_for_level(&self, level_idx: usize) -> IndexLabel {
11574 self.levels[level_idx]
11575 .iter()
11576 .find(|label| label.is_missing())
11577 .cloned()
11578 .unwrap_or(IndexLabel::Datetime64(i64::MIN))
11579 }
11580
11581 fn from_tuples_with_names(
11582 tuples: Vec<Vec<IndexLabel>>,
11583 names: Vec<Option<String>>,
11584 ) -> Result<Self, IndexError> {
11585 Ok(Self::from_tuples(tuples)?.set_names(names))
11586 }
11587
11588 #[must_use]
11593 pub fn levels(&self) -> Vec<Index> {
11594 self.levels
11595 .iter()
11596 .enumerate()
11597 .map(|(level_idx, level)| {
11598 let mut seen = FxHashMap::<&IndexLabel, ()>::default();
11599 let labels = level
11600 .iter()
11601 .filter(|label| !label.is_missing() && seen.insert(label, ()).is_none())
11602 .cloned()
11603 .collect();
11604 let mut index = Index::new(labels);
11605 if let Some(name) = self.names.get(level_idx).and_then(|name| name.as_ref()) {
11606 index = index.set_name(name);
11607 }
11608 index
11609 })
11610 .collect()
11611 }
11612
11613 #[must_use]
11618 pub fn codes(&self) -> Vec<Vec<isize>> {
11619 self.levels
11620 .iter()
11621 .map(|level| {
11622 let mut positions = FxHashMap::<IndexLabel, isize>::default();
11623 let mut next_code = 0_isize;
11624 level
11625 .iter()
11626 .map(|label| {
11627 if label.is_missing() {
11628 -1
11629 } else if let Some(code) = positions.get(label) {
11630 *code
11631 } else {
11632 let code = next_code;
11633 positions.insert(label.clone(), code);
11634 next_code += 1;
11635 code
11636 }
11637 })
11638 .collect()
11639 })
11640 .collect()
11641 }
11642
11643 #[must_use]
11645 pub fn levshape(&self) -> Vec<usize> {
11646 self.levels().iter().map(Index::len).collect()
11647 }
11648
11649 #[must_use]
11653 pub fn to_list(&self) -> Vec<Vec<IndexLabel>> {
11654 (0..self.len()).map(|row| self.tuple_at(row)).collect()
11655 }
11656
11657 #[must_use]
11659 pub fn tolist(&self) -> Vec<Vec<IndexLabel>> {
11660 self.to_list()
11661 }
11662
11663 #[must_use]
11665 pub fn to_numpy(&self) -> Vec<Vec<IndexLabel>> {
11666 self.to_list()
11667 }
11668
11669 #[must_use]
11671 pub fn values(&self) -> Vec<Vec<IndexLabel>> {
11672 self.to_numpy()
11673 }
11674
11675 #[must_use]
11677 pub fn array(&self) -> Vec<Vec<IndexLabel>> {
11678 self.to_numpy()
11679 }
11680
11681 #[must_use]
11683 pub fn ravel(&self) -> Vec<Vec<IndexLabel>> {
11684 self.to_numpy()
11685 }
11686
11687 #[must_use]
11689 pub fn view(&self) -> Self {
11690 self.clone()
11691 }
11692
11693 #[must_use]
11695 pub fn transpose(&self) -> Self {
11696 self.clone()
11697 }
11698
11699 #[allow(non_snake_case)]
11701 #[must_use]
11702 pub fn T(&self) -> Self {
11703 self.transpose()
11704 }
11705
11706 #[must_use]
11711 pub fn to_frame(&self) -> Vec<Vec<IndexLabel>> {
11712 self.to_list()
11713 }
11714
11715 #[must_use]
11720 pub fn to_series(&self) -> Vec<(Vec<IndexLabel>, Vec<IndexLabel>)> {
11721 self.to_list()
11722 .into_iter()
11723 .map(|tuple| (tuple.clone(), tuple))
11724 .collect()
11725 }
11726
11727 #[must_use]
11729 pub fn format(&self) -> Vec<String> {
11730 self.to_list()
11731 .into_iter()
11732 .map(|tuple| {
11733 let parts: Vec<String> = tuple.into_iter().map(|label| label.to_string()).collect();
11734 format!("({})", parts.join(", "))
11735 })
11736 .collect()
11737 }
11738
11739 #[must_use]
11744 pub fn memory_usage(&self, deep: bool) -> usize {
11745 self.levels
11746 .iter()
11747 .flatten()
11748 .map(|label| match label {
11749 IndexLabel::Int64(_)
11750 | IndexLabel::Timedelta64(_)
11751 | IndexLabel::Datetime64(_)
11752 | IndexLabel::Null(_) => 8,
11753 IndexLabel::Utf8(value) => {
11754 if deep {
11755 std::mem::size_of::<String>() + value.len()
11756 } else {
11757 std::mem::size_of::<String>()
11758 }
11759 }
11760 })
11761 .sum::<usize>()
11762 + self.nlevels() * self.len() * std::mem::size_of::<isize>()
11763 }
11764
11765 #[must_use]
11767 pub fn nbytes(&self) -> usize {
11768 self.memory_usage(false)
11769 }
11770
11771 #[must_use]
11773 pub fn dtype(&self) -> &'static str {
11774 "object"
11775 }
11776
11777 #[must_use]
11779 pub fn dtypes(&self) -> Vec<&'static str> {
11780 self.levels
11781 .iter()
11782 .map(|level| Index::new(level.clone()).dtype())
11783 .collect()
11784 }
11785
11786 #[must_use]
11788 pub fn inferred_type(&self) -> &'static str {
11789 "mixed"
11790 }
11791
11792 #[must_use]
11794 pub fn infer_objects(&self) -> Self {
11795 self.clone()
11796 }
11797
11798 #[must_use]
11800 pub fn holds_integer(&self) -> bool {
11801 false
11802 }
11803
11804 pub fn item(&self) -> Result<Vec<IndexLabel>, IndexError> {
11806 if self.len() == 1 {
11807 Ok(self.tuple_at(0))
11808 } else {
11809 Err(IndexError::InvalidArgument(format!(
11810 "item requires exactly one tuple, got {}",
11811 self.len()
11812 )))
11813 }
11814 }
11815
11816 #[must_use]
11818 pub fn copy(&self) -> Self {
11819 self.clone()
11820 }
11821
11822 fn multi_index_isna_error() -> IndexError {
11823 IndexError::InvalidArgument("isna is not defined for MultiIndex".to_owned())
11824 }
11825
11826 pub fn hasnans(&self) -> Result<bool, IndexError> {
11828 Err(Self::multi_index_isna_error())
11829 }
11830
11831 pub fn isna(&self) -> Result<Vec<bool>, IndexError> {
11833 Err(Self::multi_index_isna_error())
11834 }
11835
11836 pub fn isnull(&self) -> Result<Vec<bool>, IndexError> {
11838 Err(Self::multi_index_isna_error())
11839 }
11840
11841 pub fn notna(&self) -> Result<Vec<bool>, IndexError> {
11843 Err(Self::multi_index_isna_error())
11844 }
11845
11846 pub fn notnull(&self) -> Result<Vec<bool>, IndexError> {
11848 Err(Self::multi_index_isna_error())
11849 }
11850
11851 #[must_use]
11853 pub fn fillna(&self, value: &IndexLabel) -> Self {
11854 let levels = self
11855 .levels
11856 .iter()
11857 .map(|level| {
11858 level
11859 .iter()
11860 .map(|label| {
11861 if label.is_missing() {
11862 value.clone()
11863 } else {
11864 label.clone()
11865 }
11866 })
11867 .collect()
11868 })
11869 .collect();
11870 Self {
11871 levels,
11872 names: self.names.clone(),
11873 }
11874 }
11875
11876 pub fn fillna_tuple(&self, values: &[IndexLabel]) -> Result<Self, IndexError> {
11878 if values.len() != self.nlevels() {
11879 return Err(IndexError::LengthMismatch {
11880 expected: self.nlevels(),
11881 actual: values.len(),
11882 context: "fillna_tuple replacement arity mismatch".to_owned(),
11883 });
11884 }
11885 let levels = self
11886 .levels
11887 .iter()
11888 .enumerate()
11889 .map(|(level_idx, level)| {
11890 level
11891 .iter()
11892 .map(|label| {
11893 if label.is_missing() {
11894 values[level_idx].clone()
11895 } else {
11896 label.clone()
11897 }
11898 })
11899 .collect()
11900 })
11901 .collect();
11902 Ok(Self {
11903 levels,
11904 names: self.names.clone(),
11905 })
11906 }
11907
11908 pub fn putmask(&self, cond: &[bool], value: Vec<IndexLabel>) -> Result<Self, IndexError> {
11910 if cond.len() != self.len() {
11911 return Err(IndexError::LengthMismatch {
11912 expected: self.len(),
11913 actual: cond.len(),
11914 context: "putmask condition length mismatch".to_owned(),
11915 });
11916 }
11917 if value.len() != self.nlevels() {
11918 return Err(IndexError::LengthMismatch {
11919 expected: self.nlevels(),
11920 actual: value.len(),
11921 context: "putmask tuple arity mismatch".to_owned(),
11922 });
11923 }
11924 let tuples = (0..self.len())
11925 .map(|row| {
11926 if cond[row] {
11927 value.clone()
11928 } else {
11929 self.tuple_at(row)
11930 }
11931 })
11932 .collect();
11933 Self::from_tuples_with_names(tuples, self.names.clone())
11934 }
11935
11936 pub fn r#where(&self, cond: &[bool], other: Vec<IndexLabel>) -> Result<Self, IndexError> {
11938 if cond.len() != self.len() {
11939 return Err(IndexError::LengthMismatch {
11940 expected: self.len(),
11941 actual: cond.len(),
11942 context: "where condition length mismatch".to_owned(),
11943 });
11944 }
11945 if other.len() != self.nlevels() {
11946 return Err(IndexError::LengthMismatch {
11947 expected: self.nlevels(),
11948 actual: other.len(),
11949 context: "where tuple arity mismatch".to_owned(),
11950 });
11951 }
11952 let tuples = (0..self.len())
11953 .map(|row| {
11954 if cond[row] {
11955 self.tuple_at(row)
11956 } else {
11957 other.clone()
11958 }
11959 })
11960 .collect();
11961 Self::from_tuples_with_names(tuples, self.names.clone())
11962 }
11963
11964 pub fn map<T, F>(&self, mut mapper: F) -> Vec<T>
11966 where
11967 F: FnMut(&[IndexLabel]) -> T,
11968 {
11969 (0..self.len())
11970 .map(|row| {
11971 let tuple = self.tuple_at(row);
11972 mapper(&tuple)
11973 })
11974 .collect()
11975 }
11976
11977 pub fn set_levels(&self, new_levels: Vec<Vec<IndexLabel>>) -> Result<Self, IndexError> {
11979 if new_levels.len() != self.nlevels() {
11980 return Err(IndexError::LengthMismatch {
11981 expected: self.nlevels(),
11982 actual: new_levels.len(),
11983 context: "set_levels level count mismatch".to_owned(),
11984 });
11985 }
11986 let codes = self.codes();
11987 let mut levels = Vec::with_capacity(self.nlevels());
11988 for (level_idx, level_codes) in codes.into_iter().enumerate() {
11989 let mut level = Vec::with_capacity(self.len());
11990 for code in level_codes {
11991 if code == -1 {
11992 level.push(self.missing_label_for_level(level_idx));
11993 continue;
11994 }
11995 if code < -1 {
11996 return Err(IndexError::InvalidArgument(format!(
11997 "negative code {code} at level {level_idx}"
11998 )));
11999 }
12000 let position = usize::try_from(code).map_err(|_| {
12001 IndexError::InvalidArgument(format!("invalid code {code} at level {level_idx}"))
12002 })?;
12003 let label = new_levels[level_idx]
12004 .get(position)
12005 .ok_or(IndexError::OutOfBounds {
12006 position,
12007 length: new_levels[level_idx].len(),
12008 })?;
12009 level.push(label.clone());
12010 }
12011 levels.push(level);
12012 }
12013 Ok(Self {
12014 levels,
12015 names: self.names.clone(),
12016 })
12017 }
12018
12019 pub fn set_codes(&self, codes: Vec<Vec<isize>>) -> Result<Self, IndexError> {
12021 if codes.len() != self.nlevels() {
12022 return Err(IndexError::LengthMismatch {
12023 expected: self.nlevels(),
12024 actual: codes.len(),
12025 context: "set_codes level count mismatch".to_owned(),
12026 });
12027 }
12028 let catalogs = self.levels();
12029 let mut levels = Vec::with_capacity(self.nlevels());
12030 for (level_idx, level_codes) in codes.into_iter().enumerate() {
12031 if level_codes.len() != self.len() {
12032 return Err(IndexError::LengthMismatch {
12033 expected: self.len(),
12034 actual: level_codes.len(),
12035 context: format!("set_codes level {level_idx} length mismatch"),
12036 });
12037 }
12038 let labels = catalogs[level_idx].labels();
12039 let mut level = Vec::with_capacity(self.len());
12040 for code in level_codes {
12041 if code == -1 {
12042 level.push(self.missing_label_for_level(level_idx));
12043 continue;
12044 }
12045 if code < -1 {
12046 return Err(IndexError::InvalidArgument(format!(
12047 "negative code {code} at level {level_idx}"
12048 )));
12049 }
12050 let position = usize::try_from(code).map_err(|_| {
12051 IndexError::InvalidArgument(format!("invalid code {code} at level {level_idx}"))
12052 })?;
12053 let label = labels.get(position).ok_or(IndexError::OutOfBounds {
12054 position,
12055 length: labels.len(),
12056 })?;
12057 level.push(label.clone());
12058 }
12059 levels.push(level);
12060 }
12061 Ok(Self {
12062 levels,
12063 names: self.names.clone(),
12064 })
12065 }
12066
12067 #[must_use]
12070 pub fn remove_unused_levels(&self) -> Self {
12071 self.clone()
12072 }
12073
12074 #[must_use]
12076 pub fn is_(&self, other: &Self) -> bool {
12077 std::ptr::eq(self, other)
12078 }
12079
12080 #[must_use]
12082 pub fn is_boolean(&self) -> bool {
12083 false
12084 }
12085
12086 #[must_use]
12088 pub fn is_categorical(&self) -> bool {
12089 false
12090 }
12091
12092 #[must_use]
12094 pub fn is_floating(&self) -> bool {
12095 false
12096 }
12097
12098 #[must_use]
12100 pub fn is_integer(&self) -> bool {
12101 false
12102 }
12103
12104 #[must_use]
12106 pub fn is_interval(&self) -> bool {
12107 false
12108 }
12109
12110 #[must_use]
12112 pub fn is_numeric(&self) -> bool {
12113 false
12114 }
12115
12116 #[must_use]
12118 pub fn is_object(&self) -> bool {
12119 true
12120 }
12121
12122 #[must_use]
12124 pub fn equals(&self, other: &Self) -> bool {
12125 self.levels == other.levels
12126 }
12127
12128 #[must_use]
12130 pub fn identical(&self, other: &Self) -> bool {
12131 self.equals(other) && self.names == other.names
12132 }
12133
12134 #[must_use]
12136 pub fn equal_levels(&self, other: &Self) -> bool {
12137 self.levels() == other.levels()
12138 }
12139
12140 pub fn get_level_values(&self, level: usize) -> Result<Index, IndexError> {
12144 if level >= self.levels.len() {
12145 return Err(IndexError::OutOfBounds {
12146 position: level,
12147 length: self.levels.len(),
12148 });
12149 }
12150 let mut idx = Index::new(self.levels[level].clone());
12151 if let Some(name) = self.names.get(level).and_then(|n| n.as_ref()) {
12152 idx = idx.set_name(name);
12153 }
12154 Ok(idx)
12155 }
12156
12157 pub fn get_tuple(&self, position: usize) -> Option<Vec<&IndexLabel>> {
12159 if position >= self.len() {
12160 return None;
12161 }
12162 Some(self.levels.iter().map(|level| &level[position]).collect())
12163 }
12164
12165 pub fn take(&self, positions: &[usize]) -> Result<Self, IndexError> {
12167 for &position in positions {
12168 if position >= self.len() {
12169 return Err(IndexError::OutOfBounds {
12170 position,
12171 length: self.len(),
12172 });
12173 }
12174 }
12175
12176 let mut levels = Vec::with_capacity(self.nlevels());
12177 for level in &self.levels {
12178 let selected = positions
12179 .iter()
12180 .map(|&position| level[position].clone())
12181 .collect();
12182 levels.push(selected);
12183 }
12184
12185 Ok(Self {
12186 levels,
12187 names: self.names.clone(),
12188 })
12189 }
12190
12191 pub fn delete(&self, loc: usize) -> Result<Self, IndexError> {
12195 if loc >= self.len() {
12196 return Err(IndexError::OutOfBounds {
12197 position: loc,
12198 length: self.len(),
12199 });
12200 }
12201 let positions: Vec<usize> = (0..self.len()).filter(|&row| row != loc).collect();
12202 Ok(self.take_existing_positions(&positions))
12203 }
12204
12205 pub fn insert(&self, loc: usize, item: Vec<IndexLabel>) -> Result<Self, IndexError> {
12210 if loc > self.len() {
12211 return Err(IndexError::OutOfBounds {
12212 position: loc,
12213 length: self.len(),
12214 });
12215 }
12216 if self.nlevels() == 0 {
12217 if loc != 0 {
12218 return Err(IndexError::OutOfBounds {
12219 position: loc,
12220 length: 0,
12221 });
12222 }
12223 return Self::from_tuples(vec![item]);
12224 }
12225 if item.len() != self.nlevels() {
12226 return Err(IndexError::LengthMismatch {
12227 expected: self.nlevels(),
12228 actual: item.len(),
12229 context: "insert tuple arity mismatch".to_owned(),
12230 });
12231 }
12232
12233 let mut levels = self.levels.clone();
12234 for (level_idx, label) in item.into_iter().enumerate() {
12235 levels[level_idx].insert(loc, label);
12236 }
12237 Ok(Self {
12238 levels,
12239 names: self.names.clone(),
12240 })
12241 }
12242
12243 pub fn drop(&self, labels_to_drop: &[Vec<IndexLabel>]) -> Result<Self, IndexError> {
12248 for label in labels_to_drop {
12249 self.validate_key_arity(label, false)?;
12250 }
12251 let drop_set: FxHashSet<&Vec<IndexLabel>> = labels_to_drop.iter().collect();
12258 let mut found: FxHashSet<&Vec<IndexLabel>> = FxHashSet::default();
12259 let mut positions = Vec::new();
12260 let mut key: Vec<IndexLabel> = Vec::with_capacity(self.nlevels());
12261 for row in 0..self.len() {
12262 key.clear();
12263 key.extend(self.levels.iter().map(|level| level[row].clone()));
12264 if let Some(matched) = drop_set.get(&key) {
12265 found.insert(*matched);
12266 } else {
12267 positions.push(row);
12268 }
12269 }
12270 if let Some(missing) = labels_to_drop.iter().find(|label| !found.contains(label)) {
12271 return Err(IndexError::InvalidArgument(format!(
12272 "tuple key not found: {:?}",
12273 missing
12274 )));
12275 }
12276 Ok(self.take_existing_positions(&positions))
12277 }
12278
12279 fn validate_key_arity(
12280 &self,
12281 key: &[IndexLabel],
12282 allow_partial: bool,
12283 ) -> Result<(), IndexError> {
12284 let nlevels = self.nlevels();
12285 if key.is_empty() {
12286 return Err(IndexError::InvalidArgument(
12287 "MultiIndex key must contain at least one level".to_owned(),
12288 ));
12289 }
12290 if (!allow_partial && key.len() != nlevels) || (allow_partial && key.len() > nlevels) {
12291 return Err(IndexError::InvalidArgument(format!(
12292 "wrong tuple arity: expected {}{}, got {}",
12293 if allow_partial { "1.." } else { "" },
12294 nlevels,
12295 key.len()
12296 )));
12297 }
12298 Ok(())
12299 }
12300
12301 fn row_matches_prefix(&self, row: usize, key: &[IndexLabel]) -> bool {
12302 key.iter()
12303 .enumerate()
12304 .all(|(level, expected)| &self.levels[level][row] == expected)
12305 }
12306
12307 fn row_prefix_cmp(&self, row: usize, key: &[IndexLabel]) -> std::cmp::Ordering {
12308 for (level, expected) in key.iter().enumerate() {
12309 let ord = self.levels[level][row].cmp(expected);
12310 if ord != std::cmp::Ordering::Equal {
12311 return ord;
12312 }
12313 }
12314 std::cmp::Ordering::Equal
12315 }
12316
12317 pub fn get_loc_tuple(&self, key: &[IndexLabel]) -> Result<Vec<usize>, IndexError> {
12319 self.validate_key_arity(key, false)?;
12320 let positions: Vec<usize> = (0..self.len())
12321 .filter(|&row| self.row_matches_prefix(row, key))
12322 .collect();
12323 if positions.is_empty() {
12324 return Err(IndexError::InvalidArgument(format!(
12325 "tuple key not found: {:?}",
12326 key
12327 )));
12328 }
12329 Ok(positions)
12330 }
12331
12332 pub fn get_loc(
12338 &self,
12339 key: &[IndexLabel],
12340 level: Option<usize>,
12341 ) -> Result<Vec<usize>, IndexError> {
12342 if let Some(level) = level {
12343 if level >= self.nlevels() {
12344 return Err(IndexError::OutOfBounds {
12345 position: level,
12346 length: self.nlevels(),
12347 });
12348 }
12349 if key.len() != 1 {
12350 return Err(IndexError::InvalidArgument(format!(
12351 "level lookup expects exactly one label, got {}",
12352 key.len()
12353 )));
12354 }
12355 let positions: Vec<usize> = self.levels[level]
12356 .iter()
12357 .enumerate()
12358 .filter_map(|(row, label)| if label == &key[0] { Some(row) } else { None })
12359 .collect();
12360 if positions.is_empty() {
12361 return Err(IndexError::InvalidArgument(format!(
12362 "level key not found at level {level}: {:?}",
12363 key[0]
12364 )));
12365 }
12366 return Ok(positions);
12367 }
12368
12369 self.validate_key_arity(key, true)?;
12370 let positions: Vec<usize> = (0..self.len())
12371 .filter(|&row| self.row_matches_prefix(row, key))
12372 .collect();
12373 if positions.is_empty() {
12374 return Err(IndexError::InvalidArgument(format!(
12375 "tuple key not found: {:?}",
12376 key
12377 )));
12378 }
12379 Ok(positions)
12380 }
12381
12382 pub fn get_locs(&self, key: &[IndexLabel]) -> Result<Vec<usize>, IndexError> {
12386 if key.is_empty() {
12387 return Ok(Vec::new());
12388 }
12389 self.get_loc(key, None)
12390 }
12391
12392 pub fn get_loc_level(
12394 &self,
12395 key: &[IndexLabel],
12396 ) -> Result<(Vec<usize>, Option<MultiIndexOrIndex>), IndexError> {
12397 let positions = self.get_loc(key, None)?;
12398 if key.len() == self.nlevels() {
12399 return Ok((positions, None));
12400 }
12401
12402 let mut remaining = MultiIndexOrIndex::Multi(self.take(&positions)?);
12403 for _ in 0..key.len() {
12404 remaining = match remaining {
12405 MultiIndexOrIndex::Multi(mi) => mi.droplevel(0)?,
12406 MultiIndexOrIndex::Index(_) => {
12407 return Err(IndexError::InvalidArgument(
12408 "cannot drop more levels than remain".to_owned(),
12409 ));
12410 }
12411 };
12412 }
12413
12414 Ok((positions, Some(remaining)))
12415 }
12416
12417 pub fn slice_locs(
12421 &self,
12422 start: Option<&[IndexLabel]>,
12423 end: Option<&[IndexLabel]>,
12424 ) -> Result<(usize, usize), IndexError> {
12425 if let Some(start) = start {
12426 self.validate_key_arity(start, true)?;
12427 }
12428 if let Some(end) = end {
12429 self.validate_key_arity(end, true)?;
12430 }
12431
12432 let start_pos = match start {
12433 Some(start_key) => (0..self.len())
12434 .find(|&row| self.row_prefix_cmp(row, start_key) != std::cmp::Ordering::Less)
12435 .unwrap_or(self.len()),
12436 None => 0,
12437 };
12438 let end_pos = match end {
12439 Some(end_key) => (0..self.len())
12440 .rfind(|&row| self.row_prefix_cmp(row, end_key) != std::cmp::Ordering::Greater)
12441 .map_or(0, |row| row + 1),
12442 None => self.len(),
12443 };
12444
12445 if end_pos < start_pos {
12446 return Ok((start_pos, start_pos));
12447 }
12448 Ok((start_pos, end_pos))
12449 }
12450
12451 pub fn get_slice_bound(&self, label: &[IndexLabel], side: &str) -> Result<usize, IndexError> {
12453 match side {
12454 "left" => Ok(self.slice_locs(Some(label), Some(label))?.0),
12455 "right" => Ok(self.slice_locs(Some(label), Some(label))?.1),
12456 other => Err(IndexError::InvalidArgument(format!(
12457 "get_slice_bound: side must be 'left' or 'right', got {other:?}"
12458 ))),
12459 }
12460 }
12461
12462 pub fn slice_indexer(
12464 &self,
12465 start: Option<&[IndexLabel]>,
12466 end: Option<&[IndexLabel]>,
12467 ) -> Result<(usize, usize), IndexError> {
12468 self.slice_locs(start, end)
12469 }
12470
12471 pub fn truncate(
12477 &self,
12478 before: Option<&[IndexLabel]>,
12479 after: Option<&[IndexLabel]>,
12480 ) -> Result<Self, IndexError> {
12481 let (start, stop) = self.slice_locs(before, after)?;
12482 let positions: Vec<usize> = (start..stop).collect();
12483 Ok(self.take_existing_positions(&positions))
12484 }
12485
12486 pub fn searchsorted(&self, target: &Self, side: &str) -> Result<Vec<usize>, IndexError> {
12492 if side != "left" && side != "right" {
12493 return Err(IndexError::InvalidArgument(format!(
12494 "searchsorted: side must be 'left' or 'right', got {side:?}"
12495 )));
12496 }
12497
12498 Ok((0..target.len())
12499 .map(|target_row| {
12500 let needle = target.tuple_at(target_row);
12501 let mut lo = 0_usize;
12502 let mut hi = self.len();
12503 while lo < hi {
12504 let mid = lo + (hi - lo) / 2;
12505 let cmp = self.tuple_at(mid).cmp(&needle);
12506 use std::cmp::Ordering;
12507 let go_right = matches!(
12508 (cmp, side),
12509 (Ordering::Less, _) | (Ordering::Equal, "right")
12510 );
12511 if go_right {
12512 lo = mid + 1;
12513 } else {
12514 hi = mid;
12515 }
12516 }
12517 lo
12518 })
12519 .collect())
12520 }
12521
12522 #[must_use]
12529 fn sorted_packed_keys(&self) -> Option<Vec<u64>> {
12545 let nlev = self.nlevels();
12546 if nlev == 0 {
12547 return None;
12548 }
12549 let n = self.len();
12550 let mut keys = vec![0u64; n];
12551 let mut combined: u128 = 1;
12552 for level in 0..nlev {
12553 let col = &self.levels[level];
12554 let mut sorted: Vec<&IndexLabel> = col.iter().collect::<FxHashSet<_>>().into_iter().collect();
12558 sorted.sort_unstable();
12559 let radix = sorted.len() as u64;
12560 let mut rank: FxHashMap<&IndexLabel, u64> =
12561 FxHashMap::with_capacity_and_hasher(sorted.len(), Default::default());
12562 for (r, value) in sorted.iter().enumerate() {
12563 rank.insert(*value, r as u64);
12564 }
12565 for (dst, value) in keys.iter_mut().zip(col.iter()) {
12566 *dst = dst.checked_mul(radix)?.checked_add(rank[value])?;
12567 }
12568 combined = combined.checked_mul(radix as u128)?;
12569 if combined > u64::MAX as u128 {
12570 return None;
12571 }
12572 }
12573 Some(keys)
12574 }
12575
12576 fn identity_packed_keys(&self) -> Option<Vec<u64>> {
12584 let nlev = self.nlevels();
12585 if nlev == 0 {
12586 return None;
12587 }
12588 let n = self.len();
12589 let mut keys = vec![0u64; n];
12590 let mut combined: u128 = 1;
12591 for level in 0..nlev {
12592 let col = &self.levels[level];
12593 let mut code: FxHashMap<&IndexLabel, u64> =
12594 FxHashMap::with_capacity_and_hasher(col.len(), Default::default());
12595 let mut next = 0u64;
12596 let codes: Vec<u64> = col
12597 .iter()
12598 .map(|value| {
12599 *code.entry(value).or_insert_with(|| {
12600 let c = next;
12601 next += 1;
12602 c
12603 })
12604 })
12605 .collect();
12606 let radix = next;
12607 for (dst, &c) in keys.iter_mut().zip(&codes) {
12608 *dst = dst.checked_mul(radix)?.checked_add(c)?;
12609 }
12610 combined = combined.checked_mul(radix as u128)?;
12611 if combined > u64::MAX as u128 {
12612 return None;
12613 }
12614 }
12615 Some(keys)
12616 }
12617
12618 fn factorize_packed_keys(&self, target: &Self) -> Option<(Vec<u64>, Vec<u64>)> {
12619 let nlev = self.nlevels();
12620 if nlev == 0 || nlev != target.nlevels() {
12621 return None;
12622 }
12623 let n = self.len();
12624 let m = target.len();
12625 let mut src = vec![0u64; n];
12626 let mut tgt = vec![0u64; m];
12627 let mut combined: u128 = 1;
12628 for level in 0..nlev {
12629 let mut codes: FxHashMap<&IndexLabel, u64> = FxHashMap::default();
12630 let mut next = 0u64;
12631 let s_level = &self.levels[level];
12634 let t_level = &target.levels[level];
12635 let s_codes: Vec<u64> = (0..n)
12636 .map(|row| {
12637 *codes.entry(&s_level[row]).or_insert_with(|| {
12638 let c = next;
12639 next += 1;
12640 c
12641 })
12642 })
12643 .collect();
12644 let t_codes: Vec<u64> = (0..m)
12645 .map(|row| {
12646 *codes.entry(&t_level[row]).or_insert_with(|| {
12647 let c = next;
12648 next += 1;
12649 c
12650 })
12651 })
12652 .collect();
12653 let radix = next;
12656 for (dst, &c) in src.iter_mut().zip(&s_codes) {
12657 *dst = dst.checked_mul(radix)?.checked_add(c)?;
12658 }
12659 for (dst, &c) in tgt.iter_mut().zip(&t_codes) {
12660 *dst = dst.checked_mul(radix)?.checked_add(c)?;
12661 }
12662 combined = combined.checked_mul(radix as u128)?;
12663 if combined > u64::MAX as u128 {
12664 return None;
12665 }
12666 }
12667 Some((src, tgt))
12668 }
12669
12670 pub fn get_indexer_non_unique(&self, target: &Self) -> (Vec<isize>, Vec<usize>) {
12671 if self.nlevels() != target.nlevels() {
12672 return (vec![-1; target.len()], (0..target.len()).collect());
12673 }
12674
12675 if let Some((src_keys, tgt_keys)) = self.factorize_packed_keys(target) {
12676 let mut positions =
12677 FxHashMap::<u64, Vec<usize>>::with_capacity_and_hasher(self.len(), Default::default());
12678 for (row, &key) in src_keys.iter().enumerate() {
12679 positions.entry(key).or_default().push(row);
12680 }
12681 let mut indexer = Vec::new();
12682 let mut missing = Vec::new();
12683 for (target_row, &key) in tgt_keys.iter().enumerate() {
12684 if let Some(matches) = positions.get(&key) {
12685 indexer.extend(matches.iter().map(|&pos| pos as isize));
12686 } else {
12687 indexer.push(-1);
12688 missing.push(target_row);
12689 }
12690 }
12691 return (indexer, missing);
12692 }
12693
12694 let mut positions = FxHashMap::<Vec<IndexLabel>, Vec<usize>>::with_capacity_and_hasher(
12695 self.len(),
12696 Default::default(),
12697 );
12698 for row in 0..self.len() {
12699 let key: Vec<IndexLabel> = self.levels.iter().map(|level| level[row].clone()).collect();
12700 positions.entry(key).or_default().push(row);
12701 }
12702
12703 let mut indexer = Vec::new();
12704 let mut missing = Vec::new();
12705 for target_row in 0..target.len() {
12706 let key: Vec<IndexLabel> = target
12707 .levels
12708 .iter()
12709 .map(|level| level[target_row].clone())
12710 .collect();
12711 if let Some(matches) = positions.get(&key) {
12712 indexer.extend(matches.iter().map(|&pos| pos as isize));
12713 } else {
12714 indexer.push(-1);
12715 missing.push(target_row);
12716 }
12717 }
12718
12719 (indexer, missing)
12720 }
12721
12722 pub fn get_indexer(&self, target: &Self) -> Result<Vec<isize>, IndexError> {
12731 if self.has_duplicates() {
12732 return Err(IndexError::InvalidArgument(
12733 "get_indexer requires a uniquely valued MultiIndex".to_owned(),
12734 ));
12735 }
12736 if self.nlevels() != target.nlevels() {
12737 return Ok(vec![-1; target.len()]);
12738 }
12739
12740 if let Some((src_keys, tgt_keys)) = self.factorize_packed_keys(target) {
12741 let mut positions = FxHashMap::<u64, isize>::with_capacity_and_hasher(
12742 self.len(),
12743 Default::default(),
12744 );
12745 for (row, &key) in src_keys.iter().enumerate() {
12746 positions
12747 .entry(key)
12748 .or_insert(isize::try_from(row).unwrap_or(isize::MAX));
12749 }
12750 return Ok(tgt_keys
12751 .iter()
12752 .map(|key| positions.get(key).copied().unwrap_or(-1))
12753 .collect());
12754 }
12755
12756 let mut positions = FxHashMap::<Vec<IndexLabel>, isize>::with_capacity_and_hasher(
12757 self.len(),
12758 Default::default(),
12759 );
12760 for row in 0..self.len() {
12761 positions
12762 .entry(self.tuple_at(row))
12763 .or_insert(isize::try_from(row).unwrap_or(isize::MAX));
12764 }
12765
12766 Ok((0..target.len())
12767 .map(|target_row| {
12768 let key = target.tuple_at(target_row);
12769 positions.get(&key).copied().unwrap_or(-1)
12770 })
12771 .collect())
12772 }
12773
12774 pub fn get_indexer_for(&self, target: &Self) -> Result<Vec<isize>, IndexError> {
12780 if self.has_duplicates() {
12781 Ok(self.get_indexer_non_unique(target).0)
12782 } else {
12783 self.get_indexer(target)
12784 }
12785 }
12786
12787 pub fn reindex(&self, target: &Self) -> Result<(Self, Vec<isize>), IndexError> {
12793 Ok((target.clone(), self.get_indexer(target)?))
12794 }
12795
12796 #[must_use]
12805 pub fn duplicated(&self, keep: DuplicateKeep) -> Vec<bool> {
12806 let len = self.len();
12807 let mut out = vec![false; len];
12808 if len == 0 {
12809 return out;
12810 }
12811 if let Some(keys) = self.identity_packed_keys() {
12821 match keep {
12822 DuplicateKeep::First => {
12823 let mut seen: FxHashSet<u64> =
12824 FxHashSet::with_capacity_and_hasher(len, Default::default());
12825 for (row, slot) in out.iter_mut().enumerate() {
12826 if !seen.insert(keys[row]) {
12827 *slot = true;
12828 }
12829 }
12830 }
12831 DuplicateKeep::Last => {
12832 let mut seen: FxHashSet<u64> =
12833 FxHashSet::with_capacity_and_hasher(len, Default::default());
12834 for row in (0..len).rev() {
12835 if !seen.insert(keys[row]) {
12836 out[row] = true;
12837 }
12838 }
12839 }
12840 DuplicateKeep::None => {
12841 let mut counts: FxHashMap<u64, usize> =
12842 FxHashMap::with_capacity_and_hasher(len, Default::default());
12843 for &key in &keys {
12844 *counts.entry(key).or_insert(0) += 1;
12845 }
12846 for (row, slot) in out.iter_mut().enumerate() {
12847 if counts[&keys[row]] > 1 {
12848 *slot = true;
12849 }
12850 }
12851 }
12852 }
12853 return out;
12854 }
12855
12856 let key_at = |row: usize| -> Vec<IndexLabel> {
12857 self.levels.iter().map(|level| level[row].clone()).collect()
12858 };
12859 match keep {
12860 DuplicateKeep::First => {
12861 let mut seen: FxHashSet<Vec<IndexLabel>> =
12863 FxHashSet::with_capacity_and_hasher(len, Default::default());
12864 for (row, slot) in out.iter_mut().enumerate() {
12865 if !seen.insert(key_at(row)) {
12866 *slot = true;
12867 }
12868 }
12869 }
12870 DuplicateKeep::Last => {
12871 let mut seen: FxHashSet<Vec<IndexLabel>> =
12874 FxHashSet::with_capacity_and_hasher(len, Default::default());
12875 for row in (0..len).rev() {
12876 if !seen.insert(key_at(row)) {
12877 out[row] = true;
12878 }
12879 }
12880 }
12881 DuplicateKeep::None => {
12882 let mut counts: FxHashMap<Vec<IndexLabel>, usize> =
12884 FxHashMap::with_capacity_and_hasher(len, Default::default());
12885 for row in 0..len {
12886 *counts.entry(key_at(row)).or_insert(0) += 1;
12887 }
12888 for (row, slot) in out.iter_mut().enumerate() {
12889 if counts[&key_at(row)] > 1 {
12890 *slot = true;
12891 }
12892 }
12893 }
12894 }
12895 out
12896 }
12897
12898 #[must_use]
12902 pub fn is_unique(&self) -> bool {
12903 !self.duplicated(DuplicateKeep::First).iter().any(|&b| b)
12904 }
12905
12906 #[must_use]
12910 pub fn has_duplicates(&self) -> bool {
12911 !self.is_unique()
12912 }
12913
12914 #[must_use]
12916 pub fn drop_duplicates(&self) -> Self {
12917 self.drop_duplicates_keep(DuplicateKeep::First)
12918 }
12919
12920 #[must_use]
12922 pub fn drop_duplicates_keep(&self, keep: DuplicateKeep) -> Self {
12923 let duplicated = self.duplicated(keep);
12924 let positions: Vec<usize> = duplicated
12925 .iter()
12926 .enumerate()
12927 .filter_map(|(position, is_duplicated)| (!is_duplicated).then_some(position))
12928 .collect();
12929 self.take_existing_positions(&positions)
12930 }
12931
12932 #[must_use]
12934 pub fn unique(&self) -> Self {
12935 self.drop_duplicates_keep(DuplicateKeep::First)
12936 }
12937
12938 #[must_use]
12940 pub fn nunique(&self) -> usize {
12941 self.unique().len()
12942 }
12943
12944 pub fn all(&self) -> Result<bool, IndexError> {
12946 Err(IndexError::InvalidArgument(
12947 "cannot perform all with this index type: MultiIndex".to_owned(),
12948 ))
12949 }
12950
12951 pub fn any(&self) -> Result<bool, IndexError> {
12953 Err(IndexError::InvalidArgument(
12954 "cannot perform any with this index type: MultiIndex".to_owned(),
12955 ))
12956 }
12957
12958 #[must_use]
12963 pub fn factorize(&self) -> (Vec<isize>, Self) {
12964 let mut positions = HashMap::<Vec<IndexLabel>, isize>::new();
12965 let mut uniques = Vec::<Vec<IndexLabel>>::new();
12966 let mut codes = Vec::with_capacity(self.len());
12967 for tuple in self.to_list() {
12968 if let Some(code) = positions.get(&tuple) {
12969 codes.push(*code);
12970 } else {
12971 let code = isize::try_from(uniques.len()).unwrap_or(isize::MAX);
12972 positions.insert(tuple.clone(), code);
12973 uniques.push(tuple);
12974 codes.push(code);
12975 }
12976 }
12977 let mut levels: Vec<Vec<IndexLabel>> = (0..self.nlevels())
12978 .map(|_| Vec::with_capacity(uniques.len()))
12979 .collect();
12980 for tuple in uniques {
12981 for (level_idx, label) in tuple.into_iter().enumerate() {
12982 levels[level_idx].push(label);
12983 }
12984 }
12985 let unique_index = Self {
12986 levels,
12987 names: self.names.clone(),
12988 };
12989 (codes, unique_index)
12990 }
12991
12992 #[must_use]
12994 pub fn value_counts(&self) -> Vec<(Vec<IndexLabel>, usize)> {
12995 let mut counts = HashMap::<Vec<IndexLabel>, usize>::new();
12996 for tuple in self.to_list() {
12997 *counts.entry(tuple).or_insert(0) += 1;
12998 }
12999 let mut pairs: Vec<(Vec<IndexLabel>, usize)> = counts.into_iter().collect();
13000 pairs.sort_by(|(left_tuple, left_count), (right_tuple, right_count)| {
13001 right_count
13002 .cmp(left_count)
13003 .then_with(|| left_tuple.cmp(right_tuple))
13004 });
13005 pairs
13006 }
13007
13008 #[must_use]
13010 pub fn argsort(&self) -> Vec<usize> {
13011 let mut order: Vec<usize> = (0..self.len()).collect();
13012 if let Some(keys) = self.sorted_packed_keys() {
13017 order.sort_by(|&left, &right| {
13018 keys[left].cmp(&keys[right]).then_with(|| left.cmp(&right))
13019 });
13020 return order;
13021 }
13022 order.sort_by(|&left, &right| self.row_cmp(left, right).then_with(|| left.cmp(&right)));
13023 order
13024 }
13025
13026 #[must_use]
13028 pub fn sort_values(&self) -> Self {
13029 self.take_existing_positions(&self.argsort())
13030 }
13031
13032 #[must_use]
13034 pub fn sort(&self) -> Self {
13035 self.sort_values()
13036 }
13037
13038 #[must_use]
13040 pub fn sortlevel(&self) -> (Self, Vec<usize>) {
13041 let order = self.argsort();
13042 (self.take_existing_positions(&order), order)
13043 }
13044
13045 #[must_use]
13047 pub fn min(&self) -> Option<Vec<IndexLabel>> {
13048 self.argsort()
13049 .first()
13050 .map(|&position| self.tuple_at(position))
13051 }
13052
13053 #[must_use]
13055 pub fn max(&self) -> Option<Vec<IndexLabel>> {
13056 self.argsort()
13057 .last()
13058 .map(|&position| self.tuple_at(position))
13059 }
13060
13061 #[must_use]
13063 pub fn argmax(&self) -> Option<usize> {
13064 self.argsort().last().copied()
13065 }
13066
13067 #[must_use]
13069 pub fn argmin(&self) -> Option<usize> {
13070 self.argsort().first().copied()
13071 }
13072
13073 pub fn append(&self, other: &Self) -> Result<Self, IndexError> {
13077 self.ensure_same_nlevels(other)?;
13078 let mut levels = Vec::with_capacity(self.nlevels());
13079 for level_idx in 0..self.nlevels() {
13080 let mut level = self.levels[level_idx].clone();
13081 level.extend(other.levels[level_idx].iter().cloned());
13082 levels.push(level);
13083 }
13084 Ok(Self {
13085 levels,
13086 names: self.shared_names(other),
13087 })
13088 }
13089
13090 #[must_use]
13092 pub fn repeat(&self, repeats: usize) -> Self {
13093 if repeats == 1 {
13094 return self.clone();
13095 }
13096 let mut levels = Vec::with_capacity(self.nlevels());
13097 for level in &self.levels {
13098 let mut repeated = Vec::with_capacity(level.len() * repeats);
13099 for label in level {
13100 for _ in 0..repeats {
13101 repeated.push(label.clone());
13102 }
13103 }
13104 levels.push(repeated);
13105 }
13106 Self {
13107 levels,
13108 names: self.names.clone(),
13109 }
13110 }
13111
13112 #[must_use]
13116 pub fn dropna(&self) -> Self {
13117 self.dropna_any()
13118 }
13119
13120 #[must_use]
13122 pub fn dropna_any(&self) -> Self {
13123 let positions: Vec<usize> = (0..self.len())
13124 .filter(|&row| self.levels.iter().all(|level| !level[row].is_missing()))
13125 .collect();
13126 self.take_existing_positions(&positions)
13127 }
13128
13129 #[must_use]
13131 pub fn dropna_all(&self) -> Self {
13132 let positions: Vec<usize> = (0..self.len())
13133 .filter(|&row| !self.levels.iter().all(|level| level[row].is_missing()))
13134 .collect();
13135 self.take_existing_positions(&positions)
13136 }
13137
13138 pub fn intersection(&self, other: &Self) -> Result<Self, IndexError> {
13140 self.ensure_same_nlevels(other)?;
13141 if let Some((self_keys, other_keys)) = self.factorize_packed_keys(other)
13147 {
13148 let other_set: FxHashSet<u64> = other_keys.into_iter().collect();
13149 let mut seen: FxHashSet<u64> =
13150 FxHashSet::with_capacity_and_hasher(self_keys.len(), Default::default());
13151 let positions: Vec<usize> = self_keys
13152 .iter()
13153 .enumerate()
13154 .filter_map(|(i, &k)| (other_set.contains(&k) && seen.insert(k)).then_some(i))
13155 .collect();
13156 return Ok(self
13157 .take_existing_positions(&positions)
13158 .set_names(self.shared_names(other)));
13159 }
13160 let other_keys: HashMap<Vec<IndexLabel>, ()> = other
13161 .to_list()
13162 .into_iter()
13163 .map(|tuple| (tuple, ()))
13164 .collect();
13165 let mut seen = HashMap::<Vec<IndexLabel>, ()>::new();
13166 let tuples = self
13167 .to_list()
13168 .into_iter()
13169 .filter(|tuple| {
13170 other_keys.contains_key(tuple) && seen.insert(tuple.clone(), ()).is_none()
13171 })
13172 .collect();
13173 Self::from_tuples_with_names(tuples, self.shared_names(other))
13174 }
13175
13176 pub fn union(&self, other: &Self) -> Result<Self, IndexError> {
13178 self.ensure_same_nlevels(other)?;
13179 let mut seen = HashMap::<Vec<IndexLabel>, ()>::new();
13180 let mut tuples = Vec::with_capacity(self.len() + other.len());
13181 for tuple in self.to_list().into_iter().chain(other.to_list()) {
13182 if seen.insert(tuple.clone(), ()).is_none() {
13183 tuples.push(tuple);
13184 }
13185 }
13186 Self::from_tuples_with_names(tuples, self.shared_names(other))
13187 }
13188
13189 pub fn union_with(&self, other: &Self) -> Result<Self, IndexError> {
13191 self.union(other)
13192 }
13193
13194 pub fn difference(&self, other: &Self) -> Result<Self, IndexError> {
13196 self.ensure_same_nlevels(other)?;
13197 if let Some((self_keys, other_keys)) = self.factorize_packed_keys(other)
13200 {
13201 let other_set: FxHashSet<u64> = other_keys.into_iter().collect();
13202 let mut seen: FxHashSet<u64> =
13203 FxHashSet::with_capacity_and_hasher(self_keys.len(), Default::default());
13204 let positions: Vec<usize> = self_keys
13205 .iter()
13206 .enumerate()
13207 .filter_map(|(i, &k)| (!other_set.contains(&k) && seen.insert(k)).then_some(i))
13208 .collect();
13209 return Ok(self
13210 .take_existing_positions(&positions)
13211 .set_names(self.shared_names(other)));
13212 }
13213 let other_keys: HashMap<Vec<IndexLabel>, ()> = other
13214 .to_list()
13215 .into_iter()
13216 .map(|tuple| (tuple, ()))
13217 .collect();
13218 let mut seen = HashMap::<Vec<IndexLabel>, ()>::new();
13219 let tuples = self
13220 .to_list()
13221 .into_iter()
13222 .filter(|tuple| {
13223 !other_keys.contains_key(tuple) && seen.insert(tuple.clone(), ()).is_none()
13224 })
13225 .collect();
13226 Self::from_tuples_with_names(tuples, self.shared_names(other))
13227 }
13228
13229 pub fn symmetric_difference(&self, other: &Self) -> Result<Self, IndexError> {
13231 self.ensure_same_nlevels(other)?;
13232 let self_keys: HashMap<Vec<IndexLabel>, ()> = self
13233 .to_list()
13234 .into_iter()
13235 .map(|tuple| (tuple, ()))
13236 .collect();
13237 let other_keys: HashMap<Vec<IndexLabel>, ()> = other
13238 .to_list()
13239 .into_iter()
13240 .map(|tuple| (tuple, ()))
13241 .collect();
13242 let mut seen = HashMap::<Vec<IndexLabel>, ()>::new();
13243 let mut tuples = Vec::new();
13244 for tuple in self.to_list() {
13245 if !other_keys.contains_key(&tuple) && seen.insert(tuple.clone(), ()).is_none() {
13246 tuples.push(tuple);
13247 }
13248 }
13249 for tuple in other.to_list() {
13250 if !self_keys.contains_key(&tuple) && seen.insert(tuple.clone(), ()).is_none() {
13251 tuples.push(tuple);
13252 }
13253 }
13254 Self::from_tuples_with_names(tuples, self.shared_names(other))
13255 }
13256
13257 #[must_use]
13259 pub fn groupby(&self) -> HashMap<Vec<IndexLabel>, Vec<usize>> {
13260 let mut groups = HashMap::<Vec<IndexLabel>, Vec<usize>>::new();
13261 for row in 0..self.len() {
13262 groups.entry(self.tuple_at(row)).or_default().push(row);
13263 }
13264 groups
13265 }
13266
13267 pub fn join(&self, other: &Self, how: &str) -> Result<Self, IndexError> {
13269 match how {
13270 "left" => Ok(self.clone()),
13271 "right" => Ok(other.clone()),
13272 "inner" => self.intersection(other),
13273 "outer" => self.union(other),
13274 other => Err(IndexError::InvalidArgument(format!(
13275 "join: how must be 'left', 'right', 'inner', or 'outer', got {other:?}"
13276 ))),
13277 }
13278 }
13279
13280 #[must_use]
13288 pub fn isin(&self, values: &[Vec<IndexLabel>]) -> Vec<bool> {
13289 let nlevels = self.nlevels();
13290 let lookup: FxHashSet<&Vec<IndexLabel>> =
13291 values.iter().filter(|v| v.len() == nlevels).collect();
13292 if lookup.is_empty() {
13293 return vec![false; self.len()];
13294 }
13295 let mut key: Vec<IndexLabel> = Vec::with_capacity(nlevels);
13300 let mut out = Vec::with_capacity(self.len());
13301 for row in 0..self.len() {
13302 key.clear();
13303 key.extend(self.levels.iter().map(|level| level[row].clone()));
13304 out.push(lookup.contains(&key));
13305 }
13306 out
13307 }
13308
13309 pub fn isin_level(&self, values: &[IndexLabel], level: usize) -> Result<Vec<bool>, IndexError> {
13315 if level >= self.nlevels() {
13316 return Err(IndexError::OutOfBounds {
13317 position: level,
13318 length: self.nlevels(),
13319 });
13320 }
13321 let lookup: FxHashSet<&IndexLabel> = values.iter().collect();
13322 Ok(self.levels[level]
13323 .iter()
13324 .map(|label| lookup.contains(label))
13325 .collect())
13326 }
13327
13328 pub fn from_tuples(tuples: Vec<Vec<IndexLabel>>) -> Result<Self, IndexError> {
13333 if tuples.is_empty() {
13334 return Ok(Self {
13335 levels: Vec::new(),
13336 names: Vec::new(),
13337 });
13338 }
13339
13340 let nlevels = tuples[0].len();
13341 for (i, t) in tuples.iter().enumerate() {
13342 if t.len() != nlevels {
13343 return Err(IndexError::LengthMismatch {
13344 expected: nlevels,
13345 actual: t.len(),
13346 context: format!("tuple at position {i} has wrong number of levels"),
13347 });
13348 }
13349 }
13350
13351 let mut levels: Vec<Vec<IndexLabel>> = (0..nlevels)
13352 .map(|_| Vec::with_capacity(tuples.len()))
13353 .collect();
13354 for tuple in &tuples {
13355 for (level_idx, label) in tuple.iter().enumerate() {
13356 levels[level_idx].push(label.clone());
13357 }
13358 }
13359
13360 Ok(Self {
13361 levels,
13362 names: vec![None; nlevels],
13363 })
13364 }
13365
13366 pub fn from_arrays(arrays: Vec<Vec<IndexLabel>>) -> Result<Self, IndexError> {
13370 if arrays.is_empty() {
13371 return Ok(Self {
13372 levels: Vec::new(),
13373 names: Vec::new(),
13374 });
13375 }
13376
13377 let expected_len = arrays[0].len();
13378 for (i, arr) in arrays.iter().enumerate() {
13379 if arr.len() != expected_len {
13380 return Err(IndexError::LengthMismatch {
13381 expected: expected_len,
13382 actual: arr.len(),
13383 context: format!("level {i} array length mismatch"),
13384 });
13385 }
13386 }
13387
13388 let nlevels = arrays.len();
13389 Ok(Self {
13390 levels: arrays,
13391 names: vec![None; nlevels],
13392 })
13393 }
13394
13395 pub fn from_frame(columns: Vec<(Option<String>, Vec<IndexLabel>)>) -> Result<Self, IndexError> {
13401 if columns.is_empty() {
13402 return Ok(Self {
13403 levels: Vec::new(),
13404 names: Vec::new(),
13405 });
13406 }
13407
13408 let expected_len = columns[0].1.len();
13409 for (column_idx, (_, values)) in columns.iter().enumerate() {
13410 if values.len() != expected_len {
13411 return Err(IndexError::LengthMismatch {
13412 expected: expected_len,
13413 actual: values.len(),
13414 context: format!("from_frame column {column_idx} length mismatch"),
13415 });
13416 }
13417 }
13418
13419 let mut names = Vec::with_capacity(columns.len());
13420 let mut levels = Vec::with_capacity(columns.len());
13421 for (name, values) in columns {
13422 names.push(name);
13423 levels.push(values);
13424 }
13425
13426 Ok(Self { levels, names })
13427 }
13428
13429 pub fn from_product(iterables: Vec<Vec<IndexLabel>>) -> Result<Self, IndexError> {
13433 if iterables.is_empty() {
13434 return Ok(Self {
13435 levels: Vec::new(),
13436 names: Vec::new(),
13437 });
13438 }
13439
13440 let total: usize = iterables.iter().map(Vec::len).product();
13442 if total == 0 {
13443 let nlevels = iterables.len();
13444 return Ok(Self {
13445 levels: (0..nlevels).map(|_| Vec::new()).collect(),
13446 names: vec![None; nlevels],
13447 });
13448 }
13449
13450 let nlevels = iterables.len();
13451 let mut levels: Vec<Vec<IndexLabel>> =
13452 (0..nlevels).map(|_| Vec::with_capacity(total)).collect();
13453
13454 for pos in 0..total {
13458 let mut remaining = pos;
13459 for (level_idx, iterable) in iterables.iter().enumerate().rev() {
13460 let idx_in_level = remaining % iterable.len();
13461 remaining /= iterable.len();
13462 levels[level_idx].push(iterable[idx_in_level].clone());
13463 }
13464 }
13465
13466 Ok(Self {
13467 levels,
13468 names: vec![None; nlevels],
13469 })
13470 }
13471
13472 #[must_use]
13477 pub fn to_flat_index(&self, sep: &str) -> Index {
13478 let n = self.len();
13479 let labels: Vec<IndexLabel> = (0..n)
13480 .map(|i| {
13481 let parts: Vec<String> = self
13482 .levels
13483 .iter()
13484 .map(|level| level[i].to_string())
13485 .collect();
13486 IndexLabel::Utf8(parts.join(sep))
13487 })
13488 .collect();
13489 Index::new(labels)
13490 }
13491
13492 pub fn droplevel(&self, level: usize) -> Result<MultiIndexOrIndex, IndexError> {
13497 if level >= self.nlevels() {
13498 return Err(IndexError::OutOfBounds {
13499 position: level,
13500 length: self.nlevels(),
13501 });
13502 }
13503 if self.nlevels() <= 1 {
13504 return Err(IndexError::OutOfBounds {
13505 position: level,
13506 length: self.nlevels(),
13507 });
13508 }
13509
13510 let mut new_levels = self.levels.clone();
13511 new_levels.remove(level);
13512 let mut new_names = self.names.clone();
13513 new_names.remove(level);
13514
13515 if new_levels.len() == 1 {
13516 let mut idx = Index::new(new_levels.into_iter().next().unwrap());
13517 if let Some(ref name) = new_names[0] {
13518 idx = idx.set_name(name);
13519 }
13520 Ok(MultiIndexOrIndex::Index(idx))
13521 } else {
13522 Ok(MultiIndexOrIndex::Multi(Self {
13523 levels: new_levels,
13524 names: new_names,
13525 }))
13526 }
13527 }
13528
13529 pub fn swaplevel(&self, i: usize, j: usize) -> Result<Self, IndexError> {
13533 if i >= self.nlevels() || j >= self.nlevels() {
13534 return Err(IndexError::OutOfBounds {
13535 position: i.max(j),
13536 length: self.nlevels(),
13537 });
13538 }
13539 let mut new_levels = self.levels.clone();
13540 let mut new_names = self.names.clone();
13541 new_levels.swap(i, j);
13542 new_names.swap(i, j);
13543 Ok(Self {
13544 levels: new_levels,
13545 names: new_names,
13546 })
13547 }
13548
13549 pub fn reorder_levels(&self, order: &[usize]) -> Result<Self, IndexError> {
13555 if order.len() != self.nlevels() {
13556 return Err(IndexError::LengthMismatch {
13557 expected: self.nlevels(),
13558 actual: order.len(),
13559 context: "reorder_levels: order length must match nlevels".into(),
13560 });
13561 }
13562
13563 let mut seen = vec![false; self.nlevels()];
13565 for &idx in order {
13566 if idx >= self.nlevels() {
13567 return Err(IndexError::OutOfBounds {
13568 position: idx,
13569 length: self.nlevels(),
13570 });
13571 }
13572 if seen[idx] {
13573 return Err(IndexError::LengthMismatch {
13574 expected: self.nlevels(),
13575 actual: order.len(),
13576 context: format!("reorder_levels: duplicate level index {idx}"),
13577 });
13578 }
13579 seen[idx] = true;
13580 }
13581
13582 let new_levels: Vec<Vec<IndexLabel>> =
13583 order.iter().map(|&idx| self.levels[idx].clone()).collect();
13584 let new_names: Vec<Option<String>> =
13585 order.iter().map(|&idx| self.names[idx].clone()).collect();
13586
13587 Ok(Self {
13588 levels: new_levels,
13589 names: new_names,
13590 })
13591 }
13592}
13593
/// Either a `MultiIndex` or a flat `Index`.
///
/// Returned by `MultiIndex::droplevel`, which yields `Index` when a single level
/// remains after the drop and `Multi` otherwise.
#[derive(Debug, Clone, PartialEq)]
pub enum MultiIndexOrIndex {
    /// The result still has several levels.
    Multi(MultiIndex),
    /// The result collapsed to a single-level index.
    Index(Index),
}
13601
13602#[cfg(test)]
13603mod tests {
13604 use fp_types::{Period, PeriodFreq, Scalar, Timedelta};
13605
13606 use super::{
13607 CategoricalIndex, DateOffset, DateRangeError, DatetimeIndex, Index, IndexLabel, MultiIndex,
13608 PeriodFields, PeriodIndex, RangeIndex, TimedeltaIndex, TimedeltaRangeError, align_union,
13609 apply_date_offset, bdate_range, date_range, infer_freq_from_timestamps, timedelta_range,
13610 validate_alignment_plan,
13611 };
13612
13613 fn int64_labels(index: &Index) -> Vec<i64> {
13614 index
13615 .labels()
13616 .iter()
13617 .filter_map(|label| match label {
13618 IndexLabel::Int64(value) => Some(*value),
13619 _ => None,
13620 })
13621 .collect()
13622 }
13623
/// Compile-time check: both index types must be `Send + Sync`.
#[test]
fn index_is_send_and_sync() {
    fn require_send_sync<T>()
    where
        T: Send + Sync,
    {
    }
    require_send_sync::<Index>();
    require_send_sync::<MultiIndex>();
}
13635
/// A start on Saturday 2024-01-06 must roll forward to the following Monday.
#[test]
fn bdate_range_rolls_weekend_start_forward() {
    // 2024-01-08, 2024-01-09 and 2024-01-10 as nanoseconds since the Unix epoch.
    let expected = [
        IndexLabel::Datetime64(1_704_672_000_000_000_000),
        IndexLabel::Datetime64(1_704_758_400_000_000_000),
        IndexLabel::Datetime64(1_704_844_800_000_000_000),
    ];
    let idx = bdate_range(Some("2024-01-06"), None, Some(3), None).unwrap();
    assert_eq!(idx.labels(), &expected);
}
13648
/// An end on Sunday 2024-01-07 must roll back to the preceding Friday, and the
/// requested name must be kept.
#[test]
fn bdate_range_rolls_weekend_end_backward_and_preserves_name() {
    // 2024-01-03, 2024-01-04 and 2024-01-05 as nanoseconds since the Unix epoch.
    let expected = [
        IndexLabel::Datetime64(1_704_240_000_000_000_000),
        IndexLabel::Datetime64(1_704_326_400_000_000_000),
        IndexLabel::Datetime64(1_704_412_800_000_000_000),
    ];
    let idx = bdate_range(None, Some("2024-01-07"), Some(3), Some("biz")).unwrap();
    assert_eq!(idx.labels(), &expected);
    assert_eq!(idx.name(), Some("biz"));
}
13662
/// Supplying start, end and periods together with a frequency must be rejected.
#[test]
fn timedelta_range_rejects_over_specified_parameters() {
    let one_day = Timedelta::NANOS_PER_DAY;
    let outcome = timedelta_range(Some(one_day), Some(3 * one_day), Some(2), one_day, None);
    let err = outcome.expect_err("start + end + periods with explicit freq must fail closed");
    assert!(matches!(err, TimedeltaRangeError::TooManyParams));
}
13675
/// Supplying start, end and periods together with a frequency must be rejected.
#[test]
fn date_range_rejects_over_specified_parameters() {
    let outcome = date_range(
        Some("2020-01-01"),
        Some("2020-01-03"),
        Some(2),
        Timedelta::NANOS_PER_DAY,
        None,
    );
    let err = outcome.expect_err("start + end + periods with explicit freq must fail closed");
    assert!(matches!(err, DateRangeError::TooManyParams));
}
13688
/// Stepping forward from a start close to the upper timestamp bound must be an error.
#[test]
fn date_range_rejects_generated_timestamp_overflow() {
    let outcome = date_range(
        Some("2262-04-11 23:47:16"),
        None,
        Some(3),
        Timedelta::NANOS_PER_SEC,
        None,
    );
    let err = outcome.expect_err("overflow past i64::MAX nanos must fail closed");
    assert!(matches!(err, DateRangeError::InvalidRange));
}
13701
/// Stepping backward from an end close to the lower timestamp bound must be an error.
#[test]
fn date_range_rejects_backfilled_timestamp_underflow() {
    let outcome = date_range(
        None,
        Some("1677-09-21 00:12:44"),
        Some(3),
        Timedelta::NANOS_PER_SEC,
        None,
    );
    let err = outcome.expect_err("underflow before i64::MIN nanos must fail closed");
    assert!(matches!(err, DateRangeError::InvalidRange));
}
13714
/// A start string that is itself out of range must be rejected outright.
#[test]
fn date_range_rejects_out_of_bounds_timestamp_parse() {
    let outcome = date_range(
        Some("2263-01-01"),
        None,
        Some(1),
        Timedelta::NANOS_PER_DAY,
        None,
    );
    let err = outcome.expect_err("out-of-bounds timestamps must not be coerced to i64::MIN");
    assert!(matches!(err, DateRangeError::InvalidRange));
}
13727
/// One business day after Friday 2024-01-05 is Monday 2024-01-08.
#[test]
fn date_offset_business_day_skips_weekend() {
    // 2024-01-08 as nanoseconds since the Unix epoch.
    let expected = 1_704_672_000_000_000_000;
    let shifted = apply_date_offset("2024-01-05", DateOffset::BusinessDay(1)).unwrap();
    assert_eq!(shifted, expected);
}
13733
/// The month end following 2024-02-10 is the leap day 2024-02-29.
#[test]
fn date_offset_month_end_handles_leap_year() {
    // 2024-02-29 as nanoseconds since the Unix epoch.
    let expected = 1_709_164_800_000_000_000;
    let shifted = apply_date_offset("2024-02-10", DateOffset::MonthEnd(1)).unwrap();
    assert_eq!(shifted, expected);
}
13739
/// Frequency inference must recognise a fixed step, business days and month ends.
#[test]
fn infer_freq_detects_fixed_and_calendar_offsets() {
    // Every second day.
    let every_other_day = ["2024-01-01", "2024-01-03", "2024-01-05"];
    assert_eq!(
        infer_freq_from_timestamps(&every_other_day).unwrap(),
        Some("2D".to_owned())
    );

    // Monday to Friday, then the following Monday and Tuesday.
    let business_days = [
        "2024-01-01",
        "2024-01-02",
        "2024-01-03",
        "2024-01-04",
        "2024-01-05",
        "2024-01-08",
        "2024-01-09",
    ];
    assert_eq!(
        infer_freq_from_timestamps(&business_days).unwrap(),
        Some("B".to_owned())
    );

    // The last day of three consecutive months.
    let month_ends = ["2024-01-31", "2024-02-29", "2024-03-31"];
    assert_eq!(
        infer_freq_from_timestamps(&month_ends).unwrap(),
        Some("ME".to_owned())
    );
}
13764
/// Uneven gaps and repeated timestamps must yield no inferred frequency.
#[test]
fn infer_freq_returns_none_for_irregular_or_duplicate_values() {
    let uneven_gaps = ["2024-01-01", "2024-01-02", "2024-01-04"];
    assert_eq!(infer_freq_from_timestamps(&uneven_gaps).unwrap(), None);

    let repeated_value = ["2024-01-01", "2024-01-02", "2024-01-02"];
    assert_eq!(infer_freq_from_timestamps(&repeated_value).unwrap(), None);
}
13776
/// The union keeps the left labels in order, then appends the unseen right labels.
#[test]
fn union_alignment_preserves_left_then_right_unseen_order() {
    let left = Index::new(vec![1_i64.into(), 2_i64.into(), 4_i64.into()]);
    let right = Index::new(vec![2_i64.into(), 3_i64.into(), 4_i64.into()]);

    let plan = align_union(&left, &right);

    // `3` exists only on the right, so it comes after every left label.
    let expected_union = [
        IndexLabel::Int64(1),
        IndexLabel::Int64(2),
        IndexLabel::Int64(4),
        IndexLabel::Int64(3),
    ];
    assert_eq!(plan.union_index.labels(), &expected_union);
    assert_eq!(plan.left_positions, vec![Some(0), Some(1), Some(2), None]);
    assert_eq!(plan.right_positions, vec![None, Some(0), Some(2), Some(1)]);
    validate_alignment_plan(&plan).expect("plan must be valid");
}
13796
/// An index whose labels repeat ("a" appears twice) must report duplicates.
#[test]
fn duplicate_detection_matches_index_surface() {
    let labels = vec![
        IndexLabel::from("a"),
        IndexLabel::from("a"),
        IndexLabel::from("b"),
    ];
    assert!(Index::new(labels).has_duplicates());
}
13802
/// For a spread of empty, single-element, sorted, unsorted, unique and
/// repeating label sets, `Index::has_duplicates` must agree with the
/// `super::detect_duplicates` reference.
#[test]
fn has_duplicates_sort_fast_path_matches_hashmap_idxdup() {
    // Small builders so each case reads as a list of raw values.
    let ints = |values: &[i64]| -> Vec<IndexLabel> {
        values.iter().copied().map(IndexLabel::Int64).collect()
    };
    let strs = |values: &[&str]| -> Vec<IndexLabel> {
        values
            .iter()
            .map(|value| IndexLabel::Utf8((*value).to_owned()))
            .collect()
    };

    let cases: Vec<Vec<IndexLabel>> = vec![
        Vec::new(),
        ints(&[5]),
        ints(&[1, 2, 3]),
        ints(&[1, 5, 9]),
        ints(&[1, 2, 2]),
        ints(&[3, 1, 2]),
        ints(&[3, 1, 3]),
        ints(&[9, 5, 1]),
        strs(&["a", "b", "c"]),
        strs(&["a", "a", "b"]),
        strs(&["c", "a", "b"]),
    ];

    for labels in &cases {
        let expected = super::detect_duplicates(labels);
        let got = Index::new(labels.clone()).has_duplicates();
        assert_eq!(got, expected, "mismatch for {labels:?}");
    }
}
13829
/// `unique`, `duplicated(DuplicateKeep::First)` and `drop_duplicates` must
/// match a straightforward hash-set reference that keeps first occurrences
/// in their original order.
#[test]
fn dedup_family_sort_fast_path_matches_reference_idxdup() {
    let ints = |values: &[i64]| -> Vec<IndexLabel> {
        values.iter().copied().map(IndexLabel::Int64).collect()
    };
    let strs = |values: &[&str]| -> Vec<IndexLabel> {
        values
            .iter()
            .map(|value| IndexLabel::Utf8((*value).to_owned()))
            .collect()
    };

    let cases: Vec<Vec<IndexLabel>> = vec![
        Vec::new(),
        ints(&[7]),
        ints(&[1, 2, 3]),
        ints(&[1, 2, 2, 4]),
        ints(&[3, 1, 3, 2]),
        ints(&[9, 5, 1]),
        strs(&["a", "b", "c"]),
        strs(&["b", "a", "b"]),
    ];

    for labels in cases {
        let idx = Index::new(labels.clone());

        // Reference: one pass records first occurrences and, for every
        // position, whether the label had already been seen.
        let mut already_seen: std::collections::HashSet<IndexLabel> =
            std::collections::HashSet::new();
        let mut expected_unique: Vec<IndexLabel> = Vec::new();
        let mut expected_repeat_mask: Vec<bool> = Vec::with_capacity(labels.len());
        for label in &labels {
            let is_first = already_seen.insert(label.clone());
            if is_first {
                expected_unique.push(label.clone());
            }
            expected_repeat_mask.push(!is_first);
        }

        assert_eq!(
            idx.unique().labels(),
            expected_unique.as_slice(),
            "unique {labels:?}"
        );
        assert_eq!(
            idx.duplicated(DuplicateKeep::First),
            expected_repeat_mask,
            "duplicated(First) {labels:?}"
        );
        assert_eq!(
            idx.drop_duplicates().labels(),
            expected_unique.as_slice(),
            "drop_duplicates {labels:?}"
        );
    }
}
13873
/// `Index::intersection` and `Index::difference` must match a hash-set
/// reference: first occurrences of left labels, in left order, split by
/// whether the label also appears on the right.
#[test]
fn sorted_merge_set_ops_match_reference_idxdup() {
    let ints = |values: &[i64]| -> Vec<IndexLabel> {
        values.iter().copied().map(IndexLabel::Int64).collect()
    };
    let strs = |values: &[&str]| -> Vec<IndexLabel> {
        values
            .iter()
            .map(|value| IndexLabel::Utf8((*value).to_owned()))
            .collect()
    };

    let pairs: Vec<(Vec<IndexLabel>, Vec<IndexLabel>)> = vec![
        (ints(&[1, 2, 3, 5]), ints(&[2, 3, 4])),
        (ints(&[1, 2, 3]), ints(&[4, 5, 6])),
        (ints(&[1, 2, 3]), ints(&[1, 2, 3])),
        (ints(&[1, 2, 3]), Vec::new()),
        (Vec::new(), ints(&[1, 2, 3])),
        (ints(&[3, 1, 2]), ints(&[2, 3])),
        (ints(&[1, 2, 3]), ints(&[3, 1])),
        (strs(&["a", "c", "e"]), strs(&["b", "c"])),
        (ints(&[1, 2]), strs(&["a", "b"])),
    ];

    for (lhs, rhs) in &pairs {
        let left_index = Index::new(lhs.clone());
        let right_index = Index::new(rhs.clone());
        let right_members: std::collections::HashSet<IndexLabel> = rhs.iter().cloned().collect();

        // Classify each distinct left label exactly once, at its first
        // occurrence: shared labels feed the intersection, the rest feed
        // the difference.
        let mut handled: std::collections::HashSet<IndexLabel> = std::collections::HashSet::new();
        let mut expected_inter: Vec<IndexLabel> = Vec::new();
        let mut expected_diff: Vec<IndexLabel> = Vec::new();
        for label in lhs {
            if !handled.insert(label.clone()) {
                continue;
            }
            if right_members.contains(label) {
                expected_inter.push(label.clone());
            } else {
                expected_diff.push(label.clone());
            }
        }

        assert_eq!(
            left_index.intersection(&right_index).labels(),
            expected_inter.as_slice(),
            "intersection {a:?} ∩ {b:?}",
            a = lhs,
            b = rhs
        );
        assert_eq!(
            left_index.difference(&right_index).labels(),
            expected_diff.as_slice(),
            "difference {a:?} \\ {b:?}",
            a = lhs,
            b = rhs
        );
    }
}
13924
/// For both an ascending and a shuffled set of nanosecond values,
/// `get_loc` on `DatetimeIndex` and `TimedeltaIndex` must return the same
/// position a linear scan finds (or fail when the value is absent).
#[test]
fn datetime_timedelta_get_loc_binary_search_matches_linear_idxdup() {
    // Five present values plus two that never appear (0 and 99).
    let queries = [10_i64, 20, 30, 40, 50, 0, 99];
    let orderings = [
        vec![10_i64, 20, 30, 40, 50],
        vec![30_i64, 10, 50, 20, 40],
    ];

    for nanos in orderings {
        let dt = DatetimeIndex::new(nanos.clone());
        let td = TimedeltaIndex::new(nanos.clone());
        for q in queries {
            let expected = nanos.iter().position(|&n| n == q);
            assert_eq!(dt.get_loc(q).ok(), expected, "datetime nanos={nanos:?} q={q}");
            assert_eq!(td.get_loc(q).ok(), expected, "timedelta nanos={nanos:?} q={q}");
        }
    }
}
13943
/// `Index::get_indexer` must return, for every target label, the position
/// of its first match in the source labels (or `None`), across sorted,
/// unsorted, empty and mixed-type inputs.
#[test]
fn get_indexer_sorted_fast_path_matches_reference_idxdup() {
    let ints = |values: &[i64]| -> Vec<IndexLabel> {
        values.iter().copied().map(IndexLabel::Int64).collect()
    };
    let strs = |values: &[&str]| -> Vec<IndexLabel> {
        values
            .iter()
            .map(|value| IndexLabel::Utf8((*value).to_owned()))
            .collect()
    };

    let cases: Vec<(Vec<IndexLabel>, Vec<IndexLabel>)> = vec![
        (ints(&[1, 2, 3, 4, 5]), ints(&[2, 4, 6])),
        (ints(&[1, 2, 3, 4, 5]), ints(&[5, 1, 3, 9])),
        (ints(&[3, 1, 5, 2]), ints(&[1, 2, 3])),
        (ints(&[1, 2, 3]), Vec::new()),
        (Vec::new(), ints(&[1, 2])),
        (strs(&["a", "c", "e"]), strs(&["c", "z", "a"])),
        (ints(&[1, 2, 3]), strs(&["a"])),
    ];

    for (source_labels, target_labels) in &cases {
        let source = Index::new(source_labels.clone());
        let targets = Index::new(target_labels.clone());

        // Reference: linear scan of the source for each target label.
        let mut expected: Vec<Option<usize>> = Vec::with_capacity(target_labels.len());
        for wanted in target_labels {
            expected.push(source_labels.iter().position(|candidate| candidate == wanted));
        }

        assert_eq!(
            source.get_indexer(&targets),
            expected,
            "get_indexer {a:?} -> {b:?}",
            a = source_labels,
            b = target_labels
        );
    }
}
13969
/// `Index::new_known_unique` must leave the duplicate cache already holding
/// `false`, and `has_duplicates` must report no duplicates.
#[test]
fn known_unique_constructor_seeds_duplicate_cache() {
    let labels = vec![
        IndexLabel::Int64(1),
        IndexLabel::Int64(2),
        IndexLabel::Int64(3),
    ];
    let seeded = Index::new_known_unique(labels);

    // The cache is inspected before any call that could populate it.
    assert_eq!(seeded.duplicate_cache.get(), Some(&false));
    assert!(!seeded.has_duplicates());
}
13976
/// Two indexes with the same labels must compare equal even when only one
/// of them has had `has_duplicates` called on it.
#[test]
fn index_equality_ignores_duplicate_cache_state() {
    let build = || {
        Index::new(vec![
            IndexLabel::from("a"),
            IndexLabel::from("a"),
            IndexLabel::from("b"),
        ])
    };

    let queried = build();
    assert!(queried.has_duplicates());

    let untouched = build();
    assert_eq!(queried, untouched);
}
13985
/// Checks how `label_identity` interacts with equality:
/// a clone and a renamed copy keep the same `label_identity`; an
/// independently built index with equal labels gets a different one yet
/// still compares equal; an index with different labels is unequal.
#[test]
fn index_label_identity_cache_preserves_equality_contracts() {
    let one_two_three = || {
        vec![
            IndexLabel::Int64(1),
            IndexLabel::Int64(2),
            IndexLabel::Int64(3),
        ]
    };

    let original = Index::new(one_two_three());
    let duplicate = original.clone();
    assert_eq!(original.label_identity, duplicate.label_identity);
    assert_eq!(original, duplicate);

    // Renaming keeps the label identity; `equals` holds, `identical` does not.
    let with_name = duplicate.rename_index(Some("rows"));
    assert_eq!(original.label_identity, with_name.label_identity);
    assert!(original.equals(&with_name));
    assert!(!original.identical(&with_name));

    // Same labels, separately constructed.
    let rebuilt = Index::new(one_two_three());
    assert_ne!(original.label_identity, rebuilt.label_identity);
    assert_eq!(original, rebuilt);

    // Last label differs (4 instead of 3).
    let other = Index::new(vec![
        IndexLabel::Int64(1),
        IndexLabel::Int64(2),
        IndexLabel::Int64(4),
    ]);
    assert_ne!(original, other);
    assert!(!original.equals(&other));
}
14006
/// The first `semantic_labels_fingerprint_with` call must run its closure
/// and the second must return the same value without running its own
/// closure, so exactly one invocation is counted.
#[test]
fn semantic_fingerprint_cache_reuses_label_result() {
    let index = Index::new(vec![
        IndexLabel::Int64(1),
        IndexLabel::Int64(2),
        IndexLabel::Int64(3),
    ]);

    // Counts how many fingerprint closures actually execute.
    let invocations = std::cell::Cell::new(0);
    let record_call = || invocations.set(invocations.get() + 1);

    let first = index.semantic_labels_fingerprint_with(|labels| {
        record_call();
        format!("labels:{}", labels.len())
    });
    // Would yield "changed" if it ran; the assertions below require it not to.
    let second = index.semantic_labels_fingerprint_with(|_| {
        record_call();
        String::from("changed")
    });

    assert_eq!(first, "labels:3");
    assert_eq!(second, "labels:3");
    assert_eq!(invocations.get(), 1);
}
14025
/// An index built with `new_known_unique_int64_unit_range(-2, 4)` must look
/// exactly like an index materialised from the labels -2, -1, 0, 1:
/// same length, uniqueness, sortedness, lookups, labels and equality.
#[test]
fn int64_unit_range_index_preserves_materialized_surface() {
    let materialized: Vec<IndexLabel> = (-2_i64..2).map(IndexLabel::Int64).collect();
    let reference = Index::new(materialized).rename_index(Some("idx"));
    let ranged = Index::new_known_unique_int64_unit_range(-2, 4).rename_index(Some("idx"));

    assert_eq!(ranged.len(), 4);
    assert!(!ranged.has_duplicates());
    assert!(ranged.is_sorted());
    // 0 sits at offset 2; 2 lies just past the last label.
    assert_eq!(ranged.position(&IndexLabel::Int64(0)), Some(2));
    assert_eq!(ranged.position(&IndexLabel::Int64(2)), None);
    assert_eq!(ranged.labels(), reference.labels());
    assert_eq!(ranged, reference);
}
14045
/// Walks the value/shape/dtype/name accessors of every typed index wrapper
/// (`RangeIndex`, `DatetimeIndex`, `TimedeltaIndex`, `PeriodIndex`,
/// `CategoricalIndex`) and checks a rejected construction where the
/// original test does.
#[test]
fn index_variant_wrappers_expose_public_type_surface() {
    // --- RangeIndex: start 1, stop 7, step 2 -> 1, 3, 5 ---
    {
        let rng = RangeIndex::new(1, 7, 2).unwrap().set_name("row");
        assert_eq!(rng.values(), vec![1, 3, 5]);
        assert_eq!(rng.to_list(), rng.values());
        assert_eq!(rng.tolist(), rng.values());
        assert_eq!(rng.to_numpy(), rng.values());
        assert_eq!(rng.array(), rng.values());
        assert_eq!(rng.len(), 3);
        assert_eq!(rng.size(), 3);
        assert_eq!(rng.shape(), (3,));
        assert!(!rng.empty());
        assert_eq!(rng.dtype(), "int64");
        assert_eq!(rng.dtypes(), vec!["int64"]);
        assert_eq!(rng.names(), vec![Some(String::from("row"))]);
        assert_eq!(rng.copy(), rng);
        assert_eq!(rng.rename_index(None).name(), None);
        assert_eq!(rng.nbytes(), 3 * std::mem::size_of::<i64>());
        assert_eq!(rng.to_index().name(), Some("row"));
        // A zero step must be rejected.
        assert!(RangeIndex::new(0, 5, 0).is_err());
    }

    // --- DatetimeIndex: one real timestamp plus the i64::MIN missing marker ---
    {
        let stamp: i64 = 1_706_918_400_000_000_000;
        let when = DatetimeIndex::new(vec![stamp, i64::MIN]).set_name("when");
        assert_eq!(when.year(), vec![Some(2024), None]);
        assert_eq!(when.month(), vec![Some(2), None]);
        assert_eq!(when.day(), vec![Some(3), None]);
        assert_eq!(when.values(), vec![Some(stamp), None]);
        assert_eq!(when.to_list(), when.values());
        assert_eq!(when.tolist(), when.values());
        assert_eq!(when.to_numpy(), when.values());
        assert_eq!(when.array(), when.values());
        assert_eq!(when.size(), 2);
        assert_eq!(when.shape(), (2,));
        assert!(!when.empty());
        assert_eq!(when.dtype(), "datetime64[ns]");
        assert_eq!(when.dtypes(), vec!["datetime64[ns]"]);
        assert_eq!(when.names(), vec![Some(String::from("when"))]);
        assert_eq!(when.copy(), when);
        assert!(when.hasnans());
        assert_eq!(when.isna(), vec![false, true]);
        assert_eq!(when.notna(), vec![true, false]);
        assert!(when.nbytes() <= when.memory_usage(true));
        // An Int64-labelled index is not accepted as a datetime index.
        assert!(DatetimeIndex::from_index(Index::from_i64(vec![1])).is_err());
    }

    // --- TimedeltaIndex: 1 day 1 h 1 min 1 s plus the NAT marker ---
    {
        let span: i64 = 90_061_000_000_000;
        let delta = TimedeltaIndex::new(vec![span, Timedelta::NAT]).set_name("delta");
        assert_eq!(delta.days(), vec![Some(1), None]);
        assert_eq!(delta.seconds(), vec![Some(3661), None]);
        assert_eq!(delta.total_seconds(), vec![Some(90061.0), None]);
        assert_eq!(delta.values(), vec![Some(span), None]);
        assert_eq!(delta.to_list(), delta.values());
        assert_eq!(delta.tolist(), delta.values());
        assert_eq!(delta.to_numpy(), delta.values());
        assert_eq!(delta.array(), delta.values());
        assert_eq!(delta.size(), 2);
        assert_eq!(delta.shape(), (2,));
        assert!(!delta.empty());
        assert_eq!(delta.dtype(), "timedelta64[ns]");
        assert_eq!(delta.dtypes(), vec!["timedelta64[ns]"]);
        assert_eq!(delta.names(), vec![Some(String::from("delta"))]);
        assert_eq!(delta.copy(), delta);
        assert!(delta.hasnans());
        assert_eq!(delta.isna(), vec![false, true]);
        assert_eq!(delta.notna(), vec![true, false]);
    }

    // --- PeriodIndex: three consecutive monthly periods starting at ordinal 10 ---
    {
        let periods =
            PeriodIndex::from_range(Period::new(10, PeriodFreq::Monthly), 3).set_name("period");
        assert_eq!(periods.freq(), Some(PeriodFreq::Monthly));
        let expected_periods = [
            Period::new(10, PeriodFreq::Monthly),
            Period::new(11, PeriodFreq::Monthly),
            Period::new(12, PeriodFreq::Monthly),
        ];
        assert_eq!(periods.values(), &expected_periods);
        assert_eq!(periods.to_list(), periods.values());
        assert_eq!(periods.tolist(), periods.values());
        assert_eq!(periods.to_numpy(), periods.values());
        assert_eq!(periods.array(), periods.values());
        assert_eq!(periods.size(), 3);
        assert_eq!(periods.shape(), (3,));
        assert!(!periods.empty());
        assert_eq!(periods.dtype(), "period[M]");
        assert_eq!(periods.dtypes(), vec![String::from("period[M]")]);
        assert_eq!(periods.names(), vec![Some(String::from("period"))]);
        assert_eq!(periods.copy(), periods);
        assert_eq!(periods.rename_index(None).name(), None);
        assert!(periods.nbytes() <= periods.memory_usage(true));
        assert_eq!(periods.to_index().name(), Some("period"));
    }

    // --- CategoricalIndex: ordered, categories in first-seen order ---
    {
        let raw_values = vec![
            String::from("low"),
            String::from("high"),
            String::from("low"),
        ];
        let cat = CategoricalIndex::from_values(raw_values.clone(), true).set_name("priority");
        assert_eq!(cat.categories(), &["low", "high"]);
        assert_eq!(cat.codes(), vec![Some(0), Some(1), Some(0)]);
        assert!(cat.ordered());
        assert_eq!(cat.values(), raw_values);
        assert_eq!(cat.to_list(), cat.values());
        assert_eq!(cat.tolist(), cat.values());
        assert_eq!(cat.to_numpy(), cat.values());
        assert_eq!(cat.array(), cat.values());
        assert_eq!(cat.size(), 3);
        assert_eq!(cat.shape(), (3,));
        assert!(!cat.empty());
        assert_eq!(cat.dtype(), "category");
        assert_eq!(cat.dtypes(), vec!["category"]);
        assert_eq!(cat.names(), vec![Some(String::from("priority"))]);
        assert_eq!(cat.copy(), cat);
        assert_eq!(cat.isna(), vec![false, false, false]);
        assert_eq!(cat.notna(), vec![true, true, true]);
        assert!(cat.nbytes() <= cat.memory_usage(true));
        assert_eq!(cat.to_index().name(), Some("priority"));

        // A value that is not among the declared categories must be rejected.
        let undeclared_value = CategoricalIndex::with_categories(
            vec![String::from("missing")],
            vec![String::from("known")],
            false,
        );
        assert!(undeclared_value.is_err());
    }
}
14171
/// The `str` accessor must be reachable from the flat `Index` and from each
/// typed wrapper. String labels are transformed; the integer label, the
/// range, datetime and timedelta entries yield `None`; period labels are
/// processed via their `format()` text.
#[test]
fn typed_index_str_accessors_forward_flat_labels_e7ms9() -> Result<(), super::IndexError> {
    // Flat index mixing strings with one integer label.
    let mixed = Index::new(vec![
        IndexLabel::from("Alpha"),
        IndexLabel::Int64(1),
        IndexLabel::from(""),
    ]);
    assert_eq!(
        mixed.r#str().lower(),
        vec![Some(String::from("alpha")), None, Some(String::new())]
    );

    // Non-string wrappers: every entry comes back as `None`.
    let rng = RangeIndex::new(1, 4, 1)?;
    assert_eq!(rng.r#str().len(), vec![None, None, None]);

    let when = DatetimeIndex::new(vec![1_704_067_200_000_000_000]);
    assert_eq!(when.r#str().upper(), vec![None]);

    let delta = TimedeltaIndex::new(vec![90_061_000_000_000]);
    assert_eq!(delta.r#str().contains("day"), vec![None]);

    // Period labels: expected output is the lowercase of each formatted label.
    let periods = PeriodIndex::from_range(Period::new(10, PeriodFreq::Monthly), 2);
    let mut lowered_periods: Vec<Option<String>> = Vec::new();
    for rendered in periods.format() {
        lowered_periods.push(Some(rendered.to_lowercase()));
    }
    assert_eq!(periods.r#str().lower(), lowered_periods);

    // Categorical values, including an empty string.
    let cat = CategoricalIndex::from_values(
        vec![String::from("Low"), String::from("HIGH"), String::new()],
        false,
    );
    let lowered_categories = vec![
        Some(String::from("low")),
        Some(String::from("high")),
        Some(String::new()),
    ];
    assert_eq!(cat.r#str().lower(), lowered_categories);

    Ok(())
}
14211
/// `PeriodIndex::from_fields` must turn year/quarter/month/day/time fields
/// into the expected period ordinals for several frequencies, and must
/// return an error for the malformed field combinations listed at the end.
#[test]
fn period_index_from_fields_builds_period_ordinals_th1fd() -> Result<(), super::IndexError> {
    // Single-element inputs shared by most cases below.
    let single_year = [2020];
    let single_month = [1];

    // Monthly: (2020, Jan) and (2021, Feb) -> ordinals 600 and 613, which is
    // consistent with months counted from January 1970.
    let year_pair = [2020, 2021];
    let month_pair = [1, 2];
    let monthly = PeriodIndex::from_fields(PeriodFields {
        month: Some(&month_pair),
        freq: Some(PeriodFreq::Monthly),
        ..PeriodFields::new(&year_pair)
    })?;
    let expected_monthly = [
        Period::new(600, PeriodFreq::Monthly),
        Period::new(613, PeriodFreq::Monthly),
    ];
    assert_eq!(monthly.values(), &expected_monthly);

    // Quarterly: no explicit `freq`, only a quarter field.
    let quarter_year = [2020];
    let quarter_number = [2];
    let quarterly = PeriodIndex::from_fields(PeriodFields {
        quarter: Some(&quarter_number),
        ..PeriodFields::new(&quarter_year)
    })?;
    let expected_quarterly = [Period::new(201, PeriodFreq::Quarterly)];
    assert_eq!(quarterly.values(), &expected_quarterly);

    // Weekly: year and month only.
    let weekly = PeriodIndex::from_fields(PeriodFields {
        month: Some(&single_month),
        freq: Some(PeriodFreq::Weekly),
        ..PeriodFields::new(&single_year)
    })?;
    let expected_weekly = [Period::new(2_610, PeriodFreq::Weekly)];
    assert_eq!(weekly.values(), &expected_weekly);

    // Business: 2020-01-04 (a Saturday) must still map to an ordinal.
    let saturday = [4];
    let business = PeriodIndex::from_fields(PeriodFields {
        month: Some(&single_month),
        day: Some(&saturday),
        freq: Some(PeriodFreq::Business),
        ..PeriodFields::new(&single_year)
    })?;
    let expected_business = [Period::new(13_047, PeriodFreq::Business)];
    assert_eq!(business.values(), &expected_business);

    // Secondly: 2020-01-02 03:04:05; the expected ordinal is derived with the
    // module's own date/time helpers.
    let day_field = [2];
    let hour_field = [3];
    let minute_field = [4];
    let second_field = [5];
    let secondly = PeriodIndex::from_fields(PeriodFields {
        month: Some(&single_month),
        day: Some(&day_field),
        hour: Some(&hour_field),
        minute: Some(&minute_field),
        second: Some(&second_field),
        freq: Some(PeriodFreq::Secondly),
        ..PeriodFields::new(&single_year)
    })?;
    let reference_date = match chrono::NaiveDate::from_ymd_opt(2020, 1, 2) {
        Some(date) => date,
        None => {
            return Err(super::IndexError::InvalidArgument(
                "invalid test date".to_owned(),
            ));
        }
    };
    let reference_time = match chrono::NaiveTime::from_hms_opt(3, 4, 5) {
        Some(time) => time,
        None => {
            return Err(super::IndexError::InvalidArgument(
                "invalid test time".to_owned(),
            ));
        }
    };
    let reference_nanos =
        super::date_and_time_to_nanos(reference_date, super::time_to_nanos(reference_time))
            .map_err(super::period_date_error)?;
    let reference_ordinal = super::datetime_period_ordinal(reference_nanos, PeriodFreq::Secondly)?;
    let expected_secondly = [Period::new(reference_ordinal, PeriodFreq::Secondly)];
    assert_eq!(secondly.values(), &expected_secondly);

    // Two months paired with a single year must be rejected.
    let two_months_one_year = PeriodIndex::from_fields(PeriodFields {
        month: Some(&month_pair),
        freq: Some(PeriodFreq::Monthly),
        ..PeriodFields::new(&single_year)
    });
    assert!(two_months_one_year.is_err());

    // Month 13 must be rejected.
    let month_thirteen = [13];
    let out_of_range_month = PeriodIndex::from_fields(PeriodFields {
        month: Some(&month_thirteen),
        freq: Some(PeriodFreq::Monthly),
        ..PeriodFields::new(&single_year)
    });
    assert!(out_of_range_month.is_err());

    // Day 99 with a monthly frequency is accepted and still yields January 2020.
    let day_ninety_nine = [99];
    let monthly_with_odd_day = PeriodIndex::from_fields(PeriodFields {
        month: Some(&single_month),
        day: Some(&day_ninety_nine),
        freq: Some(PeriodFreq::Monthly),
        ..PeriodFields::new(&single_year)
    })?;
    let expected_january = [Period::new(600, PeriodFreq::Monthly)];
    assert_eq!(monthly_with_odd_day.values(), &expected_january);

    // Supplying both a quarter and a month must be rejected.
    let quarter_and_month = PeriodIndex::from_fields(PeriodFields {
        quarter: Some(&quarter_number),
        month: Some(&single_month),
        freq: Some(PeriodFreq::Monthly),
        ..PeriodFields::new(&single_year)
    });
    assert!(quarter_and_month.is_err());

    Ok(())
}
14328
/// Walks the identity (`is_`, `equals`, `identical`), uniqueness,
/// monotonicity, `item` and type-predicate accessors of every typed index
/// wrapper.
#[test]
fn index_variant_wrappers_expose_identity_and_type_surface() {
    // --- RangeIndex: 1, 3, 5 ---
    {
        let rng = RangeIndex::new(1, 7, 2).unwrap().set_name("row");
        assert!(rng.is_(&rng));
        assert!(rng.equals(&rng.copy()));
        assert!(rng.identical(&rng.copy()));
        // Dropping the name breaks `identical`.
        assert!(!rng.identical(&rng.rename_index(None)));
        assert!(rng.is_unique());
        assert!(!rng.has_duplicates());
        assert!(rng.is_monotonic_increasing());
        assert!(!rng.is_monotonic_decreasing());
        assert_eq!(rng.nunique(), 3);
        assert_eq!(rng.ndim(), 1);
        // `item` succeeds on a one-element range and fails on three elements.
        assert_eq!(RangeIndex::new(4, 5, 1).unwrap().item().unwrap(), 4);
        assert!(rng.item().is_err());
        assert!(rng.holds_integer());
        assert_eq!(rng.inferred_type(), "integer");
        assert!(rng.is_integer());
        assert!(rng.is_numeric());
        assert!(!rng.is_boolean());
        assert!(!rng.is_categorical());
        assert!(!rng.is_floating());
        assert!(!rng.is_interval());
        assert!(!rng.is_object());
    }

    // --- DatetimeIndex: one timestamp plus the i64::MIN missing marker ---
    {
        let stamp: i64 = 1_706_918_400_000_000_000;
        let when = DatetimeIndex::new(vec![stamp, i64::MIN]).set_name("when");
        assert!(when.is_(&when));
        assert!(when.equals(&when.copy()));
        assert!(when.identical(&when.copy()));
        assert!(!when.identical(&when.rename_index(None)));
        assert!(when.is_unique());
        assert!(!when.has_duplicates());
        // The missing entry is excluded by default and counted with dropna=false.
        assert_eq!(when.nunique(), 1);
        assert_eq!(when.nunique_with_dropna(false), 2);
        assert_eq!(when.ndim(), 1);
        let lone_stamp = DatetimeIndex::new(vec![stamp]).item().unwrap();
        assert_eq!(lone_stamp, Some(stamp));
        assert_eq!(DatetimeIndex::new(vec![i64::MIN]).item().unwrap(), None);
        assert_eq!(when.inferred_type(), "datetime64");
        assert!(!when.holds_integer());
        assert!(!when.is_integer());
        assert!(!when.is_numeric());
        assert!(!when.is_boolean());
        assert!(!when.is_categorical());
        assert!(!when.is_floating());
        assert!(!when.is_interval());
        assert!(!when.is_object());
        assert!(DatetimeIndex::new(vec![1, 2]).is_monotonic_increasing());
        assert!(DatetimeIndex::new(vec![2, 1]).is_monotonic_decreasing());
    }

    // --- TimedeltaIndex: one value plus the NAT marker ---
    {
        let delta = TimedeltaIndex::new(vec![1, Timedelta::NAT]).set_name("delta");
        assert!(delta.is_(&delta));
        assert!(delta.equals(&delta.copy()));
        assert!(delta.identical(&delta.copy()));
        assert!(!delta.identical(&delta.rename_index(None)));
        assert!(delta.is_unique());
        assert_eq!(delta.nunique(), 1);
        assert_eq!(delta.nunique_with_dropna(false), 2);
        assert_eq!(delta.ndim(), 1);
        assert_eq!(TimedeltaIndex::new(vec![7]).item().unwrap(), Some(7));
        let lone_nat = TimedeltaIndex::new(vec![Timedelta::NAT]).item().unwrap();
        assert_eq!(lone_nat, None);
        assert_eq!(delta.inferred_type(), "timedelta64");
        assert!(!delta.holds_integer());
        assert!(!delta.is_integer());
        assert!(!delta.is_numeric());
        assert!(!delta.is_boolean());
        assert!(!delta.is_categorical());
        assert!(!delta.is_floating());
        assert!(!delta.is_interval());
        assert!(!delta.is_object());
        assert!(TimedeltaIndex::new(vec![1, 2]).is_monotonic_increasing());
        assert!(TimedeltaIndex::new(vec![2, 1]).is_monotonic_decreasing());
    }

    // --- PeriodIndex: three consecutive monthly periods ---
    {
        let periods =
            PeriodIndex::from_range(Period::new(10, PeriodFreq::Monthly), 3).set_name("period");
        assert!(periods.is_(&periods));
        assert!(periods.equals(&periods.copy()));
        assert!(periods.identical(&periods.copy()));
        assert!(!periods.identical(&periods.rename_index(None)));
        assert!(periods.is_unique());
        assert!(!periods.has_duplicates());
        assert!(periods.is_monotonic_increasing());
        assert!(!periods.is_monotonic_decreasing());
        assert_eq!(periods.nunique(), 3);
        assert_eq!(periods.ndim(), 1);
        let lone_period = PeriodIndex::new(vec![Period::new(42, PeriodFreq::Daily)])
            .item()
            .unwrap();
        assert_eq!(lone_period, Period::new(42, PeriodFreq::Daily));
        assert_eq!(periods.inferred_type(), "period");
        assert!(!periods.holds_integer());
        assert!(!periods.is_integer());
        assert!(!periods.is_numeric());
        assert!(!periods.is_boolean());
        assert!(!periods.is_categorical());
        assert!(!periods.is_floating());
        assert!(!periods.is_interval());
        assert!(!periods.is_object());
    }

    // --- CategoricalIndex: ordered, with "low" repeated ---
    {
        let cat = CategoricalIndex::from_values(
            vec![
                String::from("low"),
                String::from("high"),
                String::from("low"),
            ],
            true,
        )
        .set_name("priority");
        assert!(cat.is_(&cat));
        assert!(cat.equals(&cat.copy()));
        assert!(cat.identical(&cat.copy()));
        assert!(!cat.identical(&cat.rename_index(None)));
        assert!(!cat.is_unique());
        assert!(cat.has_duplicates());
        assert_eq!(cat.nunique(), 2);
        assert_eq!(cat.ndim(), 1);
        let lone_category = CategoricalIndex::from_values(vec![String::from("high")], true)
            .item()
            .unwrap();
        assert_eq!(lone_category, "high");
        assert_eq!(cat.inferred_type(), "categorical");
        assert!(!cat.holds_integer());
        assert!(!cat.is_integer());
        assert!(!cat.is_numeric());
        assert!(!cat.is_boolean());
        assert!(cat.is_categorical());
        assert!(!cat.is_floating());
        assert!(!cat.is_interval());
        assert!(!cat.is_object());
        // low, high, low is monotonic in neither direction.
        assert!(!cat.is_monotonic_increasing());
        assert!(!cat.is_monotonic_decreasing());
        let low_then_high =
            CategoricalIndex::from_values(vec![String::from("low"), String::from("high")], true);
        assert!(low_then_high.is_monotonic_increasing());
    }
}
14472
/// A strictly ascending integer index (1 through 5) must report as sorted.
#[test]
fn sorted_int64_index_detected() {
    let ascending: Vec<i64> = (1..=5).collect();
    assert!(Index::from_i64(ascending).is_sorted());
}
14480
/// An out-of-order integer index (3, 1, 2) must not report as sorted.
#[test]
fn unsorted_int64_index_detected() {
    let shuffled = Index::from_i64(vec![3, 1, 2]);
    let sorted = shuffled.is_sorted();
    assert!(!sorted);
}
14486
/// An ascending string index ("a", "b", "c") must report as sorted.
#[test]
fn sorted_utf8_index_detected() {
    let alphabetical = Index::from_utf8(vec!["a".into(), "b".into(), "c".into()]);
    let sorted = alphabetical.is_sorted();
    assert!(sorted);
}
14492
/// An out-of-order string index ("c", "a", "b") must not report as sorted.
#[test]
fn unsorted_utf8_index_detected() {
    let scrambled = Index::from_utf8(vec!["c".into(), "a".into(), "b".into()]);
    let sorted = scrambled.is_sorted();
    assert!(!sorted);
}
14498
/// A non-decreasing index containing a repeated value (1, 2, 2, 3) must not
/// report as sorted.
#[test]
fn duplicate_int64_is_not_sorted() {
    let with_repeat = Index::from_i64(vec![1, 2, 2, 3]);
    let sorted = with_repeat.is_sorted();
    assert!(!sorted);
}
14504
/// An index with no labels must report as sorted.
#[test]
fn empty_index_is_sorted() {
    let no_labels = Index::new(Vec::new());
    assert!(no_labels.is_sorted());
}
14510
/// An index holding exactly one label must report as sorted.
#[test]
fn single_element_is_sorted() {
    let lone = Index::from_i64(vec![42]);
    let sorted = lone.is_sorted();
    assert!(sorted);
}
14516
/// `position` on a sorted integer index must find the first, middle and
/// last labels and return `None` for values between, below and above the
/// stored labels.
#[test]
fn binary_search_position_sorted_int64() {
    let index = Index::from_i64(vec![10, 20, 30, 40, 50]);

    // (probe value, expected position)
    let probes: [(i64, Option<usize>); 6] = [
        (10, Some(0)),
        (30, Some(2)),
        (50, Some(4)),
        (25, None),
        (0, None),
        (100, None),
    ];
    for (value, expected) in probes {
        assert_eq!(index.position(&IndexLabel::Int64(value)), expected);
    }
}
14527
/// `position` on a sorted string index must find each stored label and
/// return `None` for a label that is not stored.
#[test]
fn binary_search_position_sorted_utf8() {
    let index = Index::from_utf8(vec!["apple".into(), "banana".into(), "cherry".into()]);

    // (probe text, expected position)
    let probes: [(&str, Option<usize>); 4] = [
        ("apple", Some(0)),
        ("banana", Some(1)),
        ("cherry", Some(2)),
        ("date", None),
    ];
    for (text, expected) in probes {
        assert_eq!(index.position(&IndexLabel::Utf8(String::from(text))), expected);
    }
}
14536
/// Looking up a label of the wrong variant must return `None`: a string
/// "1" in an integer index, and an integer 1 in a string index.
#[test]
fn type_mismatch_returns_none() {
    let numbers = Index::from_i64(vec![1, 2, 3]);
    let text_probe = IndexLabel::Utf8(String::from("1"));
    assert_eq!(numbers.position(&text_probe), None);

    let words = Index::from_utf8(vec!["a".into(), "b".into()]);
    let number_probe = IndexLabel::Int64(1);
    assert_eq!(words.position(&number_probe), None);
}
14547
/// On an index that reports as unsorted, `position` must still return each
/// label's actual offset and `None` for an absent label.
#[test]
fn linear_fallback_for_unsorted_index() {
    let index = Index::from_i64(vec![30, 10, 20]);
    assert!(!index.is_sorted());

    // (probe value, expected position in storage order)
    let probes: [(i64, Option<usize>); 4] = [(30, Some(0)), (10, Some(1)), (20, Some(2)), (99, None)];
    for (value, expected) in probes {
        assert_eq!(index.position(&IndexLabel::Int64(value)), expected);
    }
}
14557
/// On a sorted index of the integers 0..10_000, `position` must return each
/// probed value's own offset and `None` just outside either end.
#[test]
fn binary_search_large_sorted_index() {
    let index = Index::from_i64((0..10_000).collect::<Vec<i64>>());
    assert!(index.is_sorted());

    // Present values sit at the offset equal to the value itself.
    for present in [0_i64, 5000, 9999] {
        let probe = IndexLabel::Int64(present);
        assert_eq!(index.position(&probe), Some(present as usize));
    }
    // One past the end and one before the start.
    for absent in [10_000_i64, -1] {
        let probe = IndexLabel::Int64(absent);
        assert_eq!(index.position(&probe), None);
    }
}
14572
/// Calling `is_sorted` twice on the same index must return `true` both
/// times (the test name indicates the second call is expected to be served
/// from a cache; only the repeated result is asserted here).
#[test]
fn sort_detection_is_cached() {
    let index = Index::from_i64(vec![1, 2, 3]);
    for _ in 0..2 {
        assert!(index.is_sorted());
    }
}
14581
/// An index mixing an integer label with a string label must not report as
/// sorted.
#[test]
fn mixed_label_types_are_unsorted() {
    let mixed_labels = vec![IndexLabel::Int64(1), IndexLabel::Utf8(String::from("a"))];
    let mixed = Index::new(mixed_labels);
    assert!(!mixed.is_sorted());
}
14587
/// On a sorted index, `position` must agree with a plain linear scan over
/// `labels()` for every probe, present or absent.
#[test]
fn position_consistent_sorted_vs_unsorted() {
    let sorted = Index::from_i64(vec![5, 10, 15, 20, 25]);
    assert!(sorted.is_sorted());

    // Five stored values followed by three that are not stored.
    for target in [5_i64, 10, 15, 20, 25, 0, 12, 30] {
        let needle = IndexLabel::Int64(target);
        let linear_scan = sorted.labels().iter().position(|label| *label == needle);
        assert_eq!(
            sorted.position(&needle),
            linear_scan,
            "mismatch for target={target}"
        );
    }
}
14605
14606 use super::{AlignMode, align, align_inner, align_left};
14609
/// `align_inner` must keep only the labels present on both sides (2 and 3),
/// report their positions in each input, and produce a plan that passes
/// `validate_alignment_plan`.
#[test]
fn align_inner_keeps_only_overlapping_labels() {
    let ints = |values: &[i64]| -> Vec<IndexLabel> {
        values.iter().copied().map(IndexLabel::Int64).collect()
    };
    let lhs = Index::new(ints(&[1, 2, 3]));
    let rhs = Index::new(ints(&[2, 3, 4]));

    let plan = align_inner(&lhs, &rhs);

    let shared = [IndexLabel::Int64(2), IndexLabel::Int64(3)];
    assert_eq!(plan.union_index.labels(), &shared);
    assert_eq!(plan.left_positions, vec![Some(1), Some(2)]);
    assert_eq!(plan.right_positions, vec![Some(0), Some(1)]);
    validate_alignment_plan(&plan).expect("valid");
}
14624
/// `align_inner` on two indexes with no common label must yield an empty
/// index and empty position vectors.
#[test]
fn align_inner_disjoint_yields_empty() {
    let lhs = Index::new(vec![IndexLabel::Int64(1), IndexLabel::Int64(2)]);
    let rhs = Index::new(vec![IndexLabel::Int64(3), IndexLabel::Int64(4)]);

    let plan = align_inner(&lhs, &rhs);

    assert!(plan.union_index.is_empty());
    assert!(plan.left_positions.is_empty());
    assert!(plan.right_positions.is_empty());
}
14635
/// `align_left` must keep every left label in order, map only the shared
/// label ("b") to a right position, and produce a plan that passes
/// `validate_alignment_plan`.
#[test]
fn align_left_preserves_all_left_labels() {
    let utf8 = |values: &[&str]| -> Vec<IndexLabel> {
        values
            .iter()
            .map(|value| IndexLabel::Utf8((*value).to_owned()))
            .collect()
    };
    let lhs = Index::new(utf8(&["a", "b", "c"]));
    let rhs = Index::new(utf8(&["b", "d"]));

    let plan = align_left(&lhs, &rhs);

    let expected_labels = utf8(&["a", "b", "c"]);
    assert_eq!(plan.union_index.labels(), expected_labels.as_slice());
    assert_eq!(plan.left_positions, vec![Some(0), Some(1), Some(2)]);
    assert_eq!(plan.right_positions, vec![None, Some(0), None]);
    validate_alignment_plan(&plan).expect("valid");
}
14650
/// `align` with `AlignMode::Right` must keep every right label in order and
/// map only the shared label ("b") to a left position.
#[test]
fn align_right_preserves_all_right_labels() {
    let utf8 = |values: &[&str]| -> Vec<IndexLabel> {
        values
            .iter()
            .map(|value| IndexLabel::Utf8((*value).to_owned()))
            .collect()
    };
    let lhs = Index::new(utf8(&["a", "b"]));
    let rhs = Index::new(utf8(&["b", "c", "d"]));

    let plan = align(&lhs, &rhs, AlignMode::Right);

    let expected_labels = utf8(&["b", "c", "d"]);
    assert_eq!(plan.union_index.labels(), expected_labels.as_slice());
    assert_eq!(plan.left_positions, vec![Some(1), None, None]);
    assert_eq!(plan.right_positions, vec![Some(0), Some(1), Some(2)]);
}
14665
/// `align` with `AlignMode::Outer` must produce exactly the plan returned
/// by `align_union` for the same inputs.
#[test]
fn align_mode_outer_matches_union() {
    let lhs = Index::new(vec![IndexLabel::Int64(1), IndexLabel::Int64(2)]);
    let rhs = Index::new(vec![IndexLabel::Int64(2), IndexLabel::Int64(3)]);

    let via_mode = align(&lhs, &rhs, AlignMode::Outer);
    let via_union = align_union(&lhs, &rhs);

    assert_eq!(via_mode, via_union);
}
14675
/// With "a" appearing twice on each side, `align_inner` must emit all four
/// left/right pairings of "a" (left-major order) and nothing else, and the
/// plan must pass `validate_alignment_plan`.
#[test]
fn align_inner_duplicate_labels_cartesian() {
    let utf8 = |values: &[&str]| -> Vec<IndexLabel> {
        values
            .iter()
            .map(|value| IndexLabel::Utf8((*value).to_owned()))
            .collect()
    };
    let lhs = Index::new(utf8(&["a", "b", "a"]));
    let rhs = Index::new(utf8(&["a", "a", "c"]));

    let plan = align_inner(&lhs, &rhs);

    let expected_labels = utf8(&["a", "a", "a", "a"]);
    assert_eq!(plan.union_index.labels(), expected_labels.as_slice());
    // Left "a" at 0 pairs with right 0 and 1, then left "a" at 2 does the same.
    assert_eq!(
        plan.left_positions,
        vec![Some(0), Some(0), Some(2), Some(2)]
    );
    assert_eq!(
        plan.right_positions,
        vec![Some(0), Some(1), Some(0), Some(1)]
    );
    validate_alignment_plan(&plan).expect("valid");
}
14696
/// With duplicate "a" labels on both sides, `align_left` must expand each
/// left "a" into one row per matching right "a", keep the unmatched left
/// "b" with no right position, and pass `validate_alignment_plan`.
#[test]
fn align_left_duplicate_labels_expand_right_matches() {
    let utf8 = |values: &[&str]| -> Vec<IndexLabel> {
        values
            .iter()
            .map(|value| IndexLabel::Utf8((*value).to_owned()))
            .collect()
    };
    let lhs = Index::new(utf8(&["a", "b", "a"]));
    let rhs = Index::new(utf8(&["a", "a", "c"]));

    let plan = align_left(&lhs, &rhs);

    let expected_labels = utf8(&["a", "a", "b", "a", "a"]);
    assert_eq!(plan.union_index.labels(), expected_labels.as_slice());
    assert_eq!(
        plan.left_positions,
        vec![Some(0), Some(0), Some(1), Some(2), Some(2)]
    );
    assert_eq!(
        plan.right_positions,
        vec![Some(0), Some(1), None, Some(0), Some(1)]
    );
    validate_alignment_plan(&plan).expect("valid");
}
14717
/// Right alignment with duplicates walks the right labels in order, repeating
/// each right row once per matching left row; the unmatched "c" maps to `None`
/// on the left.
#[test]
fn align_right_duplicate_labels_expand_left_matches() {
    let lhs = Index::new(Vec::from(["a", "b", "a"].map(IndexLabel::from)));
    let rhs = Index::new(Vec::from(["a", "a", "c"].map(IndexLabel::from)));

    let aligned = align(&lhs, &rhs, AlignMode::Right);

    let want_labels = ["a", "a", "a", "a", "c"].map(IndexLabel::from);
    let want_left = vec![Some(0), Some(2), Some(0), Some(2), None];
    let want_right = vec![Some(0), Some(0), Some(1), Some(1), Some(2)];

    assert_eq!(aligned.union_index.labels(), &want_labels);
    assert_eq!(aligned.left_positions, want_left);
    assert_eq!(aligned.right_positions, want_right);
    validate_alignment_plan(&aligned).expect("valid");
}
14738
/// Outer alignment with duplicates lists the left-driven rows first (expanded
/// per right match) and then appends the right-only label "c".
#[test]
fn align_outer_duplicate_labels_preserves_left_order_and_right_only() {
    let lhs = Index::new(Vec::from(["a", "b", "a"].map(IndexLabel::from)));
    let rhs = Index::new(Vec::from(["a", "a", "c"].map(IndexLabel::from)));

    let aligned = align_union(&lhs, &rhs);

    let want_labels = ["a", "a", "b", "a", "a", "c"].map(IndexLabel::from);
    let want_left = vec![Some(0), Some(0), Some(1), Some(2), Some(2), None];
    let want_right = vec![Some(0), Some(1), None, Some(0), Some(1), Some(2)];

    assert_eq!(aligned.union_index.labels(), &want_labels);
    assert_eq!(aligned.left_positions, want_left);
    assert_eq!(aligned.right_positions, want_right);
    validate_alignment_plan(&aligned).expect("valid");
}
14766
/// Inner alignment of two equal indexes maps every row to itself.
#[test]
fn align_inner_identical_indexes() {
    let lhs = Index::new(vec![IndexLabel::from("x"), IndexLabel::from("y")]);
    let rhs = Index::new(vec![IndexLabel::from("x"), IndexLabel::from("y")]);

    let aligned = align_inner(&lhs, &rhs);

    let want_labels = ["x", "y"].map(IndexLabel::from);
    assert_eq!(aligned.union_index.labels(), &want_labels);

    let identity = vec![Some(0), Some(1)];
    assert_eq!(aligned.left_positions, identity);
    assert_eq!(aligned.right_positions, identity);
}
14777
/// Left alignment of two equal integer indexes reproduces the left labels and
/// maps every row to itself on both sides.
#[test]
fn align_left_identical_indexes() {
    let lhs = Index::new(Vec::from([1, 2].map(IndexLabel::Int64)));
    let rhs = Index::new(Vec::from([1, 2].map(IndexLabel::Int64)));

    let aligned = align_left(&lhs, &rhs);

    assert_eq!(aligned.union_index.labels(), lhs.labels());

    let identity = vec![Some(0), Some(1)];
    assert_eq!(aligned.left_positions, identity);
    assert_eq!(aligned.right_positions, identity);
}
14788
/// Inner alignment against an empty left index yields an empty result index.
#[test]
fn align_inner_empty_input() {
    let no_labels = Index::new(vec![]);
    let one_label = Index::new(vec![IndexLabel::Int64(1)]);

    let aligned = align_inner(&no_labels, &one_label);

    assert!(aligned.union_index.is_empty());
}
14797
/// Left alignment with an empty left index yields an empty result index, even
/// though the right index has a label.
#[test]
fn align_left_empty_left() {
    let no_labels = Index::new(vec![]);
    let one_label = Index::new(vec![IndexLabel::Int64(1)]);

    let aligned = align_left(&no_labels, &one_label);

    assert!(aligned.union_index.is_empty());
}
14806
14807 use super::DuplicateKeep;
14810
/// `contains` is true for a stored label and false for one that is absent.
#[test]
fn contains_finds_existing_label() {
    let index = Index::from_i64(vec![10, 20, 30]);
    let present = IndexLabel::Int64(20);
    let absent = IndexLabel::Int64(99);

    assert!(index.contains(&present));
    assert!(!index.contains(&absent));
}
14817
/// `get_indexer` returns, for each target label, its position in the index or
/// `None` when the label is not present.
#[test]
fn get_indexer_bulk_lookup() {
    let haystack = Index::new(Vec::from(["a", "b", "c"].map(IndexLabel::from)));
    let needles = Index::new(Vec::from(["c", "a", "z"].map(IndexLabel::from)));

    let found = haystack.get_indexer(&needles);

    assert_eq!(found, vec![Some(2), Some(0), None]);
}
14824
/// `isin` produces one boolean per index label: true when the label occurs in
/// the probe list.
#[test]
fn isin_membership_mask() {
    let index = Index::from_i64(vec![1, 2, 3, 4, 5]);
    let probes = Vec::from([2, 4].map(IndexLabel::Int64));

    let mask = index.isin(&probes);

    assert_eq!(mask, vec![false, true, false, true, false]);
}
14831
/// `unique` drops repeated labels and keeps survivors in first-seen order.
#[test]
fn unique_preserves_first_seen_order() {
    let with_repeats = Index::new(Vec::from(
        ["b", "a", "b", "c", "a"].map(IndexLabel::from),
    ));

    let distinct = with_repeats.unique();

    let want = ["b", "a", "c"].map(IndexLabel::from);
    assert_eq!(distinct.labels(), &want);
}
14844
/// With `DuplicateKeep::First`, only the second and later occurrences of a
/// label are flagged.
#[test]
fn duplicated_keep_first() {
    let index = Index::from_i64(vec![1, 2, 1, 3, 2]);

    let flags = index.duplicated(DuplicateKeep::First);

    let want = vec![false, false, true, false, true];
    assert_eq!(flags, want);
}
14853
/// With `DuplicateKeep::Last`, every occurrence except the final one of a
/// repeated label is flagged.
#[test]
fn duplicated_keep_last() {
    let index = Index::from_i64(vec![1, 2, 1, 3, 2]);

    let flags = index.duplicated(DuplicateKeep::Last);

    let want = vec![true, true, false, false, false];
    assert_eq!(flags, want);
}
14862
/// With `DuplicateKeep::None`, every occurrence of a repeated label is
/// flagged; only the singleton `3` stays unflagged.
#[test]
fn duplicated_keep_none_marks_all() {
    let index = Index::from_i64(vec![1, 2, 1, 3, 2]);

    let flags = index.duplicated(DuplicateKeep::None);

    let want = vec![true, true, true, false, true];
    assert_eq!(flags, want);
}
14871
/// `drop_duplicates` and `unique` must agree on the same input.
#[test]
fn drop_duplicates_equals_unique() {
    let index = Index::from_i64(vec![3, 1, 3, 2, 1]);

    let deduped = index.drop_duplicates();
    let distinct = index.unique();

    assert_eq!(deduped, distinct);
}
14877
/// `drop_duplicates_keep(DuplicateKeep::Last)` retains only the final
/// occurrence of each repeated label and carries the index name through.
#[test]
fn index_drop_duplicates_keep_last() {
    let animals = ["llama", "cow", "llama", "beetle", "llama", "hippo"];
    let index = Index::new(Vec::from(animals.map(IndexLabel::from)))
        .set_names(Some("animals"));

    let deduped = index.drop_duplicates_keep(DuplicateKeep::Last);

    let want = ["cow", "beetle", "llama", "hippo"].map(IndexLabel::from);
    assert_eq!(deduped.labels(), &want);
    assert_eq!(deduped.name(), Some("animals"));
}
14903
/// `drop_duplicates_keep(DuplicateKeep::None)` removes every occurrence of a
/// repeated label, leaving only labels that appeared exactly once.
#[test]
fn index_drop_duplicates_keep_none_discards_all_duplicates() {
    let animals = ["llama", "cow", "llama", "beetle", "llama", "hippo"];
    let index = Index::new(Vec::from(animals.map(IndexLabel::from)));

    let deduped = index.drop_duplicates_keep(DuplicateKeep::None);

    let want = ["cow", "beetle", "hippo"].map(IndexLabel::from);
    assert_eq!(deduped.labels(), &want);
}
14926
/// `intersection` keeps shared labels in the order they occur on the left
/// ("a" before "b"), regardless of their order on the right.
#[test]
fn intersection_preserves_left_order() {
    let lhs = Index::new(Vec::from(["c", "a", "b"].map(IndexLabel::from)));
    let rhs = Index::new(Vec::from(["b", "d", "a"].map(IndexLabel::from)));

    let shared = lhs.intersection(&rhs);

    let want = ["a", "b"].map(IndexLabel::from);
    assert_eq!(shared.labels(), &want);
}
14934
/// `intersection` reports each shared label once even when either input
/// repeats it.
#[test]
fn intersection_deduplicates() {
    let lhs = Index::from_i64(vec![1, 1, 2]);
    let rhs = Index::from_i64(vec![1, 2, 2]);

    let shared = lhs.intersection(&rhs);

    let want = [1, 2].map(IndexLabel::Int64);
    assert_eq!(shared.labels(), &want);
}
14945
/// `union_with` returns the left labels followed by right-only labels, with
/// no repeats.
#[test]
fn union_with_combines_unique_labels() {
    let lhs = Index::from_i64(vec![1, 2, 3]);
    let rhs = Index::from_i64(vec![2, 4, 3]);

    let merged = lhs.union_with(&rhs);

    let want = [1, 2, 3, 4].map(IndexLabel::Int64);
    assert_eq!(merged.labels(), &want);
}
14961
/// `difference` keeps only the left labels that are absent from the right.
#[test]
fn difference_removes_other_labels() {
    let lhs = Index::from_i64(vec![1, 2, 3, 4]);
    let rhs = Index::from_i64(vec![2, 4]);

    let remaining = lhs.difference(&rhs);

    let want = [1, 3].map(IndexLabel::Int64);
    assert_eq!(remaining.labels(), &want);
}
14972
/// The result of `difference` carries the receiver's name, even when the
/// argument index is named differently.
#[test]
fn difference_preserves_self_name_even_when_other_differs_6r1lq() {
    let named_left = Index::from_i64(vec![1, 2, 3]).set_name("left_axis");
    let named_right = Index::from_i64(vec![2, 3, 4]).set_name("right_axis");

    let remaining = named_left.difference(&named_right);

    assert_eq!(remaining.name(), Some("left_axis"));
}
14982
/// `symmetric_difference` keeps the labels that occur on exactly one side.
#[test]
fn symmetric_difference_xor() {
    let lhs = Index::from_i64(vec![1, 2, 3]);
    let rhs = Index::from_i64(vec![2, 3, 4]);

    let exclusive = lhs.symmetric_difference(&rhs);

    let want = [1, 4].map(IndexLabel::Int64);
    assert_eq!(exclusive.labels(), &want);
}
14993
/// `argsort` returns the positions that would order the labels ascending.
#[test]
fn argsort_returns_sorting_indices() {
    let unsorted = Index::from_i64(vec![30, 10, 20]);

    let order = unsorted.argsort();

    assert_eq!(order, vec![1, 2, 0]);
}
14999
/// `sort_values` returns an index whose labels are in ascending order.
#[test]
fn sort_values_produces_sorted_index() {
    let unsorted = Index::new(Vec::from(["c", "a", "b"].map(IndexLabel::from)));

    let ordered = unsorted.sort_values();

    let want = ["a", "b", "c"].map(IndexLabel::from);
    assert_eq!(ordered.labels(), &want);
}
15006
/// `take` gathers labels by position, in the order the positions are given.
#[test]
fn take_selects_by_position() {
    let source = Index::from_i64(vec![10, 20, 30, 40, 50]);

    let picked = source.take(&[4, 0, 2]);

    let want = [50, 10, 30].map(IndexLabel::Int64);
    assert_eq!(picked.labels(), &want);
}
15020
/// `slice(1, 3)` on five labels returns the three labels at positions 1..=3.
#[test]
fn slice_extracts_subrange() {
    let source = Index::from_i64(vec![10, 20, 30, 40, 50]);

    let window = source.slice(1, 3);

    let want = [20, 30, 40].map(IndexLabel::Int64);
    assert_eq!(window.labels(), &want);
}
15034
/// An oversized second argument to `slice` is clamped: the result simply runs
/// to the end of the index.
#[test]
fn slice_clamps_to_bounds() {
    let source = Index::from_i64(vec![1, 2, 3]);

    let window = source.slice(1, 100);

    let want = [2, 3].map(IndexLabel::Int64);
    assert_eq!(window.labels(), &want);
}
15044
/// `from_range(0, 5, 1)` yields 0 through 4; the stop value is excluded.
#[test]
fn from_range_basic() {
    let ramp = Index::from_range(0, 5, 1);

    let want = [0, 1, 2, 3, 4].map(IndexLabel::Int64);
    assert_eq!(ramp.labels(), &want);
}
15059
/// `from_range` with a stride larger than one. Despite the `step_2` name, the
/// stride exercised here is 3: `from_range(0, 10, 3)` yields 0, 3, 6, 9.
#[test]
fn from_range_step_2() {
    let strided = Index::from_range(0, 10, 3);

    let want = [0, 3, 6, 9].map(IndexLabel::Int64);
    assert_eq!(strided.labels(), &want);
}
15073
/// A negative step counts downward: `from_range(5, 0, -2)` yields 5, 3, 1.
#[test]
fn from_range_negative_step() {
    let descending = Index::from_range(5, 0, -2);

    let want = [5, 3, 1].map(IndexLabel::Int64);
    assert_eq!(descending.labels(), &want);
}
15086
/// A zero step produces an empty index.
#[test]
fn from_range_empty_when_step_zero() {
    let degenerate = Index::from_range(0, 5, 0);

    assert!(degenerate.is_empty());
}
15092
/// Set operations with an empty receiver: intersection and difference are
/// empty, while union and symmetric difference equal the other operand.
#[test]
fn set_ops_empty_inputs() {
    let nothing = Index::new(vec![]);
    let something = Index::from_i64(vec![1, 2]);

    let shared = nothing.intersection(&something);
    assert!(shared.is_empty());

    let merged = nothing.union_with(&something);
    assert_eq!(merged, something);

    let remaining = nothing.difference(&something);
    assert!(remaining.is_empty());

    let exclusive = nothing.symmetric_difference(&something);
    assert_eq!(exclusive, something);
}
15102
15103 use super::{leapfrog_intersection, leapfrog_union, multi_way_align};
15106
/// `leapfrog_union` over three overlapping integer indexes yields each label
/// once, in ascending order.
#[test]
fn leapfrog_union_three_indexes() {
    let first = Index::from_i64(vec![1, 3, 5]);
    let second = Index::from_i64(vec![2, 3, 6]);
    let third = Index::from_i64(vec![4, 5, 6]);
    let inputs = [&first, &second, &third];

    let merged = leapfrog_union(&inputs);

    let want = [1, 2, 3, 4, 5, 6].map(IndexLabel::Int64);
    assert_eq!(merged.labels(), &want);
}
15125
/// `leapfrog_union` collapses labels repeated within an input or across
/// inputs.
#[test]
fn leapfrog_union_deduplicates() {
    let first = Index::from_i64(vec![1, 1, 2]);
    let second = Index::from_i64(vec![2, 2, 3]);
    let inputs = [&first, &second];

    let merged = leapfrog_union(&inputs);

    let want = [1, 2, 3].map(IndexLabel::Int64);
    assert_eq!(merged.labels(), &want);
}
15140
/// `leapfrog_union` over a single unsorted index returns its labels sorted.
#[test]
fn leapfrog_union_single_index() {
    let only = Index::from_i64(vec![3, 1, 2]);
    let inputs = [&only];

    let merged = leapfrog_union(&inputs);

    let want = [1, 2, 3].map(IndexLabel::Int64);
    assert_eq!(merged.labels(), &want);
}
15154
/// `leapfrog_union` over no inputs at all returns an empty index.
#[test]
fn leapfrog_union_empty() {
    let merged = leapfrog_union(&[]);

    assert!(merged.is_empty());
}
15160
/// An empty index among the inputs contributes nothing to `leapfrog_union`.
#[test]
fn leapfrog_union_with_empty_input() {
    let populated = Index::from_i64(vec![1, 2]);
    let vacant = Index::new(vec![]);
    let inputs = [&populated, &vacant];

    let merged = leapfrog_union(&inputs);

    let want = [1, 2].map(IndexLabel::Int64);
    assert_eq!(merged.labels(), &want);
}
15171
/// `leapfrog_intersection` keeps only the labels present in all three inputs.
#[test]
fn leapfrog_intersection_three_indexes() {
    let first = Index::from_i64(vec![1, 2, 3, 4, 5]);
    let second = Index::from_i64(vec![2, 3, 5, 7]);
    let third = Index::from_i64(vec![3, 5, 8]);
    let inputs = [&first, &second, &third];

    let shared = leapfrog_intersection(&inputs);

    let want = [3, 5].map(IndexLabel::Int64);
    assert_eq!(shared.labels(), &want);
}
15183
/// Inputs with no label in common intersect to an empty index.
#[test]
fn leapfrog_intersection_disjoint() {
    let low = Index::from_i64(vec![1, 2]);
    let high = Index::from_i64(vec![3, 4]);
    let inputs = [&low, &high];

    let shared = leapfrog_intersection(&inputs);

    assert!(shared.is_empty());
}
15191
/// Intersecting two equal indexes returns all of their labels.
#[test]
fn leapfrog_intersection_identical() {
    let first = Index::from_i64(vec![1, 2, 3]);
    let second = Index::from_i64(vec![1, 2, 3]);
    let inputs = [&first, &second];

    let shared = leapfrog_intersection(&inputs);

    let want = [1, 2, 3].map(IndexLabel::Int64);
    assert_eq!(shared.labels(), &want);
}
15206
/// `leapfrog_intersection` accepts unsorted inputs and still reports the
/// shared labels in ascending order.
#[test]
fn leapfrog_intersection_with_unsorted_input() {
    let first = Index::from_i64(vec![5, 3, 1, 4, 2]);
    let second = Index::from_i64(vec![4, 2, 6, 1]);
    let inputs = [&first, &second];

    let shared = leapfrog_intersection(&inputs);

    let want = [1, 2, 4].map(IndexLabel::Int64);
    assert_eq!(shared.labels(), &want);
}
15221
/// An empty index among the inputs forces an empty intersection.
#[test]
fn leapfrog_intersection_empty_input() {
    let populated = Index::from_i64(vec![1, 2, 3]);
    let vacant = Index::new(vec![]);
    let inputs = [&populated, &vacant];

    let shared = leapfrog_intersection(&inputs);

    assert!(shared.is_empty());
}
15229
/// `multi_way_align` builds a first-seen-order union (1, 3, 2) and, for each
/// input, a position vector mapping every union label back into that input.
#[test]
fn multi_way_align_three_indexes() {
    let first = Index::from_i64(vec![1, 3]);
    let second = Index::from_i64(vec![2, 3]);
    let third = Index::from_i64(vec![1, 2]);
    let inputs = [&first, &second, &third];

    let plan = multi_way_align(&inputs);

    let want_labels = [1, 3, 2].map(IndexLabel::Int64);
    assert_eq!(plan.union_index.labels(), &want_labels);

    // One position vector per input, in input order.
    let want_positions = [
        vec![Some(0), Some(1), None],
        vec![None, Some(1), Some(0)],
        vec![Some(0), None, Some(1)],
    ];
    assert_eq!(plan.positions.len(), 3);
    for (slot, want) in want_positions.iter().enumerate() {
        assert_eq!(plan.positions[slot], *want);
    }
}
15252
/// `multi_way_align` over no inputs yields an empty union and no position
/// vectors.
#[test]
fn multi_way_align_empty() {
    let plan = multi_way_align(&[]);

    let union_is_empty = plan.union_index.is_empty();
    let positions_are_empty = plan.positions.is_empty();

    assert!(union_is_empty);
    assert!(positions_are_empty);
}
15259
/// The three-way `leapfrog_union` must match chaining `union_with` pairwise
/// and then sorting the result.
#[test]
fn multi_way_align_isomorphic_with_pairwise() {
    let first = Index::from_i64(vec![1, 4, 7]);
    let second = Index::from_i64(vec![2, 4, 8]);
    let third = Index::from_i64(vec![3, 7, 8]);

    let inputs = [&first, &second, &third];
    let all_at_once = leapfrog_union(&inputs);

    let chained = first.union_with(&second).union_with(&third).sort_values();

    assert_eq!(all_at_once.labels(), chained.labels());
}
15277
/// `leapfrog_union` works on string labels too and returns them sorted.
#[test]
fn leapfrog_union_utf8_labels() {
    let first = Index::new(vec![IndexLabel::from("c"), IndexLabel::from("a")]);
    let second = Index::new(vec![IndexLabel::from("b"), IndexLabel::from("d")]);
    let inputs = [&first, &second];

    let merged = leapfrog_union(&inputs);

    let want = ["a", "b", "c", "d"].map(IndexLabel::from);
    assert_eq!(merged.labels(), &want);
}
15288
/// Five 1000-label windows, each offset 200 from the previous one: together
/// they cover 0..1800, and only 800..1000 lies inside all five.
#[test]
fn leapfrog_large_multi_way() {
    let windows: Vec<Index> = (0..5)
        .map(|k| Index::from_i64((k * 200..k * 200 + 1000).collect()))
        .collect();
    let inputs: Vec<&Index> = windows.iter().collect();

    let merged = leapfrog_union(&inputs);
    assert_eq!(merged.len(), 1800);

    let shared = leapfrog_intersection(&inputs);
    assert_eq!(shared.len(), 200);
}
15309
/// AG-11-T: two equal sorted indexes union to the same labels, and
/// `multi_way_align` maps every union row to itself in both inputs.
#[test]
fn ag11t_two_sorted_identical() {
    let first = Index::from_i64(vec![1, 2, 3]);
    let second = Index::from_i64(vec![1, 2, 3]);
    let inputs = [&first, &second];

    let merged = leapfrog_union(&inputs);
    let want_labels = [1, 2, 3].map(IndexLabel::Int64);
    assert_eq!(merged.labels(), &want_labels);

    let plan = multi_way_align(&inputs);
    let identity = vec![Some(0), Some(1), Some(2)];
    assert_eq!(plan.positions[0], identity);
    assert_eq!(plan.positions[1], identity);

    eprintln!("[AG-11-T] two_sorted_identical | in=[3,3] out=3 | PASS");
}
15331
/// AG-11-T: the union of two disjoint sorted indexes holds all six labels,
/// starting at 1 and ending at 6.
#[test]
fn ag11t_two_sorted_disjoint() {
    let low = Index::from_i64(vec![1, 2, 3]);
    let high = Index::from_i64(vec![4, 5, 6]);
    let inputs = [&low, &high];

    let merged = leapfrog_union(&inputs);

    assert_eq!(merged.len(), 6);
    let labels = merged.labels();
    assert_eq!(labels[0], IndexLabel::Int64(1));
    assert_eq!(labels[5], IndexLabel::Int64(6));

    eprintln!("[AG-11-T] two_sorted_disjoint | in=[3,3] out=6 | PASS");
}
15342
/// AG-11-T: for two overlapping inputs, `multi_way_align` lists the first
/// input's labels, then the second's new ones, with matching position maps.
#[test]
fn ag11t_two_sorted_overlapping_with_positions() {
    let first = Index::from_i64(vec![1, 3, 5]);
    let second = Index::from_i64(vec![2, 3, 4]);
    let inputs = [&first, &second];

    let plan = multi_way_align(&inputs);

    let want_labels = [1, 3, 5, 2, 4].map(IndexLabel::Int64);
    assert_eq!(plan.union_index.labels(), &want_labels);

    let want_first = vec![Some(0), Some(1), Some(2), None, None];
    assert_eq!(plan.positions[0], want_first);

    let want_second = vec![None, Some(1), None, Some(0), Some(2)];
    assert_eq!(plan.positions[1], want_second);

    eprintln!("[AG-11-T] two_sorted_overlapping | in=[3,3] out=5 | PASS");
}
15368
/// AG-11-T: a five-way `leapfrog_union` must equal folding `union_with` over
/// the same inputs and sorting the result.
#[test]
fn ag11t_five_way_union_vs_pairwise() {
    let groups: Vec<Index> = (0..5)
        .map(|k| {
            let base = k * 10;
            Index::from_i64((base..base + 3).collect())
        })
        .collect();
    let inputs: Vec<&Index> = groups.iter().collect();

    let leapfrog = leapfrog_union(&inputs);

    // Reference result: accumulate pairwise unions left to right, then sort.
    let pairwise = groups[1..]
        .iter()
        .fold(groups[0].clone(), |acc, next| acc.union_with(next))
        .sort_values();

    assert_eq!(leapfrog.labels(), pairwise.labels());
    eprintln!(
        "[AG-11-T] five_way_union_vs_pairwise | in=[3x5] out={} | PASS",
        leapfrog.len()
    );
}
15391
/// AG-11-T: ten one-label indexes (0 through 9) union to those ten labels in
/// ascending order.
#[test]
fn ag11t_single_element_indexes() {
    let singles: Vec<Index> = (0..10).map(|k| Index::from_i64(vec![k])).collect();
    let inputs: Vec<&Index> = singles.iter().collect();

    let merged = leapfrog_union(&inputs);

    assert_eq!(merged.len(), 10);
    // Pair each label with the value expected at its position.
    for (label, want) in merged.labels().iter().zip(0_i64..) {
        assert_eq!(*label, IndexLabel::Int64(want));
    }
    eprintln!("[AG-11-T] single_element_indexes | in=[1x10] out=10 | PASS");
}
15403
/// AG-11-T: aligning the same index with itself five times yields its labels
/// unchanged, with an identity position vector for every input.
#[test]
fn ag11t_all_same_labels() {
    let shared = Index::from_i64(vec![1, 2, 3]);
    let inputs: Vec<&Index> = vec![&shared; 5];

    let plan = multi_way_align(&inputs);

    let want_labels = [1, 2, 3].map(IndexLabel::Int64);
    assert_eq!(plan.union_index.labels(), &want_labels);

    let identity = vec![Some(0), Some(1), Some(2)];
    for row in &plan.positions {
        assert_eq!(*row, identity);
    }
    eprintln!("[AG-11-T] all_same_labels | in=[3x5] out=3 | PASS");
}
15423
/// AG-11-T: the three-way `leapfrog_union` equals both groupings of the sorted
/// pairwise union, `a ∪ (b ∪ c)` and `(a ∪ b) ∪ c`.
#[test]
fn ag11t_iso_associativity() {
    let a = Index::from_i64(vec![1, 4, 7, 10]);
    let b = Index::from_i64(vec![2, 4, 8, 10]);
    let c = Index::from_i64(vec![3, 7, 8, 10]);

    let inputs = [&a, &b, &c];
    let three_way = leapfrog_union(&inputs);

    // Right-nested grouping.
    let b_then_c = b.union_with(&c).sort_values();
    let right_nested = a.union_with(&b_then_c).sort_values();

    // Left-nested grouping.
    let a_then_b = a.union_with(&b).sort_values();
    let left_nested = a_then_b.union_with(&c).sort_values();

    assert_eq!(three_way.labels(), right_nested.labels());
    assert_eq!(three_way.labels(), left_nested.labels());
    eprintln!("[AG-11-T] iso_associativity | verified | PASS");
}
15444
/// AG-11-T: `leapfrog_union` gives the same labels whatever order the inputs
/// are supplied in.
#[test]
fn ag11t_iso_commutativity() {
    let a = Index::from_i64(vec![1, 5, 9]);
    let b = Index::from_i64(vec![2, 5, 8]);
    let c = Index::from_i64(vec![3, 5, 7]);

    let order_abc = leapfrog_union(&[&a, &b, &c]);
    let order_cab = leapfrog_union(&[&c, &a, &b]);
    let order_bca = leapfrog_union(&[&b, &c, &a]);

    let reference = order_abc.labels();
    assert_eq!(reference, order_cab.labels());
    assert_eq!(reference, order_bca.labels());
    eprintln!("[AG-11-T] iso_commutativity | verified | PASS");
}
15460
/// `min`/`max` return the extreme integer labels; `argmin`/`argmax` return
/// their positions.
#[test]
fn index_min_max_int() {
    let idx = Index::new(Vec::from([3, 1, 2].map(IndexLabel::Int64)));
    let smallest = IndexLabel::Int64(1);
    let largest = IndexLabel::Int64(3);

    assert_eq!(idx.min(), Some(&smallest));
    assert_eq!(idx.max(), Some(&largest));
    assert_eq!(idx.argmin(), Some(1));
    assert_eq!(idx.argmax(), Some(0));
}
15471
/// `min`/`max` and `argmin`/`argmax` on string labels.
#[test]
fn index_min_max_utf8() {
    let idx = Index::new(Vec::from(["c", "a", "b"].map(IndexLabel::from)));
    let smallest = IndexLabel::Utf8("a".into());
    let largest = IndexLabel::Utf8("c".into());

    assert_eq!(idx.min(), Some(&smallest));
    assert_eq!(idx.max(), Some(&largest));
    assert_eq!(idx.argmin(), Some(1));
    assert_eq!(idx.argmax(), Some(0));
}
15480
/// On an empty index all four extremum queries return `None`.
#[test]
fn index_min_max_empty() {
    let empty = Index::new(Vec::new());

    assert!(empty.min().is_none());
    assert!(empty.max().is_none());
    assert!(empty.argmin().is_none());
    assert!(empty.argmax().is_none());
}
15489
/// `nunique` counts distinct labels: `[1, 2, 1]` has two.
#[test]
fn index_nunique() {
    let idx = Index::new(Vec::from([1, 2, 1].map(IndexLabel::Int64)));

    let distinct = idx.nunique();

    assert_eq!(distinct, 2);
}
15495
/// Timedelta NaT is skipped by `nunique()`, and counted as one distinct value
/// by `nunique_with_dropna(false)`.
#[test]
fn index_nunique_dropna_false_counts_timedelta_nat_once() {
    let raw = vec![Timedelta::NAT, Timedelta::NAT, 5];
    let idx = Index::from_timedelta64(raw);

    let skipping_missing = idx.nunique();
    let counting_missing = idx.nunique_with_dropna(false);

    assert_eq!(skipping_missing, 1);
    assert_eq!(counting_missing, 2);
}
15502
/// Datetime NaT (`i64::MIN`) is skipped by `nunique()`, and counted as one
/// distinct value by `nunique_with_dropna(false)`.
#[test]
fn index_nunique_dropna_false_counts_datetime_nat_once() {
    let raw = [i64::MIN, i64::MIN, 1_700_000_000_000_000_000];
    let idx = Index::new(Vec::from(raw.map(IndexLabel::Datetime64)));

    let skipping_missing = idx.nunique();
    let counting_missing = idx.nunique_with_dropna(false);

    assert_eq!(skipping_missing, 1);
    assert_eq!(counting_missing, 2);
}
15513
/// `map` applies the closure to each label; here integer labels are scaled by
/// ten and any other variant is passed through unchanged.
#[test]
fn index_map() {
    let idx = Index::new(Vec::from([1, 2, 3].map(IndexLabel::Int64)));

    let scaled = idx.map(|label| match label {
        IndexLabel::Int64(n) => IndexLabel::Int64(n * 10),
        untouched => untouched.clone(),
    });

    let labels = scaled.labels();
    assert_eq!(labels[0], IndexLabel::Int64(10));
    assert_eq!(labels[2], IndexLabel::Int64(30));
}
15526
/// `drop_labels` removes the listed labels and keeps the rest in order.
#[test]
fn index_drop_labels() {
    let idx = Index::new(Vec::from(["a", "b", "c"].map(IndexLabel::from)));

    let kept = idx.drop_labels(&[IndexLabel::from("b")]);

    assert_eq!(kept.len(), 2);
    let labels = kept.labels();
    assert_eq!(labels[0], IndexLabel::Utf8("a".into()));
    assert_eq!(labels[1], IndexLabel::Utf8("c".into()));
}
15535
/// `astype_str` renders integer labels as their decimal strings.
#[test]
fn index_astype_str() {
    let numeric = Index::new(vec![IndexLabel::Int64(1), IndexLabel::Int64(2)]);

    let textual = numeric.astype_str();

    let labels = textual.labels();
    assert_eq!(labels[0], IndexLabel::from("1"));
    assert_eq!(labels[1], IndexLabel::from("2"));
}
15543
/// `astype_int` parses numeric string labels into integer labels.
#[test]
fn index_astype_int() {
    let textual = Index::new(Vec::from(["10", "20"].map(IndexLabel::from)));

    let numeric = textual.astype_int();

    let labels = numeric.labels();
    assert_eq!(labels[0], IndexLabel::Int64(10));
    assert_eq!(labels[1], IndexLabel::Int64(20));
}
15554
/// Plain integer labels are never missing: `isna` is all false and `notna`
/// all true.
#[test]
fn index_isna_notna() {
    let idx = Index::new(vec![IndexLabel::Int64(1), IndexLabel::Int64(2)]);

    let missing = idx.isna();
    let present = idx.notna();

    assert_eq!(missing, vec![false, false]);
    assert_eq!(present, vec![true, true]);
}
15561
/// `isna`/`notna` treat the NaT sentinels as missing for both datetime
/// (`i64::MIN`) and timedelta (`Timedelta::NAT`) labels.
#[test]
fn index_isna_notna_detects_datetimelike_nat() {
    let nat_then_value = vec![true, false];
    let value_then_nat = vec![false, true];

    let datetimes = Index::new(Vec::from(
        [i64::MIN, 1_700_000_000_000_000_000].map(IndexLabel::Datetime64),
    ));
    assert_eq!(datetimes.isna(), nat_then_value);
    assert_eq!(datetimes.notna(), value_then_nat);

    let timedeltas = Index::from_timedelta64(vec![Timedelta::NAT, 5]);
    assert_eq!(timedeltas.isna(), nat_then_value);
    assert_eq!(timedeltas.notna(), value_then_nat);
}
15575
/// `fillna` swaps every datetime NaT for the fill label, leaves real values
/// alone, and keeps the index name.
#[test]
fn index_fillna_replaces_datetime_nat_and_preserves_name() {
    let observed = 1_700_000_000_000_000_000;
    let replacement = 1_800_000_000_000_000_000;

    let idx = Index::new(Vec::from(
        [i64::MIN, observed, i64::MIN].map(IndexLabel::Datetime64),
    ))
    .set_name("when");

    let filled = idx.fillna(&IndexLabel::Datetime64(replacement));

    let want = [replacement, observed, replacement].map(IndexLabel::Datetime64);
    assert_eq!(filled.labels(), &want);
    assert_eq!(filled.name(), Some("when"));
}
15597
/// `fillna` swaps every timedelta NaT for the fill label.
#[test]
fn index_fillna_replaces_timedelta_nat() {
    let raw = vec![Timedelta::NAT, 5, Timedelta::NAT];
    let idx = Index::from_timedelta64(raw);
    let replacement = IndexLabel::Timedelta64(42);

    let filled = idx.fillna(&replacement);

    let want = [42, 5, 42].map(IndexLabel::Timedelta64);
    assert_eq!(filled.labels(), &want);
}
15613
/// `dropna` removes NaT entries, keeps the remaining labels in order, and
/// retains the index name.
#[test]
fn index_dropna_removes_missing_and_preserves_name() {
    let raw = vec![1, Timedelta::NAT, 3, Timedelta::NAT, 5];
    let idx = Index::from_timedelta64(raw).set_name("t");

    let compacted = idx.dropna();

    let want = [1, 3, 5].map(IndexLabel::Timedelta64);
    assert_eq!(compacted.labels(), &want);
    assert_eq!(compacted.name(), Some("t"));
}
15629
/// `dropna` on an index with no missing labels returns the same labels.
#[test]
fn index_dropna_all_present_is_noop() {
    let idx = Index::from_i64(vec![1, 2, 3]);

    let compacted = idx.dropna();

    let want = [1, 2, 3].map(IndexLabel::Int64);
    assert_eq!(compacted.labels(), &want);
}
15643
/// `insert` at an interior position shifts later labels one place right.
#[test]
fn index_insert_at_middle_position() {
    let idx = Index::from_i64(vec![1, 3, 4]);
    let newcomer = IndexLabel::Int64(2);

    let grown = idx.insert(1, newcomer).unwrap();

    let want = [1, 2, 3, 4].map(IndexLabel::Int64);
    assert_eq!(grown.labels(), &want);
}
15658
/// `insert` at position `len` is allowed and appends the label.
#[test]
fn index_insert_at_end_appends() {
    let idx = Index::from_i64(vec![1, 2]);
    let newcomer = IndexLabel::Int64(3);

    let grown = idx.insert(2, newcomer).unwrap();

    let want = [1, 2, 3].map(IndexLabel::Int64);
    assert_eq!(grown.labels(), &want);
}
15672
/// `insert` beyond `len` is rejected with `IndexError::OutOfBounds`.
#[test]
fn index_insert_past_end_errors() {
    let idx = Index::from_i64(vec![1, 2]);

    let outcome = idx.insert(5, IndexLabel::Int64(9));

    let failure = outcome.unwrap_err();
    assert!(matches!(failure, crate::IndexError::OutOfBounds { .. }));
}
15679
/// `delete` removes the label at the given position and keeps the index name.
#[test]
fn index_delete_removes_position() {
    let idx = Index::from_i64(vec![10, 20, 30]).set_name("k");

    let shrunk = idx.delete(1).unwrap();

    let want = [10, 30].map(IndexLabel::Int64);
    assert_eq!(shrunk.labels(), &want);
    assert_eq!(shrunk.name(), Some("k"));
}
15690
/// `delete` at position `len` is rejected with `IndexError::OutOfBounds`.
#[test]
fn index_delete_out_of_bounds_errors() {
    let idx = Index::from_i64(vec![1]);

    let outcome = idx.delete(1);

    let failure = outcome.unwrap_err();
    assert!(matches!(failure, crate::IndexError::OutOfBounds { .. }));
}
15697
/// `append` concatenates the argument's labels after the receiver's and keeps
/// the receiver's name.
#[test]
fn index_append_concatenates() {
    let head = Index::from_i64(vec![1, 2]).set_name("left");
    let tail = Index::from_i64(vec![3, 4]);

    let joined = head.append(&tail);

    let want = [1, 2, 3, 4].map(IndexLabel::Int64);
    assert_eq!(joined.labels(), &want);
    assert_eq!(joined.name(), Some("left"));
}
15714
/// Appending an empty index leaves the labels unchanged.
#[test]
fn index_append_empty_is_noop() {
    let base = Index::from_i64(vec![1, 2]);
    let nothing = Index::new(vec![]);

    let joined = base.append(&nothing);

    assert_eq!(joined.labels(), base.labels());
}
15722
/// `repeat(2)` emits each label twice in a row (1, 1, 2, 2, ...) and keeps
/// the index name.
#[test]
fn index_repeat_duplicates_each_label() {
    let idx = Index::from_i64(vec![1, 2, 3]).set_name("k");

    let doubled = idx.repeat(2);

    let want = [1, 1, 2, 2, 3, 3].map(IndexLabel::Int64);
    assert_eq!(doubled.labels(), &want);
    assert_eq!(doubled.name(), Some("k"));
}
15740
/// `repeat(0)` produces an index with no labels.
#[test]
fn index_repeat_zero_yields_empty() {
    let idx = Index::from_i64(vec![1, 2, 3]);

    let none_left = idx.repeat(0);

    assert!(none_left.labels().is_empty());
}
15747
/// `repeat(1)` returns the same labels as the original index.
#[test]
fn index_repeat_one_is_clone() {
    let original = Index::from_i64(vec![1, 2]);

    let once = original.repeat(1);

    assert_eq!(once.labels(), original.labels());
}
15754
/// `equals` compares labels only; differing names do not matter.
#[test]
fn index_equals_same_labels_ignores_name() {
    let named_x = Index::from_i64(vec![1, 2, 3]).set_name("x");
    let named_y = Index::from_i64(vec![1, 2, 3]).set_name("y");

    let same_labels = named_x.equals(&named_y);

    assert!(same_labels);
}
15761
/// `equals` is false when the label sequences differ (here, in length).
#[test]
fn index_equals_differing_labels_false() {
    let longer = Index::from_i64(vec![1, 2, 3]);
    let shorter = Index::from_i64(vec![1, 2]);

    let same_labels = longer.equals(&shorter);

    assert!(!same_labels);
}
15768
/// `identical` is stricter than `equals`: labels and name must both match.
#[test]
fn index_identical_requires_matching_name() {
    let named_x = Index::from_i64(vec![1, 2]).set_name("x");
    let named_y = Index::from_i64(vec![1, 2]).set_name("y");

    // Same labels, different names: equal but not identical.
    assert!(named_x.equals(&named_y));
    assert!(!named_x.identical(&named_y));

    // Same labels and same name: identical.
    let also_named_x = Index::from_i64(vec![1, 2]).set_name("x");
    assert!(named_x.identical(&also_named_x));
}
15778
/// `value_counts` orders its (label, count) entries by descending count.
#[test]
fn index_value_counts_sorts_by_descending_count() {
    let idx = Index::new(Vec::from(
        ["a", "b", "a", "c", "a", "b"].map(IndexLabel::from),
    ));

    let counts = idx.value_counts();

    // Expected (label, count) pairs, most frequent first.
    let want = [("a", 3), ("b", 2), ("c", 1)];
    for (rank, &(label, tally)) in want.iter().enumerate() {
        assert_eq!(counts[rank].0, IndexLabel::Utf8(label.into()));
        assert_eq!(counts[rank].1, tally);
    }
}
15797
/// `value_counts` on an empty index returns no entries.
#[test]
fn index_value_counts_empty() {
    let no_labels: Vec<IndexLabel> = Vec::new();
    let idx = Index::new(no_labels);

    let counts = idx.value_counts();

    assert!(counts.is_empty());
}
15803
/// Plain `value_counts` ignores missing labels (datetime NaT here) and counts
/// only the real ones.
#[test]
fn index_value_counts_drops_missing_by_default() {
    let nat = || IndexLabel::Datetime64(i64::MIN);
    let idx = Index::new(vec![
        nat(),
        IndexLabel::from("a"),
        IndexLabel::from("a"),
        nat(),
    ]);

    let counts = idx.value_counts();

    let want = vec![(IndexLabel::from("a"), 2)];
    assert_eq!(counts, want);
}
15816
#[test]
fn index_value_counts_with_options_preserves_first_seen_order_when_unsorted() {
    // A missing datetime label leads, followed by "b", "a", "b".
    let missing = IndexLabel::Datetime64(i64::MIN);
    let idx = Index::new(vec![
        missing.clone(),
        IndexLabel::from("b"),
        IndexLabel::from("a"),
        IndexLabel::from("b"),
    ]);

    // With all four flags false the expected output keeps the missing label
    // and lists entries in first-seen order with integer counts.
    let expected = vec![
        (missing, Scalar::Int64(1)),
        (IndexLabel::from("b"), Scalar::Int64(2)),
        (IndexLabel::from("a"), Scalar::Int64(1)),
    ];
    let counts = idx.value_counts_with_options(false, false, false, false);
    assert_eq!(counts, expected);
}
15836
#[test]
fn index_value_counts_with_options_normalize_excludes_missing_from_denominator() {
    // Three integer labels plus one missing datetime label.
    let idx = Index::new(vec![
        IndexLabel::from(1_i64),
        IndexLabel::from(1_i64),
        IndexLabel::from(2_i64),
        IndexLabel::Datetime64(i64::MIN),
    ]);

    let counts = idx.value_counts_with_options(true, true, false, true);
    let entries = counts.as_slice();

    // Shape check: exactly two float-valued entries, for labels 1 and 2.
    assert!(matches!(
        entries,
        [
            (IndexLabel::Int64(1), Scalar::Float64(_)),
            (IndexLabel::Int64(2), Scalar::Float64(_))
        ]
    ));

    // Value check: the fractions are 2/3 and 1/3, i.e. the denominator is the
    // three non-missing labels. The pattern always matches after the assert above.
    if let [
        (IndexLabel::Int64(1), Scalar::Float64(first)),
        (IndexLabel::Int64(2), Scalar::Float64(second)),
    ] = entries
    {
        assert!((first - (2.0 / 3.0)).abs() < 1e-12);
        assert!((second - (1.0 / 3.0)).abs() < 1e-12);
    }
}
15864
#[test]
fn index_shift_positive_pads_left() {
    let fill = IndexLabel::Int64(-1);
    let source = Index::from_i64(vec![1, 2, 3, 4]).set_name("k");

    let shifted = source.shift(2, fill.clone());

    // Two fill labels lead, the last two original labels fall off the end.
    let expected = [
        fill.clone(),
        fill,
        IndexLabel::Int64(1),
        IndexLabel::Int64(2),
    ];
    assert_eq!(shifted.labels(), &expected);
    // The name survives the shift.
    assert_eq!(shifted.name(), Some("k"));
}
15880
#[test]
fn index_shift_negative_pads_right() {
    let source = Index::from_i64(vec![1, 2, 3, 4]);

    let shifted = source.shift(-1, IndexLabel::Int64(0));

    // The first label is dropped and the fill label is appended at the end.
    let expected = [
        IndexLabel::from(2_i64),
        IndexLabel::from(3_i64),
        IndexLabel::from(4_i64),
        IndexLabel::from(0_i64),
    ];
    assert_eq!(shifted.labels(), &expected);
}
15895
#[test]
fn index_shift_zero_is_clone() {
    let source = Index::from_i64(vec![1, 2, 3]);
    // A shift of zero must leave every label in place; the fill is unused.
    let unshifted = source.shift(0, IndexLabel::Int64(-1));
    assert_eq!(unshifted.labels(), source.labels());
}
15902
#[test]
fn index_shift_larger_than_len_fills_all() {
    let fill = IndexLabel::Int64(-1);
    let source = Index::from_i64(vec![1, 2, 3]);

    // Shifting by more than the length leaves only fill labels, same length.
    let shifted = source.shift(10, fill.clone());

    let expected = [fill.clone(), fill.clone(), fill];
    assert_eq!(shifted.labels(), &expected);
}
15916
#[test]
fn index_any_all_basic() {
    // (labels, expected `any`, expected `all`): zero is falsy, non-zero truthy.
    let cases = [
        (vec![0, 0, 1], true, false),
        (vec![1, 2, 3], true, true),
        (vec![0, 0], false, false),
    ];

    for (values, expect_any, expect_all) in cases {
        let idx = Index::from_i64(values);
        assert_eq!(idx.any(), expect_any);
        assert_eq!(idx.all(), expect_all);
    }
}
15931
#[test]
fn index_all_empty_is_true() {
    // On an empty index `all` is true and `any` is false.
    let no_labels: Vec<IndexLabel> = Vec::new();
    let empty = Index::new(no_labels);
    assert!(empty.all());
    assert!(!empty.any());
}
15938
#[test]
fn index_any_string_nonempty_truthy() {
    // Two empty strings (falsy) and one non-empty string (truthy).
    let idx = Index::new(vec![
        IndexLabel::from(""),
        IndexLabel::from(""),
        IndexLabel::from("x"),
    ]);
    assert!(idx.any());
    assert!(!idx.all());
}
15945
#[test]
fn index_to_list_returns_owned_labels() {
    let idx = Index::from_i64(vec![1, 2, 3]);
    // The expected list is the same three integers wrapped as labels.
    let expected: Vec<IndexLabel> = (1..=3).map(IndexLabel::Int64).collect();
    assert_eq!(idx.to_list(), expected);
}
15958
#[test]
fn index_format_stringifies_labels() {
    // Mixed integer and string labels, including a negative integer.
    let idx = Index::new(vec![
        IndexLabel::from(10_i64),
        IndexLabel::from("abc"),
        IndexLabel::from(-5_i64),
    ]);
    let rendered = idx.format();
    assert_eq!(rendered, vec!["10", "abc", "-5"]);
}
15968
#[test]
fn index_putmask_replaces_true_positions() {
    let source = Index::from_i64(vec![1, 2, 3, 4]).set_name("k");
    let mask = vec![false, true, false, true];
    let replacement = IndexLabel::Int64(0);

    let replaced = source.putmask(&mask, &replacement);

    // Positions 1 and 3 (mask == true) take the replacement label.
    let expected = [
        IndexLabel::from(1_i64),
        replacement.clone(),
        IndexLabel::from(3_i64),
        replacement.clone(),
    ];
    assert_eq!(replaced.labels(), &expected);
    // The name survives the replacement.
    assert_eq!(replaced.name(), Some("k"));
}
15985
#[test]
fn index_putmask_short_cond_leaves_tail_unchanged() {
    let source = Index::from_i64(vec![1, 2, 3, 4]);
    // The mask covers only the first position.
    let mask = vec![true];

    let replaced = source.putmask(&mask, &IndexLabel::Int64(-1));

    // Only position 0 is replaced; positions beyond the mask keep their labels.
    let expected = [
        IndexLabel::from(-1_i64),
        IndexLabel::from(2_i64),
        IndexLabel::from(3_i64),
        IndexLabel::from(4_i64),
    ];
    assert_eq!(replaced.labels(), &expected);
}
16003
#[test]
fn index_putmask_empty_cond_is_noop() {
    let source = Index::from_i64(vec![1, 2]);
    // An empty mask selects nothing, so every label must survive.
    let untouched = source.putmask(&[], &IndexLabel::Int64(0));
    assert_eq!(untouched.labels(), source.labels());
}
16010
#[test]
fn index_asof_finds_largest_not_exceeding() {
    let idx = Index::from_i64(vec![1, 3, 5, 7]);

    // (probe, expected label): the largest label that does not exceed the probe.
    for (probe, expected) in [(4, 3), (5, 5), (7, 7), (100, 7)] {
        assert_eq!(
            idx.asof(&IndexLabel::Int64(probe)),
            Some(IndexLabel::Int64(expected))
        );
    }
}
16022
#[test]
fn index_asof_before_first_returns_none() {
    let idx = Index::from_i64(vec![5, 10]);
    // The probe is smaller than every label, so there is nothing to return.
    let probe = IndexLabel::Int64(0);
    assert_eq!(idx.asof(&probe), None);
}
16028
#[test]
fn index_searchsorted_left_right() {
    let idx = Index::from_i64(vec![1, 2, 2, 5]);

    // (probe value, side, expected insertion position).
    let cases = [
        (2, "left", 1),
        (2, "right", 3),
        (0, "left", 0),
        (6, "left", 4),
    ];
    for (value, side, expected) in cases {
        let position = idx.searchsorted(&IndexLabel::Int64(value), side).unwrap();
        assert_eq!(position, expected);
    }
}
16037
#[test]
fn index_searchsorted_rejects_invalid_side() {
    let idx = Index::from_i64(vec![1]);
    // "middle" is not an accepted side and must be reported as an error.
    let outcome = idx.searchsorted(&IndexLabel::Int64(0), "middle");
    assert!(outcome.is_err());
}
16043
#[test]
fn index_memory_usage_counts_fixed_width() {
    let idx = Index::from_i64(vec![1, 2, 3]);
    // Three integer labels are expected to report 24 bytes, shallow or deep.
    for deep in [false, true] {
        assert_eq!(idx.memory_usage(deep), 24);
    }
}
16052
#[test]
fn index_memory_usage_deep_counts_utf8_bytes() {
    let idx = Index::new(vec![IndexLabel::from("hi"), IndexLabel::from("world")]);

    let (shallow, deep) = (idx.memory_usage(false), idx.memory_usage(true));

    // The deep figure exceeds the shallow one by 7: "hi" (2) + "world" (5).
    let string_bytes = deep - shallow;
    assert_eq!(string_bytes, 7);
}
16064
#[test]
fn index_nlevels_flat_index_is_one() {
    // A flat (non-multi) index reports a single level.
    let levels = Index::from_i64(vec![1, 2]).nlevels();
    assert_eq!(levels, 1);
}
16070
#[test]
fn index_where_cond() {
    let idx = Index::new(vec![
        IndexLabel::from("a"),
        IndexLabel::from("b"),
        IndexLabel::from("c"),
    ]);
    let keep = vec![true, false, true];

    let result = idx.where_cond(&keep, &IndexLabel::from("X"));

    // Labels are kept where the condition is true and replaced where it is false.
    for (position, expected) in [(0, "a"), (1, "X"), (2, "c")] {
        assert_eq!(result.labels()[position], IndexLabel::Utf8(expected.into()));
    }
}
16080
#[test]
fn index_a31qh_conversion_aliases_materialize_labels() {
    let labels = vec![IndexLabel::from("a"), IndexLabel::from("b")];
    let idx = Index::new(vec![IndexLabel::from("a"), IndexLabel::from("b")]).set_name("key");

    // Every label-materializing alias yields the same label sequence.
    assert_eq!(idx.tolist(), labels);
    assert_eq!(idx.to_numpy(), labels);
    assert_eq!(idx.array(), labels);
    assert_eq!(idx.values(), labels);
    assert_eq!(idx.ravel(), labels);

    // Shape-preserving aliases compare equal to the index itself.
    assert_eq!(idx.view(), idx);
    assert_eq!(idx.transpose(), idx);
    assert_eq!(idx.T(), idx);

    // `to_frame` yields one single-label row per label.
    let expected_frame = labels
        .iter()
        .map(|label| vec![label.clone()])
        .collect::<Vec<_>>();
    assert_eq!(idx.to_frame(), expected_frame);

    // `to_series` pairs every label with itself.
    let expected_series = labels
        .iter()
        .map(|label| (label.clone(), label.clone()))
        .collect::<Vec<_>>();
    assert_eq!(idx.to_series(), expected_series);
}
16106
#[test]
fn index_a31qh_dtype_metadata_and_type_checks() {
    // --- homogeneous integer index ---
    let integers = Index::from_i64(vec![1, 2, 3]);

    // dtype reporting
    assert_eq!(integers.dtype(), "int64");
    assert_eq!(integers.dtypes(), vec!["int64"]);
    assert_eq!(integers.inferred_type(), "integer");

    // type predicates
    assert!(integers.holds_integer());
    assert!(integers.is_integer());
    assert!(integers.is_numeric());
    assert!(!integers.is_object());

    // shape metadata
    assert_eq!(integers.ndim(), 1);
    assert_eq!(integers.shape(), (3,));
    assert_eq!(integers.size(), 3);
    assert_eq!(integers.nbytes(), integers.memory_usage(false));
    assert!(!integers.empty());

    // `item` succeeds on a one-element index and fails on a three-element one.
    let single = Index::from_i64(vec![42]);
    assert_eq!(single.item().unwrap(), IndexLabel::Int64(42));
    assert!(integers.item().is_err());

    // --- heterogeneous index with a missing datetime label ---
    let heterogeneous = Index::new(vec![
        IndexLabel::from(1_i64),
        IndexLabel::from("x"),
        IndexLabel::Datetime64(i64::MIN),
    ]);
    assert_eq!(heterogeneous.dtype(), "object");
    assert_eq!(heterogeneous.inferred_type(), "mixed");
    assert!(heterogeneous.is_object());
    assert!(heterogeneous.hasnans());

    // The null/na spellings are aliases of each other.
    assert_eq!(heterogeneous.isnull(), heterogeneous.isna());
    assert_eq!(heterogeneous.notnull(), heterogeneous.notna());

    assert!(!heterogeneous.is_boolean());
    assert!(!heterogeneous.is_categorical());
    assert!(!heterogeneous.is_floating());
    assert!(!heterogeneous.is_interval());
    assert_eq!(heterogeneous.infer_objects(), heterogeneous);

    // `is_` holds for the very same index but not for a separately built one
    // with equal labels.
    assert!(integers.is_(&integers));
    let rebuilt = Index::from_i64(vec![1, 2, 3]);
    assert!(!integers.is_(&rebuilt));
}
16147
#[test]
fn index_a31qh_factorize_reindex_and_non_unique_indexer() {
    // "a" is duplicated and the last label is a missing datetime.
    let idx = Index::new(vec![
        IndexLabel::from("a"),
        IndexLabel::from("b"),
        IndexLabel::from("a"),
        IndexLabel::Datetime64(i64::MIN),
    ])
    .set_name("letters");

    // factorize: the missing label gets code -1 and is absent from the uniques.
    let (codes, uniques) = idx.factorize();
    assert_eq!(codes, vec![0, 1, 0, -1]);
    let expected_uniques = [IndexLabel::from("a"), IndexLabel::from("b")];
    assert_eq!(uniques.labels(), &expected_uniques);
    assert_eq!(uniques.name(), Some("letters"));

    // "z" does not occur in `idx`.
    let target = Index::new(vec![
        IndexLabel::from("a"),
        IndexLabel::from("z"),
        IndexLabel::from("b"),
    ]);
    assert_eq!(idx.get_indexer_for(&target), vec![Some(0), None, Some(1)]);

    // Non-unique indexer: both positions of "a", -1 for "z", then "b";
    // the second vector names the target position that was not found.
    let (indexer, not_found) = idx.get_indexer_non_unique(&target);
    assert_eq!(indexer, vec![0, 2, -1, 1]);
    assert_eq!(not_found, vec![1]);

    let (reindexed, positions) = idx.reindex(&target);
    assert_eq!(reindexed, target);
    assert_eq!(positions, vec![Some(0), None, Some(1)]);
}
16181
#[test]
fn index_a31qh_set_sort_slice_and_level_aliases() {
    let idx = Index::from_i64(vec![3, 1, 2]).set_name("n");

    // sort / sortlevel
    let sorted = idx.sort();
    let ascending = [
        IndexLabel::from(1_i64),
        IndexLabel::from(2_i64),
        IndexLabel::from(3_i64),
    ];
    assert_eq!(sorted.labels(), &ascending);
    let (sortlevel, order) = idx.sortlevel();
    assert_eq!(sortlevel, sorted);
    assert_eq!(order, vec![1, 2, 0]);

    // Aliases must agree with the methods they stand for.
    let other = Index::from_i64(vec![2, 4]);
    assert_eq!(idx.union(&other), idx.union_with(&other));
    let to_drop = [IndexLabel::Int64(1)];
    assert_eq!(idx.drop(&to_drop), idx.drop_labels(&to_drop));
    assert_eq!(idx.copy(), idx);
    let conditioned = idx.where_(&[true, false, true], &IndexLabel::Int64(0));
    assert_eq!(conditioned.labels()[1], IndexLabel::Int64(0));

    // Level access on a flat index: level 0 is the index, level 1 is an error,
    // and dropping level 0 is an error.
    assert_eq!(idx.get_level_values(0).unwrap(), idx);
    assert!(idx.get_level_values(1).is_err());
    assert!(idx.droplevel(0).is_err());

    // Slice bounds on a sorted index containing a duplicated label.
    let sorted_lookup = Index::from_i64(vec![1, 2, 2, 4]);
    let two = IndexLabel::Int64(2);
    let four = IndexLabel::Int64(4);

    let left_bound = sorted_lookup.get_slice_bound(&two, "left").unwrap();
    assert_eq!(left_bound, 1);

    let locs = sorted_lookup.slice_locs(Some(&two), Some(&four)).unwrap();
    assert_eq!(locs, (1, 4));

    let indexer = sorted_lookup.slice_indexer(Some(&two), Some(&two)).unwrap();
    assert_eq!(indexer, (1, 3));
}
16234
#[test]
fn index_a31qh_astype_str_groupby_join_asof_and_diff() {
    // --- string accessor: the integer label yields `None` in every result ---
    let idx = Index::new(vec![
        IndexLabel::from("Alpha"),
        IndexLabel::from("beta"),
        IndexLabel::from(7_i64),
    ]);

    let lowered = idx.r#str().lower();
    assert_eq!(
        lowered,
        vec![Some(String::from("alpha")), Some(String::from("beta")), None]
    );
    let uppered = idx.r#str().upper();
    assert_eq!(
        uppered,
        vec![Some(String::from("ALPHA")), Some(String::from("BETA")), None]
    );
    let contains_ta = idx.r#str().contains("ta");
    assert_eq!(contains_ta, vec![Some(false), Some(true), None]);
    assert_eq!(idx.r#str().len(), vec![Some(5), Some(4), None]);
    assert_eq!(idx.r#str().is_empty(), vec![Some(false), Some(false), None]);

    // --- astype: "object" is accepted, "float64" is rejected for this index ---
    assert!(idx.astype("object").is_ok());
    assert!(idx.astype("float64").is_err());

    // --- groupby: each label maps to the positions where it occurs ---
    let grouped = Index::new(vec![
        IndexLabel::from("a"),
        IndexLabel::from("b"),
        IndexLabel::from("a"),
    ])
    .groupby();
    assert_eq!(grouped[&IndexLabel::from("a")], vec![0, 2]);
    assert_eq!(grouped[&IndexLabel::from("b")], vec![1]);

    // --- join: each `how` matches the corresponding set operation/operand ---
    let left = Index::from_i64(vec![1, 2, 3]);
    let right = Index::from_i64(vec![2, 4]);
    let inner = left.join(&right, "inner").unwrap();
    assert_eq!(inner, left.intersection(&right));
    let outer = left.join(&right, "outer").unwrap();
    assert_eq!(outer, left.union_with(&right));
    assert_eq!(left.join(&right, "left").unwrap(), left);
    assert_eq!(left.join(&right, "right").unwrap(), right);
    assert!(left.join(&right, "sideways").is_err());

    // --- asof_locs: without a mask, then with position 1 masked out ---
    let sorted = Index::from_i64(vec![1, 3, 5, 7]);
    let probes = Index::from_i64(vec![0, 3, 4, 8]);
    let unmasked = sorted.asof_locs(&probes, None);
    assert_eq!(unmasked, vec![None, Some(1), Some(1), Some(3)]);
    let masked = sorted.asof_locs(&probes, Some(&[true, false, true, true]));
    assert_eq!(masked, vec![None, Some(0), Some(0), Some(3)]);

    // --- diff: the first entry is `None`, the rest are successive differences ---
    let step = IndexLabel::Int64(2);
    assert_eq!(
        sorted.diff(1),
        vec![None, Some(step.clone()), Some(step.clone()), Some(step)]
    );
    // Differences of datetime labels are timedelta labels.
    let datetimes = Index::from_datetime64(vec![10, 25]);
    assert_eq!(
        datetimes.diff(1),
        vec![None, Some(IndexLabel::Timedelta64(15))]
    );
}
16300
#[test]
fn index_name_default_none() {
    // An index built without `set_name` reports no name.
    let unnamed = Index::new(vec![IndexLabel::from(1_i64), IndexLabel::from(2_i64)]);
    assert_eq!(unnamed.name(), None);
}
16308
#[test]
fn index_set_name() {
    let unnamed = Index::new(vec![IndexLabel::from(1_i64), IndexLabel::from(2_i64)]);
    let named = unnamed.set_name("year");

    // The returned index carries the name and the same labels.
    assert_eq!(named.name(), Some("year"));
    assert_eq!(named.labels(), unnamed.labels());
}
16316
#[test]
fn index_set_names_some_and_none() {
    let base = Index::new(vec![IndexLabel::from("a"), IndexLabel::from("b")]);

    // `Some(..)` assigns a name ...
    let named = base.set_names(Some("letters"));
    assert_eq!(named.name(), Some("letters"));

    // ... and `None` removes it again.
    let cleared = named.set_names(None);
    assert_eq!(cleared.name(), None);
}
16325
#[test]
fn index_name_propagates_through_unique() {
    // The label 1 is duplicated, so two distinct labels remain.
    let idx = Index::new(vec![
        IndexLabel::from(1_i64),
        IndexLabel::from(1_i64),
        IndexLabel::from(2_i64),
    ])
    .set_name("id");

    let deduplicated = idx.unique();
    assert_eq!(deduplicated.name(), Some("id"));
    assert_eq!(deduplicated.len(), 2);
}
16333
#[test]
fn index_name_propagates_through_sort_values() {
    let unsorted = Index::new(vec![
        IndexLabel::from(3_i64),
        IndexLabel::from(1_i64),
        IndexLabel::from(2_i64),
    ])
    .set_name("val");

    // Sorting keeps the name.
    assert_eq!(unsorted.sort_values().name(), Some("val"));
}
16340
#[test]
fn index_name_propagates_through_take_and_slice() {
    let idx = Index::new(vec![
        IndexLabel::from("a"),
        IndexLabel::from("b"),
        IndexLabel::from("c"),
    ])
    .set_name("letter");

    // Both positional selections keep the name.
    let taken = idx.take(&[0, 2]);
    assert_eq!(taken.name(), Some("letter"));
    let sliced = idx.slice(1, 2);
    assert_eq!(sliced.name(), Some("letter"));
}
16347
#[test]
fn index_name_propagates_through_map() {
    let idx = Index::new(vec![IndexLabel::from(1_i64), IndexLabel::from(2_i64)]).set_name("x");

    // Multiply integer labels by ten; any other label is passed through as is.
    let mapped = idx.map(|label| {
        if let IndexLabel::Int64(value) = label {
            IndexLabel::Int64(value * 10)
        } else {
            label.clone()
        }
    });

    assert_eq!(mapped.name(), Some("x"));
}
16357
#[test]
fn index_name_propagates_through_drop_labels() {
    let idx = Index::new(vec![
        IndexLabel::from(1_i64),
        IndexLabel::from(2_i64),
        IndexLabel::from(3_i64),
    ])
    .set_name("num");

    // Dropping the label 2 leaves two labels and keeps the name.
    let remaining = idx.drop_labels(&[IndexLabel::from(2_i64)]);
    assert_eq!(remaining.name(), Some("num"));
    assert_eq!(remaining.len(), 2);
}
16365
#[test]
fn index_name_propagates_through_astype() {
    // Integer labels converted via `astype_str` keep the name.
    let integers = Index::new(vec![IndexLabel::from(1_i64), IndexLabel::from(2_i64)]).set_name("n");
    assert_eq!(integers.astype_str().name(), Some("n"));

    // String labels converted via `astype_int` keep the name.
    let strings = Index::new(vec![IndexLabel::from("1"), IndexLabel::from("2")]).set_name("s");
    assert_eq!(strings.astype_int().name(), Some("s"));
}
16373
#[test]
fn index_name_shared_for_intersection() {
    let left = Index::new(vec![IndexLabel::from(1_i64), IndexLabel::from(2_i64)]).set_name("x");

    // Both operands are named "x": the result keeps "x".
    let same_name = Index::new(vec![IndexLabel::from(2_i64), IndexLabel::from(3_i64)]).set_name("x");
    assert_eq!(left.intersection(&same_name).name(), Some("x"));

    // The operands disagree on the name: the result has none.
    let other_name = Index::new(vec![IndexLabel::from(2_i64), IndexLabel::from(3_i64)]).set_name("y");
    assert_eq!(left.intersection(&other_name).name(), None);
}
16383
#[test]
fn index_name_shared_for_union() {
    let left = Index::new(vec![IndexLabel::from(1_i64)]).set_name("k");

    // Both operands are named "k": the result keeps "k".
    let same_name = Index::new(vec![IndexLabel::from(2_i64)]).set_name("k");
    assert_eq!(left.union_with(&same_name).name(), Some("k"));

    // One operand is unnamed: the result has no name.
    let unnamed = Index::new(vec![IndexLabel::from(2_i64)]);
    assert_eq!(left.union_with(&unnamed).name(), None);
}
16393
#[test]
fn index_name_propagates_through_where_cond() {
    let idx = Index::new(vec![IndexLabel::from("a"), IndexLabel::from("b")]).set_name("col");
    let fallback = IndexLabel::from("Z");

    // Conditional replacement keeps the name.
    let conditioned = idx.where_cond(&[true, false], &fallback);
    assert_eq!(conditioned.name(), Some("col"));
}
16400
#[test]
fn index_rename_index() {
    let unnamed = Index::new(vec![IndexLabel::from(1_i64)]);

    // Renaming with `Some(..)` assigns the name ...
    let renamed = unnamed.rename_index(Some("foo"));
    assert_eq!(renamed.name(), Some("foo"));

    // ... and renaming with `None` removes it.
    let cleared = renamed.rename_index(None);
    assert_eq!(cleared.name(), None);
}
16409
#[test]
fn index_equality_ignores_name() {
    // Same labels, different names: `==` must still hold.
    let named_a = Index::new(vec![IndexLabel::from(1_i64), IndexLabel::from(2_i64)]).set_name("a");
    let named_b = Index::new(vec![IndexLabel::from(1_i64), IndexLabel::from(2_i64)]).set_name("b");
    assert_eq!(named_a, named_b);
}
16416
#[test]
fn index_names_property() {
    // An unnamed index reports a single `None` entry.
    let unnamed = Index::new(vec![IndexLabel::from(1_i64)]);
    assert_eq!(unnamed.names(), vec![None]);

    // Once named, the single entry holds the name as an owned string.
    let named = unnamed.set_name("x");
    assert_eq!(named.names(), vec![Some(String::from("x"))]);
}
16424
#[test]
fn index_set_names_list() {
    let unnamed = Index::new(vec![IndexLabel::from(1_i64)]);

    // A one-element list with `Some(..)` assigns the name ...
    let renamed = unnamed.set_names_list(&[Some("foo")]);
    assert_eq!(renamed.name(), Some("foo"));

    // ... and a one-element list with `None` removes it.
    let cleared = renamed.set_names_list(&[None]);
    assert_eq!(cleared.name(), None);
}
16433
#[test]
fn index_to_flat_index() {
    let source = Index::new(vec![IndexLabel::from("a"), IndexLabel::from("b")]).set_name("x");

    // Flattening a flat index yields an equal index with the same name.
    let flattened = source.to_flat_index();
    assert_eq!(flattened, source);
    assert_eq!(flattened.name(), Some("x"));
}
16441
#[test]
fn multi_index_from_tuples() {
    // Three rows of (letter, number) tuples.
    let rows = vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("a"), IndexLabel::from(2_i64)],
        vec![IndexLabel::from("b"), IndexLabel::from(1_i64)],
    ];

    let mi = MultiIndex::from_tuples(rows).unwrap();

    // Two levels (tuple width) and three entries (row count).
    assert_eq!(mi.nlevels(), 2);
    assert_eq!(mi.len(), 3);
    assert!(!mi.is_empty());
}
16457
#[test]
fn multi_index_from_tuples_ragged_errors() {
    // The second tuple has one element while the first has two.
    let ragged = vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("b")],
    ];

    let outcome = MultiIndex::from_tuples(ragged);
    assert!(outcome.is_err());
}
16466
#[test]
fn multi_index_from_arrays() {
    // One array per level, each holding three labels.
    let letters = vec!["a".into(), "a".into(), "b".into()];
    let numbers = vec![1_i64.into(), 2_i64.into(), 1_i64.into()];

    let mi = MultiIndex::from_arrays(vec![letters, numbers]).unwrap();

    // Two arrays give two levels; three labels each give three entries.
    assert_eq!(mi.nlevels(), 2);
    assert_eq!(mi.len(), 3);
}
16478
#[test]
fn multi_index_from_arrays_length_mismatch_errors() {
    // The first array has two labels, the second only one.
    let letters = vec!["a".into(), "b".into()];
    let numbers = vec![1_i64.into()];

    let outcome = MultiIndex::from_arrays(vec![letters, numbers]);
    assert!(outcome.is_err());
}
16487
#[test]
fn multi_index_from_frame_preserves_column_names_a1dv9() {
    // Each column is a (name, labels) pair.
    let letter_column = (
        Some("letter".into()),
        vec![
            IndexLabel::from("a"),
            IndexLabel::from("b"),
            IndexLabel::from("b"),
        ],
    );
    let number_column = (
        Some("number".into()),
        vec![
            IndexLabel::from(1_i64),
            IndexLabel::from(1_i64),
            IndexLabel::from(2_i64),
        ],
    );

    let mi = MultiIndex::from_frame(vec![letter_column, number_column]).unwrap();

    // Column names become the level names.
    assert_eq!(mi.names(), &[Some("letter".into()), Some("number".into())]);

    // Rows are formed by taking the labels of every column at one position.
    let expected_rows = vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("b"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("b"), IndexLabel::from(2_i64)],
    ];
    assert_eq!(mi.to_list(), expected_rows);

    // A frame without columns gives an empty index with zero levels.
    let empty = MultiIndex::from_frame(Vec::new()).unwrap();
    assert!(empty.is_empty());
    assert_eq!(empty.nlevels(), 0);
}
16516
#[test]
fn multi_index_from_frame_rejects_length_mismatch_a1dv9() {
    // The "letter" column has two labels, the "number" column only one.
    let letter_column = (
        Some("letter".into()),
        vec![IndexLabel::from("a"), IndexLabel::from("b")],
    );
    let number_column = (Some("number".into()), vec![IndexLabel::from(1_i64)]);

    let err = MultiIndex::from_frame(vec![letter_column, number_column]).unwrap_err();

    // The error must report the expected length 2 and the actual length 1.
    let is_length_mismatch = matches!(
        err,
        super::IndexError::LengthMismatch {
            expected: 2,
            actual: 1,
            ..
        }
    );
    assert!(is_length_mismatch);
}
16534
#[test]
fn multi_index_from_product() {
    // Two letters crossed with three numbers.
    let letters = vec!["a".into(), "b".into()];
    let numbers = vec![1_i64.into(), 2_i64.into(), 3_i64.into()];

    let mi = MultiIndex::from_product(vec![letters, numbers]).unwrap();

    assert_eq!(mi.nlevels(), 2);
    // 2 * 3 combinations.
    assert_eq!(mi.len(), 6);
}
16546
#[test]
fn multi_index_from_product_values() {
    let mi = MultiIndex::from_product(vec![
        vec![IndexLabel::from("x"), IndexLabel::from("y")],
        vec![IndexLabel::from(1_i64), IndexLabel::from(2_i64)],
    ])
    .unwrap();

    // (position, letter, number): the second level varies fastest.
    let expected_rows = [(0, "x", 1), (1, "x", 2), (2, "y", 1), (3, "y", 2)];
    for (position, letter, number) in expected_rows {
        let expected = [IndexLabel::Utf8(letter.into()), IndexLabel::Int64(number)];
        assert_eq!(
            mi.get_tuple(position).unwrap(),
            expected.iter().collect::<Vec<_>>()
        );
    }
}
16573
#[test]
fn multi_index_get_level_values() {
    let level_names = vec![Some("letter".into()), Some("number".into())];
    let mi = MultiIndex::from_tuples(vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("b"), IndexLabel::from(2_i64)],
    ])
    .unwrap()
    .set_names(level_names);

    // Level 0 holds the letters and carries the first level name.
    let letters = mi.get_level_values(0).unwrap();
    let expected_letters = [IndexLabel::from("a"), IndexLabel::from("b")];
    assert_eq!(letters.labels(), &expected_letters);
    assert_eq!(letters.name(), Some("letter"));

    // Level 1 holds the numbers and carries the second level name.
    let numbers = mi.get_level_values(1).unwrap();
    let expected_numbers = [IndexLabel::from(1_i64), IndexLabel::from(2_i64)];
    assert_eq!(numbers.labels(), &expected_numbers);
    assert_eq!(numbers.name(), Some("number"));
}
16597
#[test]
fn multi_index_get_level_values_out_of_bounds() {
    // The index has a single level, so level 1 does not exist.
    let single_level = MultiIndex::from_tuples(vec![vec![IndexLabel::from("a")]]).unwrap();
    let outcome = single_level.get_level_values(1);
    assert!(outcome.is_err());
}
16603
#[test]
fn multi_index_metadata_shape_and_tuple_materialization() {
    let tuples = vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("a"), IndexLabel::from(2_i64)],
        vec![IndexLabel::from("b"), IndexLabel::from(1_i64)],
    ];
    let mi = MultiIndex::from_tuples(vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("a"), IndexLabel::from(2_i64)],
        vec![IndexLabel::from("b"), IndexLabel::from(1_i64)],
    ])
    .unwrap()
    .set_names(vec![Some("letter".into()), Some("number".into())]);

    // Names: no single name, but one name per level.
    assert_eq!(mi.name(), None);
    assert_eq!(mi.names(), &[Some("letter".into()), Some("number".into())]);

    // Shape metadata: three entries along one dimension.
    assert_eq!(mi.size(), 3);
    assert_eq!(mi.shape(), (3,));
    assert_eq!(mi.ndim(), 1);
    assert!(!mi.empty());

    // Every tuple-materializing alias agrees with `to_list`.
    assert_eq!(mi.to_list(), tuples);
    assert_eq!(mi.tolist(), mi.to_list());
    assert_eq!(mi.to_numpy(), mi.to_list());
    assert_eq!(mi.values(), mi.to_list());
    assert_eq!(mi.array(), mi.to_list());
    assert_eq!(mi.ravel(), mi.to_list());

    // Tuples are rendered as "(first, second)".
    assert_eq!(mi.format(), vec!["(a, 1)", "(a, 2)", "(b, 1)"]);

    // Shape-preserving aliases compare equal to the index itself.
    assert_eq!(mi.view(), mi);
    assert_eq!(mi.transpose(), mi);
    assert_eq!(mi.T(), mi);

    assert_eq!(mi.to_frame(), tuples);

    // `to_series` pairs every tuple with itself.
    let expected_series = tuples
        .iter()
        .map(|tuple| (tuple.clone(), tuple.clone()))
        .collect::<Vec<_>>();
    assert_eq!(mi.to_series(), expected_series);
}
16645
#[test]
fn multi_index_levels_codes_and_levshape_exclude_missing_labels() {
    // The middle row has a missing datetime label in the first level.
    let mi = MultiIndex::from_tuples(vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::Datetime64(i64::MIN), IndexLabel::from(2_i64)],
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
    ])
    .unwrap()
    .set_names(vec![Some("letter".into()), Some("number".into())]);

    let levels = mi.levels();

    // Level 0: only "a"; the missing label is not part of the level.
    let expected_letters = [IndexLabel::from("a")];
    assert_eq!(levels[0].labels(), &expected_letters);
    assert_eq!(levels[0].name(), Some("letter"));

    // Level 1: the two distinct numbers.
    let expected_numbers = [IndexLabel::from(1_i64), IndexLabel::from(2_i64)];
    assert_eq!(levels[1].labels(), &expected_numbers);
    assert_eq!(levels[1].name(), Some("number"));

    // Codes index into the levels; the missing label is coded as -1.
    assert_eq!(mi.codes(), vec![vec![0, -1, 0], vec![0, 1, 0]]);
    assert_eq!(mi.levshape(), vec![1, 2]);

    // Memory accounting: shallow never exceeds deep, and `nbytes` is the shallow figure.
    let (shallow, deep) = (mi.memory_usage(false), mi.memory_usage(true));
    assert!(shallow <= deep);
    assert_eq!(mi.nbytes(), mi.memory_usage(false));
}
16669
#[test]
fn multi_index_dtype_type_checks_and_item_match_object_index_shape() {
    // A single (letter, number) row.
    let single_row = vec![IndexLabel::from("a"), IndexLabel::from(1_i64)];
    let mi = MultiIndex::from_tuples(vec![single_row.clone()])
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("number".into())]);

    // dtype reporting: "object" overall, one dtype per level.
    assert_eq!(mi.dtype(), "object");
    assert_eq!(mi.dtypes(), vec!["object", "int64"]);
    assert_eq!(mi.inferred_type(), "mixed");
    assert_eq!(mi.infer_objects(), mi);

    // Every typed predicate is false; only `is_object` holds.
    assert!(!mi.holds_integer());
    assert!(!mi.is_boolean());
    assert!(!mi.is_categorical());
    assert!(!mi.is_floating());
    assert!(!mi.is_integer());
    assert!(!mi.is_interval());
    assert!(!mi.is_numeric());
    assert!(mi.is_object());
    assert!(mi.is_(&mi));

    // `item` returns the only tuple of a one-entry index ...
    assert_eq!(mi.item().unwrap(), single_row);

    // ... and fails once the entry has been repeated.
    let repeated = mi.repeat(2);
    assert!(repeated.item().is_err());
}
16697
#[test]
fn multi_index_missing_masks_fillna_putmask_where_and_map() {
    // Builds a (letter, number) row from plain values.
    let row = |letter: &str, number: i64| vec![IndexLabel::from(letter), IndexLabel::Int64(number)];
    let missing_timedelta = IndexLabel::Timedelta64(Timedelta::NAT);

    // Row 1 has a missing first level, row 2 a missing second level.
    let mi = MultiIndex::from_tuples(vec![
        row("a", 1),
        vec![IndexLabel::Datetime64(i64::MIN), IndexLabel::Int64(2)],
        vec![IndexLabel::from("b"), missing_timedelta.clone()],
        row("c", 3),
    ])
    .unwrap()
    .set_names(vec![Some("letter".into()), Some("number".into())]);

    // --- missing-value masks are rejected, all with the same message ---
    let missing_mask_errors = vec![
        mi.hasnans().unwrap_err(),
        mi.isna().unwrap_err(),
        mi.isnull().unwrap_err(),
        mi.notna().unwrap_err(),
        mi.notnull().unwrap_err(),
    ];
    assert!(missing_mask_errors.into_iter().all(|err| matches!(
        err,
        super::IndexError::InvalidArgument(message)
            if message == "isna is not defined for MultiIndex"
    )));

    // --- copies compare equal to the source ---
    assert_eq!(mi.copy(), mi);
    assert_eq!(mi.remove_unused_levels(), mi);

    // --- fillna: one scalar replaces a missing label in any level ---
    let scalar_filled = mi.fillna(&IndexLabel::from("missing"));
    let expected_scalar_filled = vec![
        row("a", 1),
        vec![IndexLabel::from("missing"), IndexLabel::Int64(2)],
        vec![IndexLabel::from("b"), IndexLabel::from("missing")],
        row("c", 3),
    ];
    assert_eq!(scalar_filled.to_list(), expected_scalar_filled);

    // --- fillna_tuple: one replacement per level; wrong width is an error ---
    let tuple_filled = mi.fillna_tuple(&row("z", 0)).unwrap();
    let expected_tuple_filled = vec![row("a", 1), row("z", 2), row("b", 0), row("c", 3)];
    assert_eq!(tuple_filled.to_list(), expected_tuple_filled);
    let too_short = mi.fillna_tuple(&[IndexLabel::from("short")]);
    assert!(too_short.is_err());

    // --- putmask: rows where the mask is true take the replacement tuple ---
    let masked = mi
        .putmask(&[false, true, false, true], row("x", 9))
        .unwrap();
    let expected_masked = vec![
        row("a", 1),
        row("x", 9),
        vec![IndexLabel::from("b"), missing_timedelta.clone()],
        row("x", 9),
    ];
    assert_eq!(masked.to_list(), expected_masked);
    // A one-entry mask with a one-label replacement is rejected.
    let bad_putmask = mi.putmask(&[true], vec![IndexLabel::from("x")]);
    assert!(bad_putmask.is_err());

    // --- where: rows where the condition is false take the fallback tuple ---
    let where_result = mi
        .r#where(&[true, false, true, false], row("fallback", 5))
        .unwrap();
    let expected_where = vec![
        row("a", 1),
        row("fallback", 5),
        vec![IndexLabel::from("b"), missing_timedelta],
        row("fallback", 5),
    ];
    assert_eq!(where_result.to_list(), expected_where);

    // --- map: render each tuple as its labels joined by '|' ---
    let rendered = mi.map(|tuple| {
        let parts = tuple
            .iter()
            .map(|label| label.to_string())
            .collect::<Vec<_>>();
        parts.join("|")
    });
    assert_eq!(rendered[0], "a|1");
    assert_eq!(rendered[3], "c|3");
}
16809
#[test]
fn multi_index_set_levels_and_set_codes_rebuild_from_pandas_catalogs() {
    // Builds a (letter, number) row from plain values.
    let row = |letter: &str, number: i64| vec![IndexLabel::from(letter), IndexLabel::Int64(number)];

    let mi = MultiIndex::from_tuples(vec![row("a", 1), row("b", 2), row("a", 1)])
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("number".into())]);

    // --- set_levels: existing rows are re-expressed with the new level labels ---
    let new_letters = vec![IndexLabel::from("x"), IndexLabel::from("y")];
    let new_numbers = vec![IndexLabel::Int64(10), IndexLabel::Int64(20)];
    let relabeled = mi.set_levels(vec![new_letters, new_numbers]).unwrap();
    assert_eq!(
        relabeled.to_list(),
        vec![row("x", 10), row("y", 20), row("x", 10)]
    );
    assert_eq!(relabeled.names(), mi.names());

    // Only one level supplied for a two-level index: rejected.
    let one_level_only = mi.set_levels(vec![vec![IndexLabel::from("only")]]);
    assert!(one_level_only.is_err());

    // First level supplied with one label where two are in use: rejected.
    let level_too_small = mi.set_levels(vec![
        vec![IndexLabel::from("x")],
        vec![IndexLabel::Int64(10), IndexLabel::Int64(20)],
    ]);
    assert!(level_too_small.is_err());

    // --- set_codes: rows are rebuilt from the codes; -1 becomes a missing label ---
    let recoded = mi.set_codes(vec![vec![1, 0, 1], vec![1, -1, 0]]).unwrap();
    let expected_recoded = vec![
        row("b", 2),
        vec![IndexLabel::from("a"), IndexLabel::Datetime64(i64::MIN)],
        row("b", 1),
    ];
    assert_eq!(recoded.to_list(), expected_recoded);
    assert_eq!(recoded.names(), mi.names());

    // Rejected inputs: one code array for two levels, code arrays of
    // differing length, and a code (99) that is outside the level.
    let invalid_code_sets = [
        vec![vec![0, 1, 0]],
        vec![vec![0, 1], vec![0, 1, 0]],
        vec![vec![0, 1, 0], vec![0, 99, 0]],
    ];
    for codes in invalid_code_sets {
        assert!(mi.set_codes(codes).is_err());
    }
}
16864
/// `equals` and `equal_levels` ignore level names while `identical` does not; a different
/// tuple order breaks both `equals` and `equal_levels`.
#[test]
fn multi_index_equals_identical_and_equal_levels_match_pandas_names() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];

    let left = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2)])
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("number".into())]);

    // Same tuples, second level renamed.
    let renamed = left
        .clone()
        .set_names(vec![Some("letter".into()), Some("other".into())]);
    assert!(left.equals(&renamed));
    assert!(!left.identical(&renamed));
    assert!(left.equal_levels(&renamed));

    // Same names, tuples in the opposite order.
    let reordered = MultiIndex::from_tuples(vec![pair("b", 2), pair("a", 1)])
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("number".into())]);
    assert!(!left.equals(&reordered));
    assert!(!left.equal_levels(&reordered));
}
16889
/// Flattening with `"_"` as the separator yields one joined `Utf8` label per tuple.
#[test]
fn multi_index_to_flat_index() {
    let rows = vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("b"), IndexLabel::from(2_i64)],
    ];
    let mi = MultiIndex::from_tuples(rows).unwrap();

    let flat = mi.to_flat_index("_");
    for (position, joined) in ["a_1", "b_2"].into_iter().enumerate() {
        assert_eq!(flat.labels()[position], IndexLabel::from(joined));
    }
}
16902
/// Dropping the middle level of a three-level index leaves a two-level `MultiIndex`
/// that keeps the outer names.
#[test]
fn multi_index_droplevel() {
    let row = |first: &str, n: i64, last: &str| {
        vec![
            IndexLabel::from(first),
            IndexLabel::from(n),
            IndexLabel::from(last),
        ]
    };
    let mi = MultiIndex::from_tuples(vec![row("a", 1, "x"), row("b", 2, "y")])
        .unwrap()
        .set_names(vec![
            Some("l0".into()),
            Some("l1".into()),
            Some("l2".into()),
        ]);

    let dropped = mi.droplevel(1).unwrap();
    let still_multi = matches!(&dropped, super::MultiIndexOrIndex::Multi(_));
    assert!(
        still_multi,
        "expected MultiIndex after dropping from 3 levels"
    );

    // The assertion above already failed the test if this pattern does not match.
    let super::MultiIndexOrIndex::Multi(remaining) = dropped else {
        return;
    };
    assert_eq!(remaining.nlevels(), 2);
    assert_eq!(remaining.names(), &[Some("l0".into()), Some("l2".into())]);
}
16927
/// Dropping one level of a two-level index collapses to a flat `Index` carrying the
/// surviving level's labels and name.
#[test]
fn multi_index_droplevel_to_index() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let mi = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2)])
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("number".into())]);

    let dropped = mi.droplevel(0).unwrap();
    let is_flat = matches!(&dropped, super::MultiIndexOrIndex::Index(_));
    assert!(is_flat, "expected Index after dropping from 2 levels");

    // The assertion above already failed the test if this pattern does not match.
    let super::MultiIndexOrIndex::Index(idx) = dropped else {
        return;
    };
    assert_eq!(idx.labels(), &[IndexLabel::Int64(1), IndexLabel::Int64(2)]);
    assert_eq!(idx.name(), Some("number"));
}
16948
/// Swapping levels 0 and 1 exchanges both the names and the labels within each tuple.
#[test]
fn multi_index_swaplevel() {
    let letter = IndexLabel::from("a");
    let number = IndexLabel::Int64(1);

    let mi = MultiIndex::from_tuples(vec![vec![letter.clone(), number.clone()]])
        .unwrap()
        .set_names(vec![Some("first".into()), Some("second".into())]);
    let swapped = mi.swaplevel(0, 1).unwrap();

    assert_eq!(
        swapped.names(),
        &[Some("second".into()), Some("first".into())]
    );
    assert_eq!(swapped.get_tuple(0).unwrap(), vec![&number, &letter]);
}
16965
/// An index built from no tuples reports zero levels, zero length, and `is_empty`.
#[test]
fn multi_index_empty() {
    let no_rows = Vec::new();
    let mi = MultiIndex::from_tuples(no_rows).unwrap();

    assert_eq!((mi.nlevels(), mi.len()), (0, 0));
    assert!(mi.is_empty());
}
16973
/// `get_tuple` returns `None` for a position one past the only row.
#[test]
fn multi_index_get_tuple_out_of_bounds() {
    let single_row = vec![vec![IndexLabel::from("a")]];
    let mi = MultiIndex::from_tuples(single_row).unwrap();

    let past_end = mi.get_tuple(1);
    assert!(past_end.is_none());
}
16979
/// `get_loc_tuple` reports every position of a full tuple, including duplicates.
#[test]
fn multi_index_get_loc_tuple_exact_and_duplicates() {
    let texts = |items: &[&str]| -> Vec<IndexLabel> {
        items.iter().map(|s| IndexLabel::from(*s)).collect()
    };
    let mi = MultiIndex::from_arrays(vec![
        texts(&["east", "east", "west", "east"]),
        texts(&["A", "B", "A", "A"]),
    ])
    .unwrap();

    // ("east", "A") sits at rows 0 and 3.
    let key = [IndexLabel::from("east"), IndexLabel::from("A")];
    let positions = mi.get_loc_tuple(&key).unwrap();
    assert_eq!(positions, vec![0, 3]);
}
16996
/// A one-label prefix passed to `get_loc_level` yields the matching rows plus a flat
/// `Index` made of the remaining level (labels and name).
#[test]
fn multi_index_get_loc_level_prefix_returns_remaining_index() {
    let texts = |items: &[&str]| -> Vec<IndexLabel> {
        items.iter().map(|s| IndexLabel::from(*s)).collect()
    };
    let mi = MultiIndex::from_arrays(vec![
        texts(&["east", "east", "west"]),
        texts(&["A", "B", "A"]),
    ])
    .unwrap()
    .set_names(vec![Some("region".into()), Some("product".into())]);

    let prefix = [IndexLabel::from("east")];
    let (positions, remaining) = mi.get_loc_level(&prefix).unwrap();
    assert_eq!(positions, vec![0, 1]);

    let product_level_kept = match &remaining {
        Some(super::MultiIndexOrIndex::Index(index)) => {
            index.labels() == [IndexLabel::Utf8("A".into()), IndexLabel::Utf8("B".into())]
                && index.name() == Some("product")
        }
        _ => false,
    };
    assert!(product_level_kept);
}
17018
/// `groupby` maps each distinct tuple to the list of row positions where it occurs.
#[test]
fn multi_index_groupby_join_groups_duplicate_tuples_d89fe3() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let mi = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2), pair("a", 1)]).unwrap();

    let groups = mi.groupby();

    let repeated = pair("a", 1);
    assert_eq!(groups[&repeated], vec![0, 2]);
    let single = pair("b", 2);
    assert_eq!(groups[&single], vec![1]);
}
17038
/// Exercises the four `join` modes: "left" and "right" return the respective operand,
/// "inner" keeps the shared tuple, "outer" lists every tuple from both sides.
#[test]
fn multi_index_groupby_join_modes_d89fe3() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let left = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2), pair("c", 3)]).unwrap();
    let right = MultiIndex::from_tuples(vec![pair("b", 2), pair("d", 4)]).unwrap();

    assert_eq!(left.join(&right, "left").unwrap(), left);
    assert_eq!(left.join(&right, "right").unwrap(), right);

    let inner = left.join(&right, "inner").unwrap();
    assert_eq!(inner.to_list(), vec![pair("b", 2)]);

    let outer = left.join(&right, "outer").unwrap();
    assert_eq!(
        outer.to_list(),
        vec![pair("a", 1), pair("b", 2), pair("c", 3), pair("d", 4)]
    );
}
17069
/// `join` fails for the mode "sideways", and for "inner"/"outer" when the operands have
/// different numbers of levels.
#[test]
fn multi_index_groupby_join_rejects_bad_mode_and_level_mismatch_d89fe3() {
    let two_level = vec![vec![IndexLabel::from("a"), IndexLabel::from(1_i64)]];
    let one_level = vec![vec![IndexLabel::from("a")]];
    let left = MultiIndex::from_tuples(two_level).unwrap();
    let right = MultiIndex::from_tuples(one_level).unwrap();

    for mode in ["sideways", "inner", "outer"] {
        assert!(left.join(&right, mode).is_err());
    }
}
17079
/// `slice_locs` with full-tuple bounds ("east", 2) .. ("west", 1) returns `(1, 3)`.
#[test]
fn multi_index_slice_locs_uses_lexicographic_bounds() {
    let texts = |items: &[&str]| -> Vec<IndexLabel> {
        items.iter().map(|s| IndexLabel::from(*s)).collect()
    };
    let ints = |items: &[i64]| -> Vec<IndexLabel> {
        items.iter().copied().map(IndexLabel::Int64).collect()
    };
    let mi = MultiIndex::from_arrays(vec![
        texts(&["east", "east", "west", "west"]),
        ints(&[1, 2, 1, 2]),
    ])
    .unwrap();

    let lower = [IndexLabel::from("east"), IndexLabel::Int64(2)];
    let upper = [IndexLabel::from("west"), IndexLabel::Int64(1)];
    let bounds = mi.slice_locs(Some(&lower), Some(&upper)).unwrap();
    assert_eq!(bounds, (1, 3));
}
17096
/// One-label prefixes are accepted by `get_slice_bound` and `slice_indexer`, and cover
/// the whole run of rows sharing that prefix.
#[test]
fn multi_index_slice_bound_partial_prefixes_d89fe2() {
    let texts = |items: &[&str]| -> Vec<IndexLabel> {
        items.iter().map(|s| IndexLabel::from(*s)).collect()
    };
    let ints = |items: &[i64]| -> Vec<IndexLabel> {
        items.iter().copied().map(IndexLabel::Int64).collect()
    };
    let mi = MultiIndex::from_arrays(vec![
        texts(&["east", "east", "west", "west"]),
        ints(&[1, 2, 1, 2]),
    ])
    .unwrap();

    let east = [IndexLabel::from("east")];
    let west = [IndexLabel::from("west")];

    for (side, expected) in [("left", 0), ("right", 2)] {
        assert_eq!(mi.get_slice_bound(&east, side).unwrap(), expected);
    }

    assert_eq!(mi.slice_indexer(Some(&west), None).unwrap(), (2, 4));
    assert_eq!(mi.slice_indexer(None, Some(&east)).unwrap(), (0, 2));
}
17113
/// Full-tuple keys: a present tuple gives distinct left/right bounds, while an absent
/// tuple gives the same position for both sides.
#[test]
fn multi_index_slice_bound_full_tuple_and_missing_insert_d89fe2() {
    let texts = |items: &[&str]| -> Vec<IndexLabel> {
        items.iter().map(|s| IndexLabel::from(*s)).collect()
    };
    let ints = |items: &[i64]| -> Vec<IndexLabel> {
        items.iter().copied().map(IndexLabel::Int64).collect()
    };
    let mi = MultiIndex::from_arrays(vec![
        texts(&["east", "east", "west", "west"]),
        ints(&[1, 2, 1, 2]),
    ])
    .unwrap();

    // ("east", 2) is row 1.
    let exact = [IndexLabel::from("east"), IndexLabel::Int64(2)];
    for (side, expected) in [("left", 1), ("right", 2)] {
        assert_eq!(mi.get_slice_bound(&exact, side).unwrap(), expected);
    }

    // ("east", 3) is not in the index.
    let missing_insert = [IndexLabel::from("east"), IndexLabel::Int64(3)];
    for side in ["left", "right"] {
        assert_eq!(mi.get_slice_bound(&missing_insert, side).unwrap(), 2);
    }
}
17130
/// `get_slice_bound` returns an error when the side is "middle".
#[test]
fn multi_index_slice_bound_rejects_invalid_side_d89fe2() {
    let key = [IndexLabel::from("east")];
    let mi = MultiIndex::from_tuples(vec![key.to_vec()]).unwrap();

    let outcome = mi.get_slice_bound(&key, "middle");
    assert!(outcome.is_err());
}
17138
/// `truncate` accepts one-label prefix bounds on either side, keeps the level names, and
/// can produce an empty result.
#[test]
fn multi_index_truncate_uses_prefix_bounds_d89fe11() -> Result<(), super::IndexError> {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::Int64(n)];
    let prefix = |s: &str| [IndexLabel::from(s)];

    let mi = MultiIndex::from_tuples(vec![
        pair("a", 1),
        pair("a", 3),
        pair("b", 1),
        pair("c", 1),
    ])?
    .set_names(vec![Some("letter".into()), Some("number".into())]);

    // Both bounds given: every "a" and "b" row is kept.
    let (from_a, to_b) = (prefix("a"), prefix("b"));
    let bounded = mi.truncate(Some(&from_a), Some(&to_b))?;
    assert_eq!(
        bounded.to_list(),
        vec![pair("a", 1), pair("a", 3), pair("b", 1)]
    );
    assert_eq!(bounded.names(), mi.names());

    // Lower bound only.
    let from_b = prefix("b");
    let tail = mi.truncate(Some(&from_b), None)?;
    assert_eq!(tail.to_list(), vec![pair("b", 1), pair("c", 1)]);

    // Upper bound "aa" is not a label in the index.
    let to_aa = prefix("aa");
    let clipped = mi.truncate(None, Some(&to_aa))?;
    assert_eq!(clipped.to_list(), vec![pair("a", 1), pair("a", 3)]);

    // Lower bound "d" leaves nothing, but the names are still carried over.
    let from_d = prefix("d");
    let empty = mi.truncate(Some(&from_d), None)?;
    assert!(empty.is_empty());
    assert_eq!(empty.names(), mi.names());

    Ok(())
}
17187
/// `get_locs` resolves a one-label prefix, a full tuple, and an empty key (no rows).
#[test]
fn multi_index_get_locs_prefix_and_exact_selectors_d89fe10() -> Result<(), super::IndexError> {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let mi = MultiIndex::from_tuples(vec![
        pair("a", 1),
        pair("a", 2),
        pair("b", 1),
        pair("b", 2),
    ])?;

    let a = IndexLabel::from("a");

    let by_prefix = mi.get_locs(&[a.clone()])?;
    assert_eq!(by_prefix, vec![0, 1]);

    let by_full_tuple = mi.get_locs(&[a, IndexLabel::Int64(1)])?;
    assert_eq!(by_full_tuple, vec![0]);

    let by_empty_key = mi.get_locs(&[])?;
    assert_eq!(by_empty_key, Vec::<usize>::new());

    Ok(())
}
17206
/// `get_locs` errors for a label that is absent and for a key with more labels than levels.
#[test]
fn multi_index_get_locs_rejects_missing_and_overlong_keys_d89fe10()
-> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::from("a"), IndexLabel::from(1_i64)];
    let mi = MultiIndex::from_tuples(vec![only_row])?;

    let absent = [IndexLabel::from("z")];
    assert!(mi.get_locs(&absent).is_err());

    // Three labels against a two-level index.
    let overlong = [
        IndexLabel::from("a"),
        IndexLabel::Int64(1),
        IndexLabel::from("extra"),
    ];
    assert!(mi.get_locs(&overlong).is_err());

    Ok(())
}
17224
/// With a duplicated source tuple, `get_indexer_non_unique` emits every matching source
/// position per target row, `-1` for a miss, and lists the missed target rows.
#[test]
fn multi_index_get_indexer_non_unique_expands_duplicate_matches() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];

    // ("a", 1) occurs at source rows 0 and 3.
    let source = MultiIndex::from_tuples(vec![
        pair("a", 1),
        pair("a", 2),
        pair("b", 1),
        pair("a", 1),
    ])
    .unwrap();
    // ("z", 9) at target row 1 has no match in the source.
    let target = MultiIndex::from_tuples(vec![
        pair("a", 1),
        pair("z", 9),
        pair("a", 2),
        pair("a", 1),
    ])
    .unwrap();

    let (indexer, missing) = source.get_indexer_non_unique(&target);
    assert_eq!(indexer, vec![0, 3, -1, 1, 0, 3]);
    assert_eq!(missing, vec![1]);
}
17246
/// Checks `intersection` and `difference` against a hash-set reference that keeps the
/// first occurrence of each left-hand tuple, in order.
#[test]
fn multi_index_setop_packed_matches_reference_misetop() {
    let mk = |spec: &[(&str, i64)]| {
        let tuples: Vec<Vec<IndexLabel>> = spec
            .iter()
            .map(|&(s, i)| vec![IndexLabel::Utf8(s.to_string()), IndexLabel::Int64(i)])
            .collect();
        MultiIndex::from_tuples(tuples).unwrap()
    };
    let cases: Vec<(Vec<(&str, i64)>, Vec<(&str, i64)>)> = vec![
        (
            vec![("a", 1), ("b", 2), ("a", 1), ("c", 3), ("b", 2)],
            vec![("b", 2), ("c", 3), ("z", 9)],
        ),
        (vec![("a", 1), ("b", 2)], vec![("x", 7), ("y", 8)]),
        (vec![("a", 1), ("a", 1), ("b", 2)], vec![("a", 1)]),
    ];
    for (sa, sb) in cases {
        let a = mk(&sa);
        let b = mk(&sb);
        let bset: std::collections::HashSet<Vec<IndexLabel>> = b.to_list().into_iter().collect();

        // Walk the left tuples once: the first occurrence of each goes to the intersection
        // reference when it is also in `b`, otherwise to the difference reference.
        let mut seen = std::collections::HashSet::new();
        let mut ref_inter: Vec<Vec<IndexLabel>> = Vec::new();
        let mut ref_diff: Vec<Vec<IndexLabel>> = Vec::new();
        for tuple in a.to_list() {
            if !seen.insert(tuple.clone()) {
                continue;
            }
            if bset.contains(&tuple) {
                ref_inter.push(tuple);
            } else {
                ref_diff.push(tuple);
            }
        }

        assert_eq!(a.intersection(&b).unwrap().to_list(), ref_inter, "inter {sa:?}");
        assert_eq!(a.difference(&b).unwrap().to_list(), ref_diff, "diff {sa:?}");
    }
}
17290
/// Compares `duplicated` (all three keep modes), `unique`, and `nunique` against
/// hash-based references on 400 generated rows.
#[test]
fn multi_index_duplicated_packed_matches_vec_reference_midedup() {
    let n = 400usize;

    // Deterministic generator: each call advances `state` and returns the new value.
    let mut state: u64 = 0x9e37_79b9_7f4a_7c15;
    let mut step = || {
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        state
    };

    let mut l0 = Vec::with_capacity(n);
    let mut l1 = Vec::with_capacity(n);
    for _ in 0..n {
        let group = (step() >> 40) % 6;
        l0.push(IndexLabel::Utf8(format!("g{}", group)));
        let number = (step() >> 40) % 5;
        l1.push(IndexLabel::Int64(number as i64));
    }
    let mi = MultiIndex::from_arrays(vec![l0, l1]).unwrap();
    let rows = mi.to_list();

    for keep in [DuplicateKeep::First, DuplicateKeep::Last, DuplicateKeep::None] {
        let want: Vec<bool> = match keep {
            // A row is flagged when its tuple was already seen scanning forward.
            DuplicateKeep::First => {
                let mut seen = std::collections::HashSet::new();
                rows.iter().map(|row| !seen.insert(row.clone())).collect()
            }
            // Same scan from the back, then restored to row order.
            DuplicateKeep::Last => {
                let mut seen = std::collections::HashSet::new();
                let mut flags: Vec<bool> = rows
                    .iter()
                    .rev()
                    .map(|row| !seen.insert(row.clone()))
                    .collect();
                flags.reverse();
                flags
            }
            // Every row whose tuple occurs more than once is flagged.
            DuplicateKeep::None => {
                let mut counts: std::collections::HashMap<Vec<IndexLabel>, usize> =
                    Default::default();
                for row in &rows {
                    *counts.entry(row.clone()).or_insert(0) += 1;
                }
                rows.iter().map(|row| counts[row] > 1).collect()
            }
        };
        assert_eq!(mi.duplicated(keep), want, "duplicated {keep:?}");
    }

    // Reference for `unique`: first occurrence of each tuple, in row order.
    let mut seen = std::collections::HashSet::new();
    let mut kept: Vec<Vec<IndexLabel>> = Vec::new();
    for row in &rows {
        if seen.insert(row.clone()) {
            kept.push(row.clone());
        }
    }
    assert_eq!(mi.unique().to_list(), kept);
    assert_eq!(mi.nunique(), kept.len());
}
17353
/// Compares `argsort`, `sort_values`, `min`, and `max` against sorting row positions by
/// tuple, with ties broken by position, on 600 generated rows.
#[test]
fn multi_index_argsort_packed_matches_tuple_sort_misort() {
    let n = 600usize;

    // Deterministic generator: each call advances `state` and returns the new value.
    let mut state: u64 = 0x1234_5678_9abc_def1;
    let mut step = || {
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        state
    };

    let mut l0 = Vec::with_capacity(n);
    let mut l1 = Vec::with_capacity(n);
    for _ in 0..n {
        let a = (step() >> 33) % 7;
        let b = (step() >> 33) % 5;
        l0.push(IndexLabel::Utf8(format!("g{a}")));
        l1.push(IndexLabel::Int64(b as i64));
    }
    let mi = MultiIndex::from_arrays(vec![l0, l1]).unwrap();
    let rows = mi.to_list();

    // Reference order: compare (tuple, position) pairs lexicographically.
    let mut want: Vec<usize> = (0..n).collect();
    want.sort_by(|&a, &b| (&rows[a], a).cmp(&(&rows[b], b)));

    assert_eq!(mi.argsort(), want, "argsort");
    assert_eq!(
        mi.sort_values().to_list(),
        mi.take_existing_positions(&want).to_list()
    );

    let smallest = rows[want[0]].clone();
    assert_eq!(mi.min(), Some(smallest));
    let largest = rows[want[n - 1]].clone();
    assert_eq!(mi.max(), Some(largest));
}
17384
/// Compares `get_indexer_non_unique` and `get_indexer` against references built from
/// hash maps keyed by whole tuples.
#[test]
fn multi_index_get_indexer_packed_matches_vec_reference_mipack() {
    let mk = |spec: &[(&str, i64)]| {
        let tuples: Vec<Vec<IndexLabel>> = spec
            .iter()
            .map(|&(s, i)| vec![IndexLabel::Utf8(s.to_string()), IndexLabel::Int64(i)])
            .collect();
        MultiIndex::from_tuples(tuples).unwrap()
    };
    let source = mk(&[("a", 1), ("a", 2), ("b", 1), ("a", 1), ("c", 5), ("b", 2)]);
    let target = mk(&[("b", 1), ("z", 9), ("a", 1), ("a", 2), ("c", 5), ("q", 0), ("b", 2)]);
    let src_rows = source.to_list();
    let tgt_rows = target.to_list();

    // Non-unique reference: every source position of each tuple, in source order.
    let mut positions: std::collections::HashMap<Vec<IndexLabel>, Vec<usize>> =
        Default::default();
    for (source_row, key) in src_rows.iter().enumerate() {
        positions.entry(key.clone()).or_default().push(source_row);
    }
    let mut ref_ix = Vec::new();
    let mut ref_miss = Vec::new();
    for (target_row, key) in tgt_rows.iter().enumerate() {
        match positions.get(key) {
            Some(hits) => ref_ix.extend(hits.iter().map(|&p| p as isize)),
            None => {
                ref_ix.push(-1);
                ref_miss.push(target_row);
            }
        }
    }
    let (ix, miss) = source.get_indexer_non_unique(&target);
    assert_eq!(ix, ref_ix, "non_unique indexer");
    assert_eq!(miss, ref_miss, "non_unique missing");

    // Unique reference: one position per tuple, -1 when the tuple is absent.
    let usrc = mk(&[("a", 1), ("a", 2), ("b", 1), ("c", 5), ("b", 2)]);
    let mut first_position: std::collections::HashMap<Vec<IndexLabel>, isize> =
        Default::default();
    for (source_row, key) in usrc.to_list().iter().enumerate() {
        first_position
            .entry(key.clone())
            .or_insert(source_row as isize);
    }
    let mut ref_u: Vec<isize> = Vec::with_capacity(tgt_rows.len());
    for key in &tgt_rows {
        ref_u.push(first_position.get(key).copied().unwrap_or(-1));
    }
    assert_eq!(usrc.get_indexer(&target).unwrap(), ref_u, "unique indexer");
}
17433
/// On a unique source, `get_indexer` and `get_indexer_for` agree: hits map to the source
/// position and the unmatched target tuple maps to `-1`.
#[test]
fn multi_index_get_indexer_unique_maps_hits_and_missing_d89fe1() -> Result<(), super::IndexError>
{
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let source = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2), pair("c", 3)])?;
    let target = MultiIndex::from_tuples(vec![pair("b", 2), pair("z", 9), pair("a", 1)])?;

    let expected = vec![1, -1, 0];
    assert_eq!(source.get_indexer(&target)?, expected);
    assert_eq!(source.get_indexer_for(&target)?, expected);

    Ok(())
}
17453
/// `get_indexer` fails with the uniqueness message on a duplicated source, while
/// `get_indexer_for` still returns `[0, 1, 2]` for the same inputs.
#[test]
fn multi_index_get_indexer_rejects_duplicate_source_d89fe1() -> Result<(), super::IndexError> {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let source = MultiIndex::from_tuples(vec![pair("a", 1), pair("a", 1), pair("b", 2)])?;
    let target = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2)])?;

    // An unexpected success is reported by failing the test through its `Err` return.
    let err = match source.get_indexer(&target) {
        Err(err) => err,
        Ok(indexer) => {
            return Err(super::IndexError::InvalidArgument(format!(
                "duplicate source index unexpectedly returned {indexer:?}"
            )));
        }
    };
    let has_uniqueness_message = matches!(
        err,
        super::IndexError::InvalidArgument(message)
            if message == "get_indexer requires a uniquely valued MultiIndex"
    );
    assert!(has_uniqueness_message);

    let via_for = source.get_indexer_for(&target)?;
    assert_eq!(via_for, vec![0, 1, 2]);

    Ok(())
}
17483
/// A one-level target against a two-level source gives `-1` for every target row, for
/// both `get_indexer` and `get_indexer_for`.
#[test]
fn multi_index_get_indexer_level_mismatch_marks_missing_d89fe1() -> Result<(), super::IndexError>
{
    let source = MultiIndex::from_tuples(vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("b"), IndexLabel::from(2_i64)],
    ])?;
    let target = MultiIndex::from_tuples(vec![
        vec![IndexLabel::from("a")],
        vec![IndexLabel::from("b")],
    ])?;

    let all_missing = vec![-1, -1];
    assert_eq!(source.get_indexer(&target)?, all_missing);
    assert_eq!(source.get_indexer_for(&target)?, all_missing);

    Ok(())
}
17498
/// `reindex` returns an index equal to the named target, plus an indexer with source
/// positions for hits and `-1` for the unmatched tuple.
#[test]
fn multi_index_reindex_maps_target_hits_and_missing_d89fe4() -> Result<(), super::IndexError> {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let source = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2), pair("c", 3)])?;
    let target = MultiIndex::from_tuples(vec![pair("b", 2), pair("z", 9), pair("a", 1)])?
        .set_names(vec![Some("letter".into()), Some("number".into())]);

    let (reindexed, indexer) = source.reindex(&target)?;

    assert_eq!(reindexed, target);
    assert_eq!(indexer, vec![1, -1, 0]);

    Ok(())
}
17519
/// `reindex` errors when the source contains the same tuple twice.
#[test]
fn multi_index_reindex_rejects_duplicate_source_d89fe4() -> Result<(), super::IndexError> {
    let row = || vec![IndexLabel::from("a"), IndexLabel::from(1_i64)];
    let source = MultiIndex::from_tuples(vec![row(), row()])?;
    let target = MultiIndex::from_tuples(vec![row()])?;

    let outcome = source.reindex(&target);
    assert!(outcome.is_err());

    Ok(())
}
17532
/// Reindexing a two-level source onto a one-level target succeeds, returning the target
/// and an indexer of `-1`.
#[test]
fn multi_index_reindex_level_mismatch_marks_missing_d89fe4() -> Result<(), super::IndexError> {
    let two_level = vec![vec![IndexLabel::from("a"), IndexLabel::from(1_i64)]];
    let one_level = vec![vec![IndexLabel::from("a")]];
    let source = MultiIndex::from_tuples(two_level)?;
    let target = MultiIndex::from_tuples(one_level)?;

    let (reindexed, indexer) = source.reindex(&target)?;

    assert_eq!(reindexed, target);
    assert_eq!(indexer, vec![-1]);

    Ok(())
}
17544
/// `rename` returns a copy with all names replaced; the source names and the tuples are
/// unchanged.
#[test]
fn multi_index_rename_replaces_all_names_d89fe5() -> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::from("a"), IndexLabel::from(1_i64)];
    let source = MultiIndex::from_tuples(vec![only_row])?
        .set_names(vec![Some("old0".into()), Some("old1".into())]);

    let renamed = source.rename(vec![Some("new0".into()), Some("new1".into())])?;

    // The copy carries the new names; the original keeps the old ones.
    assert_eq!(renamed.names(), &[Some("new0".into()), Some("new1".into())]);
    assert_eq!(source.names(), &[Some("old0".into()), Some("old1".into())]);
    // Tuples are the same on both.
    assert_eq!(renamed.to_list(), source.to_list());

    Ok(())
}
17558
/// `rename_level` replaces only the name at the given level and leaves the source as is.
#[test]
fn multi_index_rename_level_replaces_one_name_d89fe5() -> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::from("a"), IndexLabel::from(1_i64)];
    let source = MultiIndex::from_tuples(vec![only_row])?
        .set_names(vec![Some("old0".into()), Some("old1".into())]);

    let renamed = source.rename_level(Some("new1".into()), 1)?;

    // Level 0 keeps its name on the copy; level 1 is replaced.
    assert_eq!(renamed.names(), &[Some("old0".into()), Some("new1".into())]);
    // The original still has both old names.
    assert_eq!(source.names(), &[Some("old0".into()), Some("old1".into())]);

    Ok(())
}
17571
/// `rename` with one name for a two-level index fails with `LengthMismatch`
/// (expected 2, actual 1).
#[test]
fn multi_index_rename_rejects_wrong_name_count_d89fe5() -> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::from("a"), IndexLabel::from(1_i64)];
    let source = MultiIndex::from_tuples(vec![only_row])?;

    let err = source.rename(vec![Some("only".into())]).unwrap_err();

    let is_length_mismatch = matches!(
        err,
        super::IndexError::LengthMismatch {
            expected: 2,
            actual: 1,
            ..
        }
    );
    assert!(is_length_mismatch);

    Ok(())
}
17589
/// `rename_level` at level 2 of a two-level index fails with `OutOfBounds`
/// (position 2, length 2).
#[test]
fn multi_index_rename_level_rejects_out_of_bounds_d89fe5() -> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::from("a"), IndexLabel::from(1_i64)];
    let source = MultiIndex::from_tuples(vec![only_row])?;

    let err = source.rename_level(Some("missing".into()), 2).unwrap_err();

    let is_out_of_bounds = matches!(
        err,
        super::IndexError::OutOfBounds {
            position: 2,
            length: 2
        }
    );
    assert!(is_out_of_bounds);

    Ok(())
}
17606
/// `searchsorted` returns one insertion position per target tuple; "left" and "right"
/// differ only for tuples that are present in the source.
#[test]
fn multi_index_searchsorted_left_and_right_d89fe6() -> Result<(), super::IndexError> {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];

    // ("b", 2) appears twice in the source.
    let source = MultiIndex::from_tuples(vec![
        pair("a", 1),
        pair("a", 3),
        pair("b", 2),
        pair("b", 2),
    ])?;
    let target = MultiIndex::from_tuples(vec![
        pair("a", 0),
        pair("a", 1),
        pair("a", 2),
        pair("a", 3),
        pair("b", 2),
        pair("c", 0),
    ])?;

    let expectations = [
        ("left", vec![0, 0, 1, 1, 2, 4]),
        ("right", vec![0, 1, 1, 2, 4, 4]),
    ];
    for (side, expected) in expectations {
        assert_eq!(source.searchsorted(&target, side)?, expected);
    }

    Ok(())
}
17635
/// `searchsorted` with an empty target returns an empty position list.
#[test]
fn multi_index_searchsorted_empty_target_d89fe6() -> Result<(), super::IndexError> {
    let only_row = vec![IndexLabel::from("a"), IndexLabel::from(1_i64)];
    let source = MultiIndex::from_tuples(vec![only_row])?;
    let target = MultiIndex::from_tuples(Vec::new())?;

    let positions = source.searchsorted(&target, "left")?;
    assert_eq!(positions, Vec::<usize>::new());

    Ok(())
}
17645
/// `searchsorted` with side "middle" fails with an `InvalidArgument` naming the bad side.
#[test]
fn multi_index_searchsorted_rejects_invalid_side_d89fe6() -> Result<(), super::IndexError> {
    let row = || vec![IndexLabel::from("a"), IndexLabel::from(1_i64)];
    let source = MultiIndex::from_tuples(vec![row()])?;
    let target = MultiIndex::from_tuples(vec![row()])?;

    let err = source.searchsorted(&target, "middle").unwrap_err();

    let names_bad_side = matches!(
        err,
        super::IndexError::InvalidArgument(message)
            if message == "searchsorted: side must be 'left' or 'right', got \"middle\""
    );
    assert!(names_bad_side);

    Ok(())
}
17661
/// A one-level target against a two-level source: every indexer entry is `-1` and every
/// target row is listed as missing.
#[test]
fn multi_index_get_indexer_non_unique_level_mismatch_marks_all_missing() {
    let source = MultiIndex::from_tuples(vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("b"), IndexLabel::from(2_i64)],
    ])
    .unwrap();
    let target = MultiIndex::from_tuples(vec![
        vec![IndexLabel::from("a")],
        vec![IndexLabel::from("b")],
    ])
    .unwrap();

    let (indexer, missing) = source.get_indexer_non_unique(&target);

    assert_eq!(indexer, vec![-1, -1]);
    assert_eq!(missing, vec![0, 1]);
}
17675
/// `isin` flags the rows whose whole tuple appears among the needles.
#[test]
fn multi_index_isin_tuple_membership() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let mi = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2), pair("a", 3)]).unwrap();

    // ("a", 3) is not among the needles.
    let needles: Vec<Vec<IndexLabel>> = vec![pair("a", 1), pair("b", 2)];
    let mask = mi.isin(&needles);

    assert_eq!(mask, vec![true, true, false]);
}
17690
/// A one-label needle matches no row of a two-level index.
#[test]
fn multi_index_isin_ignores_mismatched_tuple_length() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let mi = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2)]).unwrap();

    let needles: Vec<Vec<IndexLabel>> = vec![vec![IndexLabel::from("a")]];
    let mask = mi.isin(&needles);

    assert_eq!(mask, vec![false, false]);
}
17702
/// With no needles, `isin` yields `false` for every row.
#[test]
fn multi_index_isin_empty_values_yields_all_false() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let mi = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2)]).unwrap();

    let needles: Vec<Vec<IndexLabel>> = Vec::new();
    let mask = mi.isin(&needles);

    assert_eq!(mask, vec![false, false]);
}
17713
/// `isin_level` tests membership against the labels of a single level.
#[test]
fn multi_index_isin_level_filters_by_level() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];
    let mi = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2), pair("a", 3)]).unwrap();

    let letters = [IndexLabel::from("a")];
    let level0 = mi.isin_level(&letters, 0).unwrap();
    assert_eq!(level0, vec![true, false, true]);

    let numbers = [IndexLabel::from(2_i64), IndexLabel::from(3_i64)];
    let level1 = mi.isin_level(&numbers, 1).unwrap();
    assert_eq!(level1, vec![false, true, true]);
}
17727
/// `isin_level` at level 5 of a two-level index fails with `OutOfBounds`.
#[test]
fn multi_index_isin_level_out_of_bounds_errors() {
    let only_row = vec![IndexLabel::from("a"), IndexLabel::from(1_i64)];
    let mi = MultiIndex::from_tuples(vec![only_row]).unwrap();

    let needles = [IndexLabel::from("a")];
    let err = mi.isin_level(&needles, 5).unwrap_err();

    let is_out_of_bounds = matches!(err, crate::IndexError::OutOfBounds { .. });
    assert!(is_out_of_bounds);
}
17734
/// `isin` on an index with no rows returns an empty mask even when needles are given.
#[test]
fn multi_index_isin_empty_index_yields_empty() {
    let mi = MultiIndex::from_tuples(Vec::new()).unwrap();

    let needles: Vec<Vec<IndexLabel>> =
        vec![vec![IndexLabel::from("a"), IndexLabel::from(1_i64)]];
    let mask = mi.isin(&needles);

    assert_eq!(mask, Vec::<bool>::new());
}
17741
/// With `DuplicateKeep::First`, only the later occurrence of a repeated tuple is flagged.
#[test]
fn multi_index_duplicated_keep_first_default() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];

    // ("a", 1) sits at rows 0 and 2.
    let mi = MultiIndex::from_tuples(vec![
        pair("a", 1),
        pair("b", 2),
        pair("a", 1),
        pair("c", 3),
    ])
    .unwrap();

    assert_eq!(
        mi.duplicated(DuplicateKeep::First),
        vec![false, false, true, false]
    );
}
17754
/// With `DuplicateKeep::Last`, the earlier occurrence of a repeated tuple is flagged.
#[test]
fn multi_index_duplicated_keep_last_marks_earlier_occurrences() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];

    // ("a", 1) sits at rows 0 and 1.
    let mi = MultiIndex::from_tuples(vec![pair("a", 1), pair("a", 1), pair("b", 2)]).unwrap();

    assert_eq!(
        mi.duplicated(DuplicateKeep::Last),
        vec![true, false, false]
    );
}
17766
/// With `DuplicateKeep::None`, every occurrence of a repeated tuple is flagged.
#[test]
fn multi_index_duplicated_keep_none_marks_all_repeats() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];

    // ("a", 1) sits at rows 0 and 2.
    let mi = MultiIndex::from_tuples(vec![
        pair("a", 1),
        pair("b", 2),
        pair("a", 1),
        pair("c", 3),
    ])
    .unwrap();

    assert_eq!(
        mi.duplicated(DuplicateKeep::None),
        vec![true, false, true, false]
    );
}
17779
/// `is_unique` and `has_duplicates` give opposite answers, on both a distinct-tuple index
/// and one that repeats a tuple.
#[test]
fn multi_index_is_unique_true_and_false() {
    let pair = |s: &str, n: i64| vec![IndexLabel::from(s), IndexLabel::from(n)];

    let unique = MultiIndex::from_tuples(vec![pair("a", 1), pair("b", 2)]).unwrap();
    assert!(unique.is_unique());
    assert!(!unique.has_duplicates());

    let duped = MultiIndex::from_tuples(vec![pair("a", 1), pair("a", 1)]).unwrap();
    assert!(!duped.is_unique());
    assert!(duped.has_duplicates());
}
17798
/// An index with no rows has an empty `duplicated` mask and counts as unique.
#[test]
fn multi_index_duplicated_empty_yields_empty() {
    let no_rows = Vec::new();
    let mi = MultiIndex::from_tuples(no_rows).unwrap();

    let mask = mi.duplicated(DuplicateKeep::First);
    assert_eq!(mask, Vec::<bool>::new());
    assert!(mi.is_unique());
}
17805
/// `all` and `any` fail with an `InvalidArgument` naming the operation, on both a
/// populated and an empty two-level index.
#[test]
fn multi_index_all_any_reject_bool_reduction_d89fe7() -> Result<(), super::IndexError> {
    let non_empty = MultiIndex::from_tuples(vec![
        vec![IndexLabel::from("a"), IndexLabel::from(1_i64)],
        vec![IndexLabel::from("b"), IndexLabel::from(2_i64)],
    ])?;
    let empty = MultiIndex::from_arrays(vec![Vec::new(), Vec::new()])?;

    let all_message = "cannot perform all with this index type: MultiIndex";
    let any_message = "cannot perform any with this index type: MultiIndex";

    for index in [&non_empty, &empty] {
        let outcomes = [
            (index.all().unwrap_err(), all_message),
            (index.any().unwrap_err(), any_message),
        ];
        for (err, expected) in outcomes {
            let message_matches = matches!(
                err,
                super::IndexError::InvalidArgument(message) if message == expected
            );
            assert!(message_matches);
        }
    }

    Ok(())
}
17842
/// `shift` on a `MultiIndex` is expected to fail with the same message for
/// every period / frequency combination tried here.
#[test]
fn multi_index_shift_rejects_temporal_shift_d89fe9() -> Result<(), super::IndexError> {
    let index = MultiIndex::from_tuples(vec![
        vec!["a".into(), 1_i64.into()],
        vec!["b".into(), 2_i64.into()],
    ])?;
    let wanted = "This method is only implemented for DatetimeIndex, PeriodIndex and TimedeltaIndex; Got type MultiIndex";

    // Gather all three failures up front, then check each against the shared message.
    let failures = vec![
        index.shift(1, None).unwrap_err(),
        index.shift(0, None).unwrap_err(),
        index.shift(1, Some("D")).unwrap_err(),
    ];
    for failure in failures {
        assert!(matches!(
            failure,
            super::IndexError::InvalidArgument(text) if text == wanted
        ));
    }

    Ok(())
}
17864
/// The `.str` accessor is expected to be rejected for a `MultiIndex`.
#[test]
fn multi_index_str_rejects_string_accessor_d89fe12() -> Result<(), super::IndexError> {
    let index = MultiIndex::from_tuples(vec![
        vec!["a".into(), 1_i64.into()],
        vec!["b".into(), 2_i64.into()],
    ])?;

    let failure = index.r#str().unwrap_err();
    let rejected_as_expected = matches!(
        failure,
        super::IndexError::InvalidArgument(text)
            if text == "Can only use .str accessor with Index, not MultiIndex"
    );
    assert!(rejected_as_expected);

    Ok(())
}
17882
/// `astype` on a `MultiIndex`: the object dtype spellings return an equal index,
/// "category" fails with its own message, and other dtypes fail with a message
/// that echoes the requested dtype.
#[test]
fn multi_index_astype_object_clones_other_dtypes_reject_c2x17() -> Result<(), super::IndexError>
{
    let index = MultiIndex::from_tuples(vec![
        vec!["a".into(), 1_i64.into()],
        vec!["b".into(), 2_i64.into()],
    ])?;

    // Both spellings of the object dtype are accepted.
    for object_alias in ["object", "O"] {
        let copy = index.astype(object_alias)?;
        assert!(copy.equals(&index));
        assert_eq!(copy.nlevels(), index.nlevels());
        assert_eq!(copy.len(), index.len());
    }

    let category_failure = index.astype("category").unwrap_err();
    assert!(matches!(
        category_failure,
        super::IndexError::InvalidArgument(text)
            if text == "> 1 ndim Categorical are not supported at this time"
    ));

    for dtype in ["int64", "float64", "datetime64[ns]"] {
        let wanted = format!(
            "Setting a MultiIndex dtype to anything other than object is not supported; got {dtype}"
        );
        let failure = index.astype(dtype).unwrap_err();
        assert!(matches!(
            failure,
            super::IndexError::InvalidArgument(text) if text == wanted
        ));
    }

    Ok(())
}
17918
/// `diff` on a `MultiIndex` is expected to fail for negative, zero and positive periods.
#[test]
fn multi_index_diff_rejects_tuple_subtraction_c2x17() -> Result<(), super::IndexError> {
    let index = MultiIndex::from_tuples(vec![
        vec!["a".into(), 1_i64.into()],
        vec!["b".into(), 2_i64.into()],
        vec!["c".into(), 3_i64.into()],
    ])?;
    let wanted = "cannot perform __sub__ with this index type: MultiIndex";

    // Periods -1, 0, 1 and 2.
    for step in -1_i64..=2 {
        let failure = index.diff(step).unwrap_err();
        assert!(matches!(
            failure,
            super::IndexError::InvalidArgument(text) if text == wanted
        ));
    }

    Ok(())
}
17938
/// `round` on a `MultiIndex` is expected to fail regardless of the `decimals` value.
#[test]
fn multi_index_round_rejects_tuple_rint_c2x17() -> Result<(), super::IndexError> {
    let index = MultiIndex::from_tuples(vec![
        vec!["a".into(), 1_i64.into()],
        vec!["b".into(), 2_i64.into()],
    ])?;
    let wanted = "loop of ufunc does not support argument 0 of type tuple which has no callable rint method";

    let decimal_choices = [-1_i32, 0, 1, 4];
    for places in decimal_choices {
        let failure = index.round(places).unwrap_err();
        assert!(matches!(
            failure,
            super::IndexError::InvalidArgument(text) if text == wanted
        ));
    }

    Ok(())
}
17957
/// `argmax` / `argmin` follow the step sign: an ascending range has its maximum
/// at the last position, a descending one at position 0.
#[test]
fn range_index_argmax_argmin_handles_step_direction_mrchb() {
    let ascending = super::RangeIndex::new(0, 5, 1).unwrap();
    assert_eq!(ascending.argmax().unwrap(), 4);
    assert_eq!(ascending.argmin().unwrap(), 0);

    let descending = super::RangeIndex::new(10, 0, -2).unwrap();
    let descending_last = descending.len() - 1;
    assert_eq!(descending.argmax().unwrap(), 0);
    assert_eq!(descending.argmin().unwrap(), descending_last);

    // Positive step larger than one.
    let strided = super::RangeIndex::new(1, 100, 7).unwrap();
    let strided_last = strided.len() - 1;
    assert_eq!(strided.argmax().unwrap(), strided_last);
    assert_eq!(strided.argmin().unwrap(), 0);
}
17972
/// `argmax` / `argmin` on an empty range must fail with `InvalidArgument`
/// and the operation-specific message.
#[test]
fn range_index_argmax_argmin_reject_empty_mrchb() {
    // start == stop, so the range has no elements.
    let no_values = super::RangeIndex::new(5, 5, 1).unwrap();
    assert!(no_values.is_empty());

    let argmax_failure = no_values.argmax().unwrap_err();
    assert!(matches!(
        argmax_failure,
        super::IndexError::InvalidArgument(ref text)
            if text == "attempt to get argmax of an empty sequence"
    ));

    let argmin_failure = no_values.argmin().unwrap_err();
    assert!(matches!(
        argmin_failure,
        super::IndexError::InvalidArgument(ref text)
            if text == "attempt to get argmin of an empty sequence"
    ));
}
17990
/// `argsort` returns identity positions for an ascending range, reversed positions
/// for a descending one, and nothing for an empty range.
#[test]
fn range_index_argsort_orientation_matches_step_sign_mrchb() {
    let ascending = super::RangeIndex::new(0, 5, 1).unwrap();
    let ascending_order = ascending.argsort();
    assert_eq!(ascending_order, vec![0, 1, 2, 3, 4]);

    let descending = super::RangeIndex::new(10, 0, -2).unwrap();
    let descending_order = descending.argsort();
    assert_eq!(descending_order, vec![4, 3, 2, 1, 0]);

    let no_values = super::RangeIndex::new(0, 0, 1).unwrap();
    let empty_order = no_values.argsort();
    assert_eq!(empty_order, Vec::<usize>::new());
}
18002
/// Time-of-day accessors on three entries: a timestamp with sub-second digits,
/// an `i64::MIN` slot (expected to yield `None`), and zero.
#[test]
fn datetime_index_time_of_day_accessors_match_pandas_znejf() {
    const NANOS_PER_SECOND: i64 = 1_000_000_000;
    // Whole seconds plus a 789_012_345 ns fractional part.
    let stamp: i64 = 1_704_112_496 * NANOS_PER_SECOND + 789_012_345;
    let index = super::DatetimeIndex::new(vec![stamp, i64::MIN, 0]);

    let hours = index.hour();
    let minutes = index.minute();
    let seconds = index.second();
    assert_eq!(hours, vec![Some(12), None, Some(0)]);
    assert_eq!(minutes, vec![Some(34), None, Some(0)]);
    assert_eq!(seconds, vec![Some(56), None, Some(0)]);

    // The fraction splits into microseconds (789_012) and remaining nanoseconds (345).
    assert_eq!(index.microsecond(), vec![Some(789_012), None, Some(0)]);
    assert_eq!(index.nanosecond(), vec![Some(345), None, Some(0)]);
}
18018
/// `indexer_at_time` and `indexer_between_time` select positions by time of day,
/// including windows that wrap past midnight, and reject unparsable time strings.
#[test]
fn datetime_index_time_of_day_indexers_match_pandas_bwzmn() -> Result<(), super::IndexError> {
    let nanos_per_hour = fp_types::Timedelta::NANOS_PER_HOUR;
    let nanos_per_minute = fp_types::Timedelta::NANOS_PER_MIN;
    let nanos_per_day = fp_types::Timedelta::NANOS_PER_DAY;
    let half_hour = 30 * nanos_per_minute;

    // Positions: 0 -> 09:00, 1 -> 12:30, 2 -> i64::MIN, 3 -> 23:30, 4 -> 00:30 on the next day.
    let index = super::DatetimeIndex::new(vec![
        9 * nanos_per_hour,
        12 * nanos_per_hour + half_hour,
        i64::MIN,
        23 * nanos_per_hour + half_hour,
        nanos_per_day + half_hour,
    ]);

    // Exact time-of-day matches.
    assert_eq!(index.indexer_at_time("12:30")?, vec![1]);
    assert_eq!(index.indexer_at_time("12:30:00.000000000")?, vec![1]);
    assert_eq!(index.indexer_at_time("00:30:00")?, vec![4]);
    assert!(index.indexer_at_time("not-a-time").is_err());

    // Plain windows: both ends inclusive, then start exclusive.
    let morning_inclusive = index.indexer_between_time("08:00", "13:00", true, true)?;
    assert_eq!(morning_inclusive, vec![0, 1]);
    let morning_open_start = index.indexer_between_time("09:00", "13:00", false, true)?;
    assert_eq!(morning_open_start, vec![1]);

    // Windows whose start is later than their end wrap around midnight.
    let overnight_inclusive = index.indexer_between_time("23:00", "01:00", true, true)?;
    assert_eq!(overnight_inclusive, vec![3, 4]);
    let overnight_exclusive = index.indexer_between_time("23:30", "00:30", false, false)?;
    assert_eq!(overnight_exclusive, Vec::<usize>::new());

    assert!(
        index
            .indexer_between_time("09:00", "not-a-time", true, true)
            .is_err()
    );
    Ok(())
}
18059
/// `week`, its alias `weekofyear`, and `isocalendar` agree on two dates that
/// both fall in ISO week 1, plus a missing (`i64::MIN`) entry.
#[test]
fn datetime_index_week_weekofyear_match_pandas_e8xhb() {
    const NS: i64 = 1_000_000_000;
    let first_stamp = 1_704_067_200_i64 * NS;
    let second_stamp = 1_735_516_800_i64 * NS;
    let index = super::DatetimeIndex::new(vec![first_stamp, second_stamp, i64::MIN]);

    let week_numbers = index.week();
    assert_eq!(week_numbers[0], Some(1));
    assert_eq!(week_numbers[1], Some(1));
    assert_eq!(week_numbers[2], None);

    // The alias must return exactly the same vector.
    assert_eq!(index.weekofyear(), week_numbers);

    // The second date is expected to report ISO year 2025.
    let iso_triples = index.isocalendar();
    assert_eq!(
        iso_triples,
        vec![Some((2024, 1, 1)), Some((2025, 1, 1)), None]
    );
}
18082
/// Calendar-position accessors (day of year, day of week, quarter, leap year,
/// days in month) and their aliases on three timestamps plus a missing entry.
#[test]
fn datetime_index_day_of_x_and_quarter_match_pandas_k860x() {
    const NS: i64 = 1_000_000_000;
    let monday: i64 = 1_705_276_800 * NS;
    let sunday: i64 = 1_705_795_200 * NS;
    let april_30: i64 = 1_714_435_200 * NS;
    let index = super::DatetimeIndex::new(vec![monday, sunday, april_30, i64::MIN]);

    let day_numbers = index.dayofyear();
    assert_eq!(day_numbers, vec![Some(15), Some(21), Some(121), None]);
    assert_eq!(index.day_of_year(), index.dayofyear());

    // Expected weekday codes: 0 for the Monday entry, 6 for the Sunday entry.
    let weekday_codes = index.dayofweek();
    assert_eq!(weekday_codes, vec![Some(0), Some(6), Some(1), None]);
    assert_eq!(index.day_of_week(), index.dayofweek());
    assert_eq!(index.weekday(), index.dayofweek());

    let quarters = index.quarter();
    assert_eq!(quarters, vec![Some(1), Some(1), Some(2), None]);

    let leap_flags = index.is_leap_year();
    assert_eq!(
        leap_flags,
        vec![Some(true), Some(true), Some(true), None]
    );

    let month_lengths = index.days_in_month();
    assert_eq!(month_lengths, vec![Some(31), Some(31), Some(30), None]);
    assert_eq!(index.daysinmonth(), index.days_in_month());
}
18116
/// Year / quarter / month boundary flags on six dates followed by a missing entry.
#[test]
fn datetime_index_boundary_accessors_match_pandas_qy7yd() {
    const NS: i64 = 1_000_000_000;
    let jan_01 = 1_704_067_200_i64 * NS;
    let jan_31 = 1_706_659_200_i64 * NS;
    let feb_29 = 1_709_164_800_i64 * NS;
    let mar_31 = 1_711_843_200_i64 * NS;
    let apr_01 = 1_711_929_600_i64 * NS;
    let dec_31 = 1_735_603_200_i64 * NS;
    let nat = i64::MIN;

    let index =
        super::DatetimeIndex::new(vec![jan_01, jan_31, feb_29, mar_31, apr_01, dec_31, nat]);

    // Builds the expected vector: one `Some(flag)` per real date (in the order
    // jan_01, jan_31, feb_29, mar_31, apr_01, dec_31) followed by `None` for the
    // missing slot.
    let expect = |flags: [bool; 6]| -> Vec<Option<bool>> {
        let mut wanted: Vec<Option<bool>> = flags.iter().copied().map(Some).collect();
        wanted.push(None);
        wanted
    };

    assert_eq!(
        index.is_year_start(),
        expect([true, false, false, false, false, false])
    );
    assert_eq!(
        index.is_year_end(),
        expect([false, false, false, false, false, true])
    );
    assert_eq!(
        index.is_quarter_start(),
        expect([true, false, false, false, true, false])
    );
    assert_eq!(
        index.is_quarter_end(),
        expect([false, false, false, true, false, true])
    );
    assert_eq!(
        index.is_month_start(),
        expect([true, false, false, false, true, false])
    );
    assert_eq!(
        index.is_month_end(),
        expect([false, true, true, true, false, true])
    );
}
18212
/// `insert` on the datetime, timedelta, period and range index variants: the
/// value lands at the requested position, and names are kept where checked.
/// Inserting past the end of the datetime index reports `OutOfBounds`.
#[test]
fn index_variants_insert_match_pandas_veabb() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};
    const NS: i64 = 1_000_000_000;

    // --- DatetimeIndex ---
    let first = 1_704_067_200_i64 * NS;
    let between = 1_705_276_800_i64 * NS;
    let last = 1_706_140_800_i64 * NS;
    let stamps = super::DatetimeIndex::new(vec![first, last]).set_name("ts");

    let inserted_mid = stamps.insert(1, between)?;
    assert_eq!(
        inserted_mid.values(),
        vec![Some(first), Some(between), Some(last)]
    );
    assert_eq!(inserted_mid.name(), Some("ts"));

    // Position == len appends.
    let appended = stamps.insert(stamps.len(), between)?;
    assert_eq!(
        appended.values(),
        vec![Some(first), Some(last), Some(between)]
    );

    let past_end = stamps.insert(99, between).unwrap_err();
    assert!(matches!(
        past_end,
        super::IndexError::OutOfBounds {
            position: 99,
            length: 2
        }
    ));

    // --- TimedeltaIndex ---
    let deltas = super::TimedeltaIndex::new(vec![100_i64, 300]).set_name("d");
    let deltas_inserted = deltas.insert(1, 200)?;
    assert_eq!(
        deltas_inserted.values(),
        vec![Some(100), Some(200), Some(300)]
    );
    assert_eq!(deltas_inserted.name(), Some("d"));

    // --- PeriodIndex ---
    let month_10 = Period::new(10, PeriodFreq::Monthly);
    let month_11 = Period::new(11, PeriodFreq::Monthly);
    let month_12 = Period::new(12, PeriodFreq::Monthly);
    let periods = super::PeriodIndex::new(vec![month_10, month_12]).set_name("p");
    let periods_inserted = periods.insert(1, month_11)?;
    assert_eq!(periods_inserted.values(), &[month_10, month_11, month_12]);

    // --- RangeIndex ---
    let range = super::RangeIndex::new(0, 3, 1).unwrap();
    let range_inserted = range.insert(1, 99)?;
    assert_eq!(int64_labels(&range_inserted), vec![0, 99, 1, 2]);
    Ok(())
}
18258
/// `format()` on the datetime, timedelta, period and categorical variants;
/// missing datetime / timedelta entries render as "NaT".
#[test]
fn index_variants_format_match_pandas_n31q2() {
    use fp_types::{Period, PeriodFreq};
    const NS: i64 = 1_000_000_000;

    let stamps = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS, i64::MIN]);
    let stamp_text = stamps.format();
    assert!(stamp_text[0].starts_with("2024-01-01"));
    assert_eq!(stamp_text[1], "NaT");

    let deltas = super::TimedeltaIndex::new(vec![1_000_000_i64, fp_types::Timedelta::NAT]);
    let delta_text = deltas.format();
    assert_eq!(delta_text[0], "1000000");
    assert_eq!(delta_text[1], "NaT");

    // Only non-emptiness is checked for the period rendering.
    let periods = super::PeriodIndex::new(vec![Period::new(10, PeriodFreq::Monthly)]);
    let period_text = periods.format();
    assert!(!period_text[0].is_empty());

    let categories =
        super::CategoricalIndex::from_values(vec![String::from("a"), String::from("b")], false);
    let wanted = vec![String::from("a"), String::from("b")];
    assert_eq!(categories.format(), wanted);
}
18280
/// `fillna` replaces only the missing slot and keeps the name; `isnull` / `notnull`
/// return the same results as `isna` / `notna`.
#[test]
fn datetime_timedelta_fillna_isnull_match_pandas_az3t9() {
    const NS: i64 = 1_000_000_000;
    let fill_stamp = 1_704_067_200_i64 * NS;
    let stamps = super::DatetimeIndex::new(vec![fill_stamp, i64::MIN, 0]).set_name("ts");

    let stamps_filled = stamps.fillna(fill_stamp);
    assert_eq!(
        stamps_filled.values(),
        vec![Some(fill_stamp), Some(fill_stamp), Some(0)]
    );
    assert_eq!(stamps_filled.name(), Some("ts"));

    let null_mask = stamps.isnull();
    assert_eq!(null_mask, stamps.isna());
    let not_null_mask = stamps.notnull();
    assert_eq!(not_null_mask, stamps.notna());

    let missing_delta = fp_types::Timedelta::NAT;
    let deltas = super::TimedeltaIndex::new(vec![100_i64, missing_delta, 0]).set_name("d");
    let deltas_filled = deltas.fillna(99);
    assert_eq!(deltas_filled.values(), vec![Some(100), Some(99), Some(0)]);
    assert_eq!(deltas_filled.name(), Some("d"));
    assert_eq!(deltas.isnull(), deltas.isna());
    assert_eq!(deltas.notnull(), deltas.notna());
}
18305
/// `date()` and `time()` split each entry into a calendar date and a time of day;
/// `timetz()` returns the same values as `time()`.
#[test]
fn datetime_index_date_and_time_accessors_match_pandas_66pll() {
    const NS: i64 = 1_000_000_000;
    let stamp: i64 = 1_705_322_096_i64 * NS + 789_012_345;
    let index = super::DatetimeIndex::new(vec![stamp, i64::MIN, 0]);

    let calendar_dates = index.date();
    let jan_15 = chrono::NaiveDate::from_ymd_opt(2024, 1, 15).unwrap();
    let epoch_day = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
    assert_eq!(calendar_dates[0], Some(jan_15));
    assert_eq!(calendar_dates[1], None);
    assert_eq!(calendar_dates[2], Some(epoch_day));

    let clock_times = index.time();
    // `from_hms_nano_opt` already returns an `Option`, matching the element type.
    assert_eq!(
        clock_times[0],
        chrono::NaiveTime::from_hms_nano_opt(12, 34, 56, 789_012_345)
    );
    assert_eq!(clock_times[1], None);
    assert_eq!(
        clock_times[2],
        chrono::NaiveTime::from_hms_nano_opt(0, 0, 0, 0)
    );
    assert_eq!(index.timetz(), clock_times);
}
18333
/// `to_pydatetime` round-trips the Unix second count, and `to_julian_date`
/// matches `seconds / 86_400 + 2_440_587.5`; the missing entry maps to `None`.
#[test]
fn datetime_index_to_pydatetime_and_julian_match_pandas_dww6m() {
    const NS: i64 = 1_000_000_000;
    let unix_seconds = 1_704_067_200_i64;
    let index = super::DatetimeIndex::new(vec![unix_seconds * NS, i64::MIN]);

    let converted = index.to_pydatetime();
    let head = converted[0].expect("non-NAT label decodes");
    assert_eq!(head.timestamp(), unix_seconds);
    assert_eq!(converted[1], None);

    let julian_days = index.to_julian_date();
    let wanted = (unix_seconds as f64) / 86_400.0 + 2_440_587.5;
    let got = julian_days[0].expect("non-NAT label decodes");
    // Floating-point comparison with a tight tolerance.
    assert!((got - wanted).abs() < 1e-9);
    assert_eq!(julian_days[1], None);
}
18354
/// `to_pytimedelta` turns one day of nanoseconds into an 86_400-second duration
/// and maps the NaT entry to `None`.
#[test]
fn timedelta_index_to_pytimedelta_match_pandas_dww6m() {
    let day_in_nanos = fp_types::Timedelta::NANOS_PER_DAY;
    let index = super::TimedeltaIndex::new(vec![day_in_nanos, fp_types::Timedelta::NAT]);

    let converted = index.to_pytimedelta();
    let head = converted[0].expect("non-NAT label decodes");
    assert_eq!(head.num_seconds(), 86_400);
    assert_eq!(converted[1], None);
}
18364
/// `tz_localize("UTC")` succeeds and leaves values and name unchanged; another
/// zone, and `tz_convert`, are expected to fail with `InvalidArgument`.
#[test]
fn datetime_index_tz_localize_tz_convert_match_pandas_qm31w() {
    const NS: i64 = 1_000_000_000;
    let index = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS]).set_name("ts");

    let localized = index.tz_localize("UTC").expect("UTC localize");
    assert!(localized.equals(&index));
    assert_eq!(localized.name(), Some("ts"));

    // The error text is only checked for mentioning the method and "UTC".
    let localize_failure = index.tz_localize("US/Eastern").unwrap_err();
    assert!(matches!(
        localize_failure,
        super::IndexError::InvalidArgument(ref text)
            if text.contains("tz_localize") && text.contains("UTC")
    ));

    let convert_failure = index.tz_convert("UTC").unwrap_err();
    assert!(matches!(
        convert_failure,
        super::IndexError::InvalidArgument(ref text)
            if text.contains("tz_convert")
    ));
}
18391
/// `as_unit` accepts "ns" and rejects the other units tried, for both the
/// datetime and timedelta variants.
#[test]
fn datetime_timedelta_as_unit_match_pandas_70mbe() {
    let stamps = super::DatetimeIndex::new(Vec::new());
    assert!(stamps.as_unit("ns").is_ok());
    let micro_failure = stamps.as_unit("us").unwrap_err();
    assert!(matches!(
        micro_failure,
        super::IndexError::InvalidArgument(ref text) if text.contains("as_unit")
    ));

    let deltas = super::TimedeltaIndex::new(Vec::new());
    assert!(deltas.as_unit("ns").is_ok());
    assert!(deltas.as_unit("ms").is_err());
}
18406
/// Both temporal variants report "ns" as their unit and "nanosecond" as their resolution.
#[test]
fn datetime_timedelta_unit_resolution_match_pandas_c50rv() {
    let stamps = super::DatetimeIndex::new(Vec::new());
    let (stamp_unit, stamp_resolution) = (stamps.unit(), stamps.resolution());
    assert_eq!(stamp_unit, "ns");
    assert_eq!(stamp_resolution, "nanosecond");

    let deltas = super::TimedeltaIndex::new(Vec::new());
    let (delta_unit, delta_resolution) = (deltas.unit(), deltas.resolution());
    assert_eq!(delta_unit, "ns");
    assert_eq!(delta_resolution, "nanosecond");
}
18417
/// Timezone and frequency accessors are expected to return `None` on the
/// datetime and timedelta variants.
#[test]
fn datetime_timedelta_tz_freq_accessors_return_none_ze7et() {
    const NS: i64 = 1_000_000_000;

    let stamps = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS]);
    // Timezone accessors.
    assert_eq!(stamps.tz(), None);
    assert_eq!(stamps.tzinfo(), None);
    // Frequency accessors.
    assert_eq!(stamps.freq(), None);
    assert_eq!(stamps.freqstr(), None);
    assert_eq!(stamps.inferred_freq(), None);

    let deltas = super::TimedeltaIndex::new(vec![100_i64]);
    assert_eq!(deltas.freq(), None);
    assert_eq!(deltas.freqstr(), None);
    assert_eq!(deltas.inferred_freq(), None);
}
18433
/// For a single-frequency `PeriodIndex`, `freqstr` is non-empty and equals
/// `inferred_freq`; mixed-frequency and empty indexes have no inferable frequency.
#[test]
fn period_index_freqstr_inferred_freq_match_pandas_ze7et() {
    use fp_types::{Period, PeriodFreq};

    let monthly = super::PeriodIndex::new(vec![
        Period::new(10, PeriodFreq::Monthly),
        Period::new(11, PeriodFreq::Monthly),
    ]);
    let freq_text = monthly
        .freqstr()
        .expect("homogeneous index has a freqstr");
    assert!(!freq_text.is_empty());
    let inferred_text = monthly
        .inferred_freq()
        .expect("homogeneous freq is inferable");
    assert_eq!(inferred_text, freq_text);

    // Same ordinal, different frequencies.
    let mixed_freqs = super::PeriodIndex::new(vec![
        Period::new(10, PeriodFreq::Monthly),
        Period::new(10, PeriodFreq::Annual),
    ]);
    assert_eq!(mixed_freqs.inferred_freq(), None);

    let no_periods = super::PeriodIndex::new(Vec::new());
    assert_eq!(no_periods.freqstr(), None);
    assert_eq!(no_periods.inferred_freq(), None);
}
18458
/// `where` keeps values where the mask is true; `putmask` replaces values where
/// it is true. Masks of the wrong length produce `LengthMismatch`.
#[test]
fn range_index_where_putmask_match_pandas_jw1kw() -> Result<(), super::IndexError> {
    let range = super::RangeIndex::new(0, 5, 1).unwrap().set_name("r");

    let keep_mask = [true, false, true, false, true];
    let kept = range.r#where(&keep_mask, 99)?;
    assert_eq!(int64_labels(&kept), vec![0, 99, 2, 99, 4]);
    assert_eq!(kept.name(), Some("r"));

    // The complementary mask through `putmask` gives the same labels.
    let replace_mask = [false, true, false, true, false];
    let replaced = range.putmask(&replace_mask, 99)?;
    assert_eq!(int64_labels(&replaced), vec![0, 99, 2, 99, 4]);

    let short_mask_failure = range.r#where(&[true, false], 0).unwrap_err();
    assert!(matches!(
        short_mask_failure,
        super::IndexError::LengthMismatch { .. }
    ));
    let long_mask_failure = range.putmask(&[true; 7], 0).unwrap_err();
    assert!(matches!(
        long_mask_failure,
        super::IndexError::LengthMismatch { .. }
    ));
    Ok(())
}
18481
/// Set operations between two overlapping ranges; the shared name survives
/// the intersection, and a union of differently named ranges has no name.
#[test]
fn range_index_set_ops_match_pandas_tz40f() {
    // lhs covers 0..5, rhs covers 3..8; both are named "r".
    let lhs = super::RangeIndex::new(0, 5, 1).unwrap().set_name("r");
    let rhs = super::RangeIndex::new(3, 8, 1).unwrap().set_name("r");

    let common = lhs.intersection(&rhs);
    assert_eq!(int64_labels(&common), vec![3, 4]);
    assert_eq!(common.name(), Some("r"));

    let combined = lhs.union(&rhs);
    assert_eq!(int64_labels(&combined), vec![0, 1, 2, 3, 4, 5, 6, 7]);

    let only_lhs = lhs.difference(&rhs);
    assert_eq!(int64_labels(&only_lhs), vec![0, 1, 2]);

    let exclusive = lhs.symmetric_difference(&rhs);
    assert_eq!(int64_labels(&exclusive), vec![0, 1, 2, 5, 6, 7]);

    let differently_named = super::RangeIndex::new(3, 6, 1).unwrap().set_name("other");
    let mixed_name_union = lhs.union(&differently_named);
    assert_eq!(mixed_name_union.name(), None);
}
18504
/// `slice_indexer` returns a half-open position range that includes the end
/// label, for both `PeriodIndex` and `RangeIndex`.
#[test]
fn period_range_slice_indexer_match_pandas_18kvv() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let month = |ordinal| Period::new(ordinal, PeriodFreq::Monthly);
    let periods = super::PeriodIndex::new(vec![month(10), month(11), month(12)]);
    let period_span = periods.slice_indexer(month(11), month(12))?;
    assert_eq!(period_span, 1..3);

    // Range labels are 0, 2, 4, 6, 8; labels 2..=6 sit at positions 1, 2, 3.
    let range = super::RangeIndex::new(0, 10, 2).unwrap();
    let range_span = range.slice_indexer(2, 6)?;
    assert_eq!(range_span, 1..4);
    Ok(())
}
18525
/// `slice_locs` returns `(start, stop)` positions on ordered indexes and is
/// expected to fail on an unsorted `PeriodIndex` and on a descending `RangeIndex`.
#[test]
fn period_range_slice_locs_match_pandas_fdga0() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let month_10 = Period::new(10, PeriodFreq::Monthly);
    let month_11 = Period::new(11, PeriodFreq::Monthly);
    let month_12 = Period::new(12, PeriodFreq::Monthly);
    let month_13 = Period::new(13, PeriodFreq::Monthly);

    let ordered = super::PeriodIndex::new(vec![month_10, month_11, month_12, month_13]);
    assert_eq!(ordered.slice_locs(month_11, month_12)?, (1, 3));
    assert_eq!(ordered.slice_locs(month_10, month_13)?, (0, 4));

    let shuffled = super::PeriodIndex::new(vec![month_12, month_10, month_11]);
    assert!(shuffled.slice_locs(month_10, month_12).is_err());

    let ascending = super::RangeIndex::new(0, 10, 2).unwrap();
    assert_eq!(ascending.slice_locs(2, 6)?, (1, 4));
    assert_eq!(ascending.slice_locs(0, 8)?, (0, 5));

    let descending = super::RangeIndex::new(10, 0, -2).unwrap();
    assert!(descending.slice_locs(2, 6).is_err());
    Ok(())
}
18550
/// `rename` sets the name on every typed index variant: datetime, timedelta,
/// period, range and categorical.
///
/// No `fp_types` import is needed here: nothing in this test names a period
/// frequency, so the function-local `use` and the throwaway binding that only
/// existed to keep it "used" have been dropped.
#[test]
fn typed_index_variants_rename_alias_match_pandas_i8t6n() {
    let dt = super::DatetimeIndex::new(vec![]);
    assert_eq!(dt.rename("ts").name(), Some("ts"));

    let td = super::TimedeltaIndex::new(vec![]);
    assert_eq!(td.rename("d").name(), Some("d"));

    let pi = super::PeriodIndex::new(vec![]);
    assert_eq!(pi.rename("p").name(), Some("p"));

    // start == stop, so this range has no elements.
    let r = super::RangeIndex::new(0, 0, 1).unwrap();
    assert_eq!(r.rename("r").name(), Some("r"));

    let cat = super::CategoricalIndex::from_values(vec!["a".to_owned()], false);
    assert_eq!(cat.rename("c").name(), Some("c"));
}
18570
/// `reindex` returns the target's values together with an indexer that gives
/// each target label's source position, or `-1` when the label is absent.
#[test]
fn typed_index_variants_reindex_match_pandas_qm3nq() {
    use fp_types::{Period, PeriodFreq};
    const NS: i64 = 1_000_000_000;

    // DatetimeIndex: the target reverses the source and adds an unknown label.
    let first = 1_704_067_200_i64 * NS;
    let second = 1_705_276_800_i64 * NS;
    let stamps = super::DatetimeIndex::new(vec![first, second]);
    let stamp_target = super::DatetimeIndex::new(vec![second, first, 0]);
    let (reindexed, stamp_positions) = stamps.reindex(&stamp_target);
    assert_eq!(reindexed.values(), stamp_target.values());
    assert_eq!(stamp_positions, vec![1, 0, -1]);

    // TimedeltaIndex.
    let deltas = super::TimedeltaIndex::new(vec![100_i64, 200]);
    let delta_target = super::TimedeltaIndex::new(vec![200_i64, 999]);
    let (_, delta_positions) = deltas.reindex(&delta_target);
    assert_eq!(delta_positions, vec![1, -1]);

    // PeriodIndex.
    let month_10 = Period::new(10, PeriodFreq::Monthly);
    let month_11 = Period::new(11, PeriodFreq::Monthly);
    let periods = super::PeriodIndex::new(vec![month_10, month_11]);
    let period_target =
        super::PeriodIndex::new(vec![month_11, Period::new(99, PeriodFreq::Monthly)]);
    let (_, period_positions) = periods.reindex(&period_target);
    assert_eq!(period_positions, vec![1, -1]);

    // RangeIndex: the target's last label (5) is outside the 0..5 source.
    let range = super::RangeIndex::new(0, 5, 1).unwrap();
    let range_target = super::RangeIndex::new(2, 6, 1).unwrap();
    let (_, range_positions) = range.reindex(&range_target);
    assert_eq!(range_positions, vec![2, 3, 4, -1]);
}
18600
/// `get_indexer_non_unique` returns every matching position per target (with
/// `-1` for a miss) plus the target positions that were not found.
/// `CategoricalIndex::get_indexer` and its `get_indexer_for` alias are checked too.
#[test]
fn period_range_categorical_get_indexer_non_unique_match_pandas_z9sna()
-> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let month_10 = Period::new(10, PeriodFreq::Monthly);
    let month_11 = Period::new(11, PeriodFreq::Monthly);
    // month_10 appears at positions 0 and 2.
    let periods = super::PeriodIndex::new(vec![month_10, month_11, month_10]);
    let (period_hits, period_misses) =
        periods.get_indexer_non_unique(&[month_10, Period::new(99, PeriodFreq::Monthly)]);
    assert_eq!(period_hits, vec![0, 2, -1]);
    assert_eq!(period_misses, vec![1]);

    let range = super::RangeIndex::new(0, 5, 1).unwrap();
    let (range_hits, range_misses) = range.get_indexer_non_unique(&[2, 99]);
    assert_eq!(range_hits, vec![2, -1]);
    assert_eq!(range_misses, vec![1]);

    let categories = super::CategoricalIndex::from_values(
        vec![String::from("a"), String::from("b"), String::from("a")],
        false,
    );
    let (category_hits, category_misses) =
        categories.get_indexer_non_unique(&[String::from("a"), String::from("z")]);
    assert_eq!(category_hits, vec![0, 2, -1]);
    assert_eq!(category_misses, vec![1]);

    let unique_lookup = categories.get_indexer(&[String::from("b"), String::from("z")]);
    assert_eq!(unique_lookup, vec![1, -1]);

    // The alias must agree with `get_indexer` on the same input.
    let via_alias = categories.get_indexer_for(&[String::from("a")]);
    let via_direct = categories.get_indexer(&[String::from("a")]);
    assert_eq!(via_alias, via_direct);
    Ok(())
}
18639
/// On every typed variant, `get_indexer_for` must return exactly what
/// `get_indexer` returns for the same targets.
#[test]
fn typed_index_variants_get_indexer_for_aliases_match_pandas_lf1jy()
-> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};
    const NS: i64 = 1_000_000_000;

    let stamp = 1_704_067_200_i64 * NS;
    let stamps = super::DatetimeIndex::new(vec![stamp]);
    assert_eq!(
        stamps.get_indexer_for(&[stamp, 0]),
        stamps.get_indexer(&[stamp, 0])
    );

    let deltas = super::TimedeltaIndex::new(vec![100_i64, 200]);
    assert_eq!(
        deltas.get_indexer_for(&[200, 999]),
        deltas.get_indexer(&[200, 999])
    );

    let periods = super::PeriodIndex::new(vec![Period::new(10, PeriodFreq::Monthly)]);
    let period_targets = vec![Period::new(10, PeriodFreq::Monthly)];
    assert_eq!(
        periods.get_indexer_for(&period_targets),
        periods.get_indexer(&period_targets)
    );

    let range = super::RangeIndex::new(0, 5, 1).unwrap();
    assert_eq!(
        range.get_indexer_for(&[2, 99]),
        range.get_indexer(&[2, 99])
    );
    Ok(())
}
18662
/// `get_loc` / `get_indexer` on `PeriodIndex` and on ascending and descending
/// `RangeIndex`: present labels resolve to positions, absent ones fail or map to `-1`.
#[test]
fn period_range_get_loc_get_indexer_match_pandas_e7psu() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let month_10 = Period::new(10, PeriodFreq::Monthly);
    let month_11 = Period::new(11, PeriodFreq::Monthly);
    let month_12 = Period::new(12, PeriodFreq::Monthly);
    let periods = super::PeriodIndex::new(vec![month_10, month_11, month_12]);
    assert_eq!(periods.get_loc(month_11)?, 1);
    assert!(
        periods
            .get_loc(Period::new(99, PeriodFreq::Monthly))
            .is_err()
    );
    let period_positions =
        periods.get_indexer(&[month_12, month_10, Period::new(99, PeriodFreq::Monthly)]);
    assert_eq!(period_positions, vec![2, 0, -1]);

    // Ascending range with labels 0, 2, 4, 6, 8.
    let ascending = super::RangeIndex::new(0, 10, 2).unwrap();
    assert_eq!(ascending.get_loc(0)?, 0);
    assert_eq!(ascending.get_loc(8)?, 4);
    // 7 is not a multiple of the step; 99 is beyond the stop.
    assert!(ascending.get_loc(7).is_err());
    assert!(ascending.get_loc(99).is_err());
    assert_eq!(
        ascending.get_indexer(&[4, 7, 0, 99]),
        vec![2, -1, 0, -1]
    );

    // Descending range with labels 10, 8, 6, 4, 2.
    let descending = super::RangeIndex::new(10, 0, -2).unwrap();
    assert_eq!(descending.get_loc(10)?, 0);
    assert_eq!(descending.get_loc(2)?, 4);
    assert!(descending.get_loc(7).is_err());
    Ok(())
}
18692
/// `where` / `putmask` on a `PeriodIndex`: masked replacement, name retention,
/// and rejection of wrong-length masks and of a fill value with another frequency.
#[test]
fn period_index_where_putmask_match_pandas_so9oh() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let month_10 = Period::new(10, PeriodFreq::Monthly);
    let month_11 = Period::new(11, PeriodFreq::Monthly);
    let month_12 = Period::new(12, PeriodFreq::Monthly);
    let periods = super::PeriodIndex::new(vec![month_10, month_11, month_12]).set_name("p");

    // `where` replaces the slot whose mask entry is false.
    let kept = periods.r#where(&[true, false, true], month_10)?;
    assert_eq!(kept.values(), &[month_10, month_10, month_12]);
    assert_eq!(kept.name(), Some("p"));

    // `putmask` replaces the slot whose mask entry is true.
    let replaced = periods.putmask(&[false, true, false], month_10)?;
    assert_eq!(replaced.values(), &[month_10, month_10, month_12]);

    let short_mask_failure = periods.r#where(&[true, false], month_10).unwrap_err();
    assert!(matches!(
        short_mask_failure,
        super::IndexError::LengthMismatch { .. }
    ));

    let annual_fill = Period::new(10, PeriodFreq::Annual);
    assert!(periods.r#where(&[true, false, true], annual_fill).is_err());
    assert!(periods.putmask(&[false, true, false], annual_fill).is_err());
    Ok(())
}
18720
/// `where` / `putmask` on a `CategoricalIndex`: a fill value from the declared
/// categories is accepted, an undeclared one and a wrong-length mask are rejected.
#[test]
fn categorical_index_where_putmask_match_pandas_so9oh() -> Result<(), super::IndexError> {
    // "d" is a declared category even though no value uses it yet.
    let values = ["a", "b", "c"].map(String::from).to_vec();
    let declared = ["a", "b", "c", "d"].map(String::from).to_vec();
    let categories = super::CategoricalIndex::with_categories(values, declared, false)?;

    let kept = categories.r#where(&[true, false, true], "d")?;
    let kept_wanted = ["a", "d", "c"].map(String::from);
    assert_eq!(kept.labels(), kept_wanted.as_slice());

    let replaced = categories.putmask(&[false, true, true], "d")?;
    let replaced_wanted = ["a", "d", "d"].map(String::from);
    assert_eq!(replaced.labels(), replaced_wanted.as_slice());

    // Fill value outside the declared categories.
    assert!(categories.r#where(&[true, false, true], "zzz").is_err());

    // Five mask entries for three values.
    assert!(categories.putmask(&[true; 5], "a").is_err());
    Ok(())
}
18753
/// Set operations on `PeriodIndex`: expected members for overlapping monthly
/// indexes, errors when the other side has another frequency, and no name on
/// the union of differently named indexes.
#[test]
fn period_index_set_ops_match_pandas_8042v() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let month_10 = Period::new(10, PeriodFreq::Monthly);
    let month_11 = Period::new(11, PeriodFreq::Monthly);
    let month_12 = Period::new(12, PeriodFreq::Monthly);
    let month_13 = Period::new(13, PeriodFreq::Monthly);
    let lhs = super::PeriodIndex::new(vec![month_10, month_11, month_12]).set_name("p");
    let rhs = super::PeriodIndex::new(vec![month_11, month_12, month_13]).set_name("p");

    let common = lhs.intersection(&rhs)?;
    assert_eq!(common.values(), &[month_11, month_12]);
    let combined = lhs.union(&rhs)?;
    assert_eq!(combined.values(), &[month_10, month_11, month_12, month_13]);
    let only_lhs = lhs.difference(&rhs)?;
    assert_eq!(only_lhs.values(), &[month_10]);
    let exclusive = lhs.symmetric_difference(&rhs)?;
    assert_eq!(exclusive.values(), &[month_10, month_13]);

    // An annual index on the other side must fail for every operation.
    let annual = super::PeriodIndex::new(vec![Period::new(10, PeriodFreq::Annual)]);
    assert!(lhs.intersection(&annual).is_err());
    assert!(lhs.union(&annual).is_err());
    assert!(lhs.difference(&annual).is_err());
    assert!(lhs.symmetric_difference(&annual).is_err());

    let differently_named = super::PeriodIndex::new(vec![month_11]).set_name("other");
    let mixed_name_union = lhs.union(&differently_named)?;
    assert_eq!(mixed_name_union.name(), None);
    Ok(())
}
18781
/// `sort_values` and its `sort` alias on `PeriodIndex` (ordered values, name
/// kept, mixed frequencies rejected) and on an ordered `CategoricalIndex`.
#[test]
fn period_categorical_sort_values_match_pandas_482qd() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let month_10 = Period::new(10, PeriodFreq::Monthly);
    let month_11 = Period::new(11, PeriodFreq::Monthly);
    let month_12 = Period::new(12, PeriodFreq::Monthly);
    let shuffled = super::PeriodIndex::new(vec![month_12, month_10, month_11]).set_name("p");
    let via_sort_values = shuffled.sort_values()?;
    let via_sort = shuffled.sort()?;
    assert_eq!(via_sort_values.values(), &[month_10, month_11, month_12]);
    assert_eq!(via_sort.values(), via_sort_values.values());
    assert_eq!(via_sort_values.name(), Some("p"));
    assert_eq!(via_sort.name(), Some("p"));

    // Monthly and annual periods in one index.
    let mixed_freqs = super::PeriodIndex::new(vec![
        Period::new(10, PeriodFreq::Monthly),
        Period::new(10, PeriodFreq::Annual),
    ]);
    assert!(mixed_freqs.sort_values().is_err());
    assert!(mixed_freqs.sort().is_err());

    let categories = super::CategoricalIndex::with_categories(
        ["b", "a", "c", "a"].map(String::from).to_vec(),
        ["a", "b", "c"].map(String::from).to_vec(),
        true,
    )?;
    let categories_sorted = categories.sort_values();
    let categories_sorted_alias = categories.sort();
    let wanted_labels = ["a", "a", "b", "c"].map(String::from);
    assert_eq!(categories_sorted.labels(), wanted_labels.as_slice());
    assert_eq!(categories_sorted_alias.labels(), categories_sorted.labels());

    Ok(())
}
18830
#[test]
fn categorical_sort_values_by_category_code_not_lexicographic() {
    // Category order is b < a < c, which deliberately disagrees with
    // alphabetical order.
    let raw_values = vec![
        "a".to_owned(),
        "c".to_owned(),
        "b".to_owned(),
        "a".to_owned(),
    ];
    let categories = vec!["b".to_owned(), "a".to_owned(), "c".to_owned()];
    let ordered = super::CategoricalIndex::with_categories(raw_values, categories, true).unwrap();

    // Positions are ordered by category position: "b" (2), the two "a"s (0, 3), "c" (1).
    assert_eq!(ordered.argsort(), vec![2, 0, 3, 1]);

    let ordered_sorted = ordered.sort_values();
    let expected_ordered = [
        "b".to_owned(),
        "a".to_owned(),
        "a".to_owned(),
        "c".to_owned(),
    ];
    assert_eq!(ordered_sorted.labels(), expected_ordered.as_slice());

    // The same category-driven ordering is expected when the ordered flag is false.
    let unordered = super::CategoricalIndex::with_categories(
        vec!["a".to_owned(), "b".to_owned()],
        vec!["b".to_owned(), "a".to_owned()],
        false,
    )
    .unwrap();
    let unordered_sorted = unordered.sort_values();
    let expected_unordered = ["b".to_owned(), "a".to_owned()];
    assert_eq!(unordered_sorted.labels(), expected_unordered.as_slice());
}
18876
#[test]
fn period_index_from_ordinals_match_pandas_baenb() {
    use fp_types::PeriodFreq;

    // Every ordinal becomes one period carrying the requested frequency.
    let monthly = super::PeriodIndex::from_ordinals(&[10, 11, 12], PeriodFreq::Monthly);
    assert_eq!(monthly.values().len(), 3);
    assert_eq!(monthly.values()[0].ordinal, 10);
    assert_eq!(monthly.values()[2].ordinal, 12);
    assert_eq!(monthly.asi8(), vec![10, 11, 12]);
    monthly
        .values()
        .iter()
        .for_each(|period| assert_eq!(period.freq, PeriodFreq::Monthly));

    // An empty ordinal slice yields an empty index.
    let no_periods = super::PeriodIndex::from_ordinals(&[], PeriodFreq::Annual);
    assert!(no_periods.is_empty());
    assert!(no_periods.asi8().is_empty());
}
18893
#[test]
fn period_index_astype_datetime_and_int_match_pandas() -> Result<(), super::IndexError> {
    use fp_types::PeriodFreq;

    // Nanosecond values expected for monthly ordinals 600 and 601.
    const ORDINAL_600_NS: i64 = 1_577_836_800_000_000_000;
    const ORDINAL_601_NS: i64 = 1_580_515_200_000_000_000;

    let periods =
        super::PeriodIndex::from_ordinals(&[600, 601], PeriodFreq::Monthly).set_name("p");

    // Casting to int64 exposes the raw ordinals and keeps the name.
    let ordinals = periods.astype("int64")?;
    let expected_ordinals = [IndexLabel::Int64(600), IndexLabel::Int64(601)];
    assert_eq!(ordinals.labels(), &expected_ordinals);
    assert_eq!(ordinals.name(), Some("p"));

    // Casting to datetime64[ns] yields the expected nanosecond values and keeps the name.
    let timestamps = periods.astype("datetime64[ns]")?;
    let expected_timestamps = [
        IndexLabel::Datetime64(ORDINAL_600_NS),
        IndexLabel::Datetime64(ORDINAL_601_NS),
    ];
    assert_eq!(timestamps.labels(), &expected_timestamps);
    assert_eq!(timestamps.name(), Some("p"));

    Ok(())
}
18919
#[test]
fn period_index_missing_value_accessors_are_all_present() {
    use fp_types::PeriodFreq;

    let periods = super::PeriodIndex::from_ordinals(&[10, 11, 12], PeriodFreq::Monthly)
        .set_name("periods");

    // No element is reported missing, and each alias agrees with its primary accessor.
    assert!(!periods.hasnans());
    assert_eq!(periods.isna(), vec![false, false, false]);
    assert_eq!(periods.isnull(), periods.isna());
    assert_eq!(periods.notna(), vec![true, true, true]);
    assert_eq!(periods.notnull(), periods.notna());

    // With nothing to drop, `dropna` returns the same values and name.
    let without_missing = periods.dropna();
    assert_eq!(without_missing.values(), periods.values());
    assert_eq!(without_missing.name(), Some("periods"));
}
18934
#[test]
fn period_index_mean_median_match_pandas_3rsrc() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    // Ordinals 10, 20, 30: mean and median both land on ordinal 20.
    let low = Period::new(10, PeriodFreq::Monthly);
    let mid = Period::new(20, PeriodFreq::Monthly);
    let high = Period::new(30, PeriodFreq::Monthly);
    let spaced = super::PeriodIndex::new(vec![low, mid, high]);
    assert_eq!(spaced.mean()?.unwrap().ordinal, 20);
    assert_eq!(spaced.median()?.unwrap().ordinal, 20);

    // An empty index has neither a mean nor a median.
    let no_periods = super::PeriodIndex::new(Vec::new());
    assert_eq!(no_periods.mean()?, None);
    assert_eq!(no_periods.median()?, None);

    // Mixed frequencies are rejected by both reductions.
    let mixed_freq = super::PeriodIndex::new(vec![low, Period::new(10, PeriodFreq::Annual)]);
    assert!(mixed_freq.mean().is_err());
    assert!(mixed_freq.median().is_err());
    Ok(())
}
18954
#[test]
fn period_index_argmax_argmin_argsort_match_pandas_qg8u5() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let smallest = Period::new(10, PeriodFreq::Monthly);
    let middle = Period::new(11, PeriodFreq::Monthly);
    let largest = Period::new(12, PeriodFreq::Monthly);
    // Stored as [middle, largest, smallest].
    let rotated = super::PeriodIndex::new(vec![middle, largest, smallest]);

    assert_eq!(rotated.argmax()?, 1);
    assert_eq!(rotated.argmin()?, 2);
    assert_eq!(rotated.argsort()?, vec![2, 0, 1]);

    // Empty index: argmax and argmin fail, argsort returns an empty permutation.
    let no_periods = super::PeriodIndex::new(Vec::new());
    assert!(no_periods.argmax().is_err());
    assert!(no_periods.argmin().is_err());
    assert!(no_periods.argsort()?.is_empty());

    // Mixed frequencies are rejected.
    let mixed_freq =
        super::PeriodIndex::new(vec![smallest, Period::new(10, PeriodFreq::Annual)]);
    assert!(mixed_freq.argmax().is_err());
    assert!(mixed_freq.argsort().is_err());
    Ok(())
}
18977
#[test]
fn period_index_shift_match_pandas_pnaui() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let start = Period::new(10, PeriodFreq::Monthly);
    let next = Period::new(11, PeriodFreq::Monthly);
    let base = super::PeriodIndex::new(vec![start, next]).set_name("p");

    // A positive shift adds to every ordinal and keeps the name.
    let forward = base.shift(2)?;
    assert_eq!(forward.values()[0].ordinal, 12);
    assert_eq!(forward.values()[1].ordinal, 13);
    assert_eq!(forward.name(), Some("p"));

    // A negative shift moves ordinals backwards.
    let backward = base.shift(-1)?;
    assert_eq!(backward.values()[0].ordinal, 9);

    // Shifting an index with mixed frequencies is an error.
    let mixed_freq = super::PeriodIndex::new(vec![start, Period::new(10, PeriodFreq::Annual)]);
    assert!(mixed_freq.shift(1).is_err());
    Ok(())
}
18999
#[test]
fn period_index_is_full_match_pandas_7i32m() {
    use fp_types::{Period, PeriodFreq};

    let ord_10 = Period::new(10, PeriodFreq::Monthly);
    let ord_11 = Period::new(11, PeriodFreq::Monthly);
    let ord_12 = Period::new(12, PeriodFreq::Monthly);
    let ord_14 = Period::new(14, PeriodFreq::Monthly);

    // Consecutive ordinals are full.
    let consecutive = super::PeriodIndex::new(vec![ord_10, ord_11, ord_12]);
    assert!(consecutive.is_full());

    // The same ordinals in a different storage order are still full.
    let reordered = super::PeriodIndex::new(vec![ord_12, ord_10, ord_11]);
    assert!(reordered.is_full());

    // Skipping ordinal 13 leaves a hole.
    let with_hole = super::PeriodIndex::new(vec![ord_10, ord_11, ord_14]);
    assert!(!with_hole.is_full());

    // Empty and single-element indexes count as full.
    assert!(super::PeriodIndex::new(Vec::new()).is_full());
    assert!(super::PeriodIndex::new(vec![ord_10]).is_full());

    // Mixed frequencies report `false` instead of raising an error.
    let mixed_freq =
        super::PeriodIndex::new(vec![ord_10, Period::new(10, PeriodFreq::Annual)]);
    assert!(!mixed_freq.is_full());
}
19028
#[test]
fn period_index_min_max_match_pandas_fwlv4() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let earliest = Period::new(10, PeriodFreq::Monthly);
    let between = Period::new(11, PeriodFreq::Monthly);
    let latest = Period::new(12, PeriodFreq::Monthly);
    let shuffled = super::PeriodIndex::new(vec![latest, earliest, between]);
    assert_eq!(shuffled.min()?, Some(earliest));
    assert_eq!(shuffled.max()?, Some(latest));

    // Empty index: both extremes are `None`.
    let no_periods = super::PeriodIndex::new(Vec::new());
    assert_eq!(no_periods.min()?, None);
    assert_eq!(no_periods.max()?, None);

    // Mixed frequencies are rejected.
    let mixed_freq = super::PeriodIndex::new(vec![
        Period::new(10, PeriodFreq::Monthly),
        Period::new(10, PeriodFreq::Annual),
    ]);
    assert!(mixed_freq.min().is_err());
    assert!(mixed_freq.max().is_err());
    Ok(())
}
19052
#[test]
fn range_index_sort_values_closed_form_mhcge() {
    // An ascending range is already sorted, so sorting returns an equal range.
    let ascending = super::RangeIndex::new(0, 5, 1).unwrap();
    assert!(ascending.sort_values().equals(&ascending));
    assert!(ascending.sort().equals(&ascending));

    // A descending range 10, 8, ..., 2 comes back in ascending order.
    let descending = super::RangeIndex::new(10, 0, -2).unwrap();
    let via_sort_values = descending.sort_values();
    let via_sort = descending.sort();
    assert_eq!(via_sort_values.values(), vec![2, 4, 6, 8, 10]);
    assert_eq!(via_sort.values(), via_sort_values.values());

    // An empty range stays empty.
    let no_rows = super::RangeIndex::new(0, 0, 1).unwrap();
    assert!(no_rows.sort_values().is_empty());
    assert!(no_rows.sort().is_empty());

    // A second, independently built ascending range behaves the same way.
    let ascending_again = super::RangeIndex::new(0, 5, 1).unwrap();
    assert!(ascending_again.sort_values().equals(&ascending_again));
    assert!(ascending_again.sort().equals(&ascending_again));
}
19074
#[test]
fn range_index_std_var_median_closed_form_tkc0m() {
    // Values 1..=10: the median falls halfway between 5 and 6.
    let one_to_ten = super::RangeIndex::new(1, 11, 1).unwrap();
    assert_eq!(one_to_ten.median(), Some(5.5));
    // 9.1666... matches the n - 1 (sample) variance of 1..=10.
    let variance = one_to_ten.var().unwrap();
    assert!((variance - 9.1666666666).abs() < 1e-6);
    // The standard deviation must be the square root of the variance.
    let std_dev = one_to_ten.std().unwrap();
    assert!((std_dev - variance.sqrt()).abs() < 1e-12);

    // A single element has a median but no variance or standard deviation.
    let single = super::RangeIndex::new(5, 6, 1).unwrap();
    assert_eq!(single.median(), Some(5.0));
    assert_eq!(single.var(), None);
    assert_eq!(single.std(), None);

    // An empty range has no median and no variance.
    let no_rows = super::RangeIndex::new(0, 0, 1).unwrap();
    assert_eq!(no_rows.median(), None);
    assert_eq!(no_rows.var(), None);
}
19097
#[test]
fn range_index_prod_match_pandas_8yxw8() {
    // 1 * 2 * 3 * 4 * 5 = 120.
    let one_to_five = super::RangeIndex::new(1, 6, 1).unwrap();
    assert_eq!(one_to_five.prod(), 120);

    // The empty product is 1.
    let no_rows = super::RangeIndex::new(0, 0, 1).unwrap();
    assert_eq!(no_rows.prod(), 1);

    // Any range containing 0 multiplies out to 0.
    let starts_at_zero = super::RangeIndex::new(0, 5, 1).unwrap();
    assert_eq!(starts_at_zero.prod(), 0);
}
19112
#[test]
fn range_index_min_max_sum_mean_closed_form_fwlv4() {
    // Ascending 1..=10.
    let ascending = super::RangeIndex::new(1, 11, 1).unwrap();
    assert_eq!(ascending.min(), Some(1));
    assert_eq!(ascending.max(), Some(10));
    assert_eq!(ascending.sum(), 55);
    assert_eq!(ascending.mean(), Some(5.5));

    // Descending 10, 8, 6, 4, 2: the minimum is the last element produced.
    let descending = super::RangeIndex::new(10, 0, -2).unwrap();
    assert_eq!(descending.min(), Some(2));
    assert_eq!(descending.max(), Some(10));
    assert_eq!(descending.sum(), 30);
    assert_eq!(descending.mean(), Some(6.0));

    // Empty range: no extremes, zero sum, no mean.
    let no_rows = super::RangeIndex::new(0, 0, 1).unwrap();
    assert_eq!(no_rows.min(), None);
    assert_eq!(no_rows.max(), None);
    assert_eq!(no_rows.sum(), 0);
    assert_eq!(no_rows.mean(), None);
}
19135
#[test]
fn datetime_index_where_putmask_match_pandas_nwqty() -> Result<(), super::IndexError> {
    const NS: i64 = 1_000_000_000;
    // Three increasing timestamps, written as seconds scaled to nanoseconds.
    let first = 1_704_067_200_i64 * NS;
    let second = 1_705_276_800_i64 * NS;
    let third = 1_706_140_800_i64 * NS;
    let stamps = super::DatetimeIndex::new(vec![first, second, third]).set_name("ts");

    // `where` keeps entries whose condition is true; the i64::MIN replacement
    // reads back as `None`.
    let kept = stamps.r#where(&[true, false, true], i64::MIN)?;
    assert_eq!(kept.values(), vec![Some(first), None, Some(third)]);
    assert_eq!(kept.name(), Some("ts"));

    // `putmask` overwrites entries whose mask is true.
    let overwritten = stamps.putmask(&[true, false, false], third)?;
    assert_eq!(
        overwritten.values(),
        vec![Some(third), Some(second), Some(third)]
    );

    // A condition shorter than the index is a length mismatch.
    let short_cond = stamps.r#where(&[true, false], i64::MIN).unwrap_err();
    assert!(matches!(
        short_cond,
        super::IndexError::LengthMismatch {
            expected: 3,
            actual: 2,
            ..
        }
    ));
    // So is a mask longer than the index.
    let long_mask = stamps.putmask(&[true; 5], third).unwrap_err();
    assert!(matches!(
        long_mask,
        super::IndexError::LengthMismatch {
            expected: 3,
            actual: 5,
            ..
        }
    ));
    Ok(())
}
19174
#[test]
fn timedelta_index_where_putmask_match_pandas_nwqty() -> Result<(), super::IndexError> {
    let missing = fp_types::Timedelta::NAT;
    let deltas = super::TimedeltaIndex::new(vec![100_i64, 200, 300]).set_name("d");

    // Entries whose condition is false are replaced by NaT, which reads back as `None`.
    let kept = deltas.r#where(&[false, true, false], missing)?;
    assert_eq!(kept.values(), vec![None, Some(200), None]);
    assert_eq!(kept.name(), Some("d"));

    // `putmask` overwrites entries whose mask is true.
    let overwritten = deltas.putmask(&[false, true, true], 999)?;
    assert_eq!(
        overwritten.values(),
        vec![Some(100), Some(999), Some(999)]
    );

    // A condition shorter than the index is a length mismatch.
    let short_cond = deltas.r#where(&[true, false], missing).unwrap_err();
    assert!(matches!(
        short_cond,
        super::IndexError::LengthMismatch {
            expected: 3,
            actual: 2,
            ..
        }
    ));
    Ok(())
}
19198
#[test]
fn index_variants_searchsorted_match_pandas_tam73() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    const NS: i64 = 1_000_000_000;

    // Datetime index: three increasing timestamps.
    let first = 1_704_067_200_i64 * NS;
    let second = 1_705_276_800_i64 * NS;
    let third = 1_706_140_800_i64 * NS;
    let stamps = super::DatetimeIndex::new(vec![first, second, third]);

    assert_eq!(stamps.searchsorted(first, "left")?, 0);
    assert_eq!(stamps.searchsorted(first, "right")?, 1);
    assert_eq!(stamps.searchsorted(third, "right")?, 3);
    // A value strictly between two entries inserts after the smaller one.
    let just_after_first = first + 1;
    assert_eq!(stamps.searchsorted(just_after_first, "left")?, 1);

    // Only the sides "left" and "right" are exercised as valid.
    assert!(stamps.searchsorted(first, "middle").is_err());

    // Timedelta index.
    let deltas = super::TimedeltaIndex::new(vec![100_i64, 200, 300]);
    assert_eq!(deltas.searchsorted(150, "left")?, 1);
    assert_eq!(deltas.searchsorted(200, "right")?, 2);

    // Period index: a probe with a different frequency is rejected.
    let ord_10 = Period::new(10, PeriodFreq::Monthly);
    let ord_11 = Period::new(11, PeriodFreq::Monthly);
    let ord_12 = Period::new(12, PeriodFreq::Monthly);
    let periods = super::PeriodIndex::new(vec![ord_10, ord_11, ord_12]);
    assert_eq!(periods.searchsorted(ord_11, "left")?, 1);
    assert_eq!(periods.searchsorted(ord_12, "right")?, 3);
    let annual_probe = Period::new(10, PeriodFreq::Annual);
    assert!(periods.searchsorted(annual_probe, "left").is_err());

    // Ascending range 0, 2, 4, 6, 8.
    let ascending = super::RangeIndex::new(0, 10, 2).unwrap();
    assert_eq!(ascending.searchsorted(4, "left")?, 2);
    assert_eq!(ascending.searchsorted(4, "right")?, 3);
    assert_eq!(ascending.searchsorted(7, "left")?, 4);

    // A descending range is rejected.
    let descending = super::RangeIndex::new(10, 0, -2).unwrap();
    assert!(descending.searchsorted(4, "left").is_err());
    Ok(())
}
19243
#[test]
fn datetime_timedelta_get_indexer_non_unique_match_pandas_sm32a() {
    const NS: i64 = 1_000_000_000;
    let first = 1_704_067_200_i64 * NS;
    let second = 1_705_276_800_i64 * NS;

    // `first` sits at positions 0 and 2; `second + 99` is absent from the index.
    let stamps = super::DatetimeIndex::new(vec![first, second, first, second]);
    let (dt_positions, dt_missing) = stamps.get_indexer_non_unique(&[first, second + 99]);
    // Every match is listed, then -1 for the absent target.
    assert_eq!(dt_positions, vec![0, 2, -1]);
    // `dt_missing` holds the target-side position of the absent value.
    assert_eq!(dt_missing, vec![1]);

    // Same shape of result for a timedelta index with a duplicated entry.
    let deltas = super::TimedeltaIndex::new(vec![100_i64, 200, 100]);
    let (td_positions, td_missing) = deltas.get_indexer_non_unique(&[100, 999]);
    assert_eq!(td_positions, vec![0, 2, -1]);
    assert_eq!(td_missing, vec![1]);
}
19261
#[test]
fn datetime_timedelta_get_loc_get_indexer_match_pandas_6x9de() -> Result<(), super::IndexError>
{
    const NS: i64 = 1_000_000_000;
    let first = 1_704_067_200_i64 * NS;
    let second = 1_705_276_800_i64 * NS;
    let third = 1_706_140_800_i64 * NS;
    let stamps = super::DatetimeIndex::new(vec![first, second, third]);

    // A present value resolves to its position.
    assert_eq!(stamps.get_loc(second)?, 1);
    // An absent value yields an InvalidArgument error whose message mentions `get_loc`.
    let absent_err = stamps.get_loc(second + 1).unwrap_err();
    assert!(matches!(
        absent_err,
        super::IndexError::InvalidArgument(ref msg) if msg.contains("get_loc")
    ));

    // `get_indexer` maps each target to its position, or -1 when absent.
    let dt_positions = stamps.get_indexer(&[third, first, second + 999]);
    assert_eq!(dt_positions, vec![2, 0, -1]);

    let deltas = super::TimedeltaIndex::new(vec![100_i64, 200, 300]);
    assert_eq!(deltas.get_loc(200)?, 1);
    assert_eq!(deltas.get_indexer(&[300, 999, 100]), vec![2, -1, 0]);
    Ok(())
}
19289
#[test]
fn datetime_timedelta_slice_indexer_match_pandas_95eqf() -> Result<(), super::IndexError> {
    const NS: i64 = 1_000_000_000;
    let first = 1_704_067_200_i64 * NS;
    let second = 1_705_276_800_i64 * NS;
    let third = 1_706_140_800_i64 * NS;

    // Both endpoints are included, so the resulting range ends one past the
    // position of the upper bound.
    let stamps = super::DatetimeIndex::new(vec![first, second, third]);
    assert_eq!(stamps.slice_indexer(second, third)?, 1..3);
    assert_eq!(stamps.slice_indexer(first, third)?, 0..3);

    // Bounds that are not themselves present select only the entries between them.
    let deltas = super::TimedeltaIndex::new(vec![100_i64, 200, 300]);
    assert_eq!(deltas.slice_indexer(150, 250)?, 1..2);
    Ok(())
}
19304
#[test]
fn datetime_timedelta_get_slice_bound_match_pandas_x7r04() -> Result<(), super::IndexError> {
    const NS: i64 = 1_000_000_000;
    let first = 1_704_067_200_i64 * NS;
    let second = 1_705_276_800_i64 * NS;
    let third = 1_706_140_800_i64 * NS;

    // For a present label, "left" is its position and "right" is one past it.
    let stamps = super::DatetimeIndex::new(vec![first, second, third]);
    assert_eq!(stamps.get_slice_bound(second, "left")?, 1);
    assert_eq!(stamps.get_slice_bound(second, "right")?, 2);
    // Any other side string is rejected.
    assert!(stamps.get_slice_bound(second, "middle").is_err());

    let deltas = super::TimedeltaIndex::new(vec![100_i64, 200, 300]);
    assert_eq!(deltas.get_slice_bound(150, "left")?, 1);
    assert_eq!(deltas.get_slice_bound(200, "right")?, 2);
    Ok(())
}
19321
#[test]
fn datetime_timedelta_slice_locs_match_pandas_mxedz() -> Result<(), super::IndexError> {
    const NS: i64 = 1_000_000_000;
    let first = 1_704_067_200_i64 * NS;
    let second = 1_705_276_800_i64 * NS;
    let third = 1_706_140_800_i64 * NS;
    let fourth = 1_707_350_400_i64 * NS;
    let stamps = super::DatetimeIndex::new(vec![first, second, third, fourth]);

    // Interior bounds.
    assert_eq!(stamps.slice_locs(second, third)?, (1, 3));
    // Bounds spanning the whole index.
    assert_eq!(stamps.slice_locs(first, fourth)?, (0, 4));
    // Bounds entirely past the last entry give an empty slice at the end.
    assert_eq!(stamps.slice_locs(fourth + 1, fourth + 2)?, (4, 4));

    // An index that is not sorted is rejected.
    let out_of_order = super::DatetimeIndex::new(vec![third, first, second, fourth]);
    assert!(out_of_order.slice_locs(first, third).is_err());

    let deltas = super::TimedeltaIndex::new(vec![100_i64, 200, 300]);
    assert_eq!(deltas.slice_locs(150, 250)?, (1, 2));

    Ok(())
}
19348
#[test]
fn index_variants_to_flat_index_match_pandas_wcpw5() {
    use fp_types::{Period, PeriodFreq};

    const NS: i64 = 1_000_000_000;

    // For every variant, `to_frame` and `to_series` must agree with the same
    // calls on the flattened index.

    // Datetime: flattening keeps length and name and produces Datetime64 labels.
    let stamps = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NS]).set_name("ts");
    let stamps_flat = stamps.to_flat_index();
    assert_eq!(stamps_flat.len(), 1);
    assert_eq!(stamps_flat.name(), Some("ts"));
    assert!(matches!(
        stamps_flat.labels()[0],
        super::IndexLabel::Datetime64(_)
    ));
    assert_eq!(stamps.to_frame(), stamps_flat.to_frame());
    assert_eq!(stamps.to_series(), stamps_flat.to_series());

    // Timedelta.
    let deltas = super::TimedeltaIndex::new(vec![100_i64]).set_name("d");
    let deltas_flat = deltas.to_flat_index();
    assert_eq!(deltas_flat.len(), 1);
    assert_eq!(deltas_flat.name(), Some("d"));
    assert_eq!(deltas.to_frame(), deltas_flat.to_frame());
    assert_eq!(deltas.to_series(), deltas_flat.to_series());

    // Period: the flattened labels are Utf8.
    let periods =
        super::PeriodIndex::new(vec![Period::new(10, PeriodFreq::Monthly)]).set_name("p");
    let periods_flat = periods.to_flat_index();
    assert_eq!(periods_flat.len(), 1);
    assert!(matches!(
        periods_flat.labels()[0],
        super::IndexLabel::Utf8(_)
    ));
    assert_eq!(periods.to_frame(), periods_flat.to_frame());
    assert_eq!(periods.to_series(), periods_flat.to_series());

    // Range.
    let rows = super::RangeIndex::new(0, 3, 1).unwrap().set_name("r");
    let rows_flat = rows.to_flat_index();
    assert_eq!(rows_flat.len(), 3);
    assert_eq!(rows_flat.name(), Some("r"));
    assert_eq!(rows.to_frame(), rows_flat.to_frame());
    assert_eq!(rows.to_series(), rows_flat.to_series());

    // Categorical.
    let categorical =
        super::CategoricalIndex::from_values(vec!["a".to_owned(), "b".to_owned()], false);
    let categorical_flat = categorical.to_flat_index();
    assert_eq!(categorical_flat.len(), 2);
    assert_eq!(categorical.to_frame(), categorical_flat.to_frame());
    assert_eq!(categorical.to_series(), categorical_flat.to_series());
}
19391
#[test]
fn index_variants_all_any_forward_flat_truthiness_ejwyw() {
    use fp_types::{Period, PeriodFreq};

    const NS: i64 = 1_000_000_000;

    // Datetime: a zero entry makes `all` false while `any` stays true.
    let stamps = super::DatetimeIndex::new(vec![0, NS]);
    let stamps_flat = stamps.to_flat_index();
    assert_eq!(stamps.any(), stamps_flat.any());
    assert_eq!(stamps.all(), stamps_flat.all());
    assert!(stamps.any());
    assert!(!stamps.all());

    // Timedelta: same pattern with a zero entry.
    let deltas = super::TimedeltaIndex::new(vec![0, 5]);
    let deltas_flat = deltas.to_flat_index();
    assert_eq!(deltas.any(), deltas_flat.any());
    assert_eq!(deltas.all(), deltas_flat.all());
    assert!(deltas.any());
    assert!(!deltas.all());

    // Period: both entries are truthy here.
    let periods = super::PeriodIndex::new(vec![
        Period::new(1, PeriodFreq::Monthly),
        Period::new(2, PeriodFreq::Monthly),
    ]);
    let periods_flat = periods.to_flat_index();
    assert_eq!(periods.any(), periods_flat.any());
    assert_eq!(periods.all(), periods_flat.all());
    assert!(periods.any());
    assert!(periods.all());

    // Range 0, 1, 2 contains a zero.
    let rows = super::RangeIndex::new(0, 3, 1).unwrap();
    let rows_flat = rows.to_flat_index();
    assert_eq!(rows.any(), rows_flat.any());
    assert_eq!(rows.all(), rows_flat.all());
    assert!(rows.any());
    assert!(!rows.all());

    // Empty range: `any` is false and `all` is true.
    let no_rows = super::RangeIndex::new(0, 0, 1).unwrap();
    assert!(!no_rows.any());
    assert!(no_rows.all());

    // Categorical: the empty string is the falsy entry.
    let categorical =
        super::CategoricalIndex::from_values(vec![String::new(), "x".to_owned()], false);
    let categorical_flat = categorical.to_flat_index();
    assert_eq!(categorical.any(), categorical_flat.any());
    assert_eq!(categorical.all(), categorical_flat.all());
    assert!(categorical.any());
    assert!(!categorical.all());
}
19439
#[test]
fn index_variants_get_level_values_forward_flat_xf0zn() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    const NS: i64 = 1_000_000_000;

    // For every variant, level 0 must equal the flattened index.
    let stamps = super::DatetimeIndex::new(vec![NS, 2 * NS]).set_name("ts");
    assert_eq!(stamps.get_level_values(0)?, stamps.to_flat_index());

    let deltas = super::TimedeltaIndex::new(vec![5, 10]).set_name("delta");
    assert_eq!(deltas.get_level_values(0)?, deltas.to_flat_index());

    let periods =
        super::PeriodIndex::new(vec![Period::new(1, PeriodFreq::Monthly)]).set_name("period");
    assert_eq!(periods.get_level_values(0)?, periods.to_flat_index());

    let rows = super::RangeIndex::new(1, 4, 1)?.set_name("row");
    assert_eq!(rows.get_level_values(0)?, rows.to_flat_index());

    let categorical =
        super::CategoricalIndex::from_values(vec!["a".to_owned()], false).set_name("category");
    assert_eq!(
        categorical.get_level_values(0)?,
        categorical.to_flat_index()
    );

    // Level 1 is out of bounds, and the error reports a length of 1.
    assert!(matches!(
        categorical.get_level_values(1),
        Err(super::IndexError::OutOfBounds {
            position: 1,
            length: 1
        })
    ));

    Ok(())
}
19472
#[test]
fn index_variants_droplevel_forward_flat_errors_t8vpw() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    const NS: i64 = 1_000_000_000;
    // Message expected whenever level 0 of one of these indexes is dropped.
    const ONLY_LEVEL_MSG: &str = "cannot remove the only level from a flat Index";

    let stamps = super::DatetimeIndex::new(vec![NS, 2 * NS]).set_name("ts");
    assert!(matches!(
        stamps.droplevel(0),
        Err(super::IndexError::InvalidArgument(message))
            if message == ONLY_LEVEL_MSG
    ));

    let deltas = super::TimedeltaIndex::new(vec![5, 10]).set_name("delta");
    assert!(matches!(
        deltas.droplevel(0),
        Err(super::IndexError::InvalidArgument(message))
            if message == ONLY_LEVEL_MSG
    ));

    let periods =
        super::PeriodIndex::new(vec![Period::new(1, PeriodFreq::Monthly)]).set_name("period");
    assert!(matches!(
        periods.droplevel(0),
        Err(super::IndexError::InvalidArgument(message))
            if message == ONLY_LEVEL_MSG
    ));

    let rows = super::RangeIndex::new(1, 4, 1)?.set_name("row");
    assert!(matches!(
        rows.droplevel(0),
        Err(super::IndexError::InvalidArgument(message))
            if message == ONLY_LEVEL_MSG
    ));

    // A level that does not exist is reported as out of bounds instead.
    let categorical =
        super::CategoricalIndex::from_values(vec!["a".to_owned()], false).set_name("category");
    assert!(matches!(
        categorical.droplevel(1),
        Err(super::IndexError::OutOfBounds {
            position: 1,
            length: 1
        })
    ));

    Ok(())
}
19519
#[test]
fn index_variants_groupby_forward_flat_buckets_vypi3() {
    use fp_types::{Period, PeriodFreq};

    const NS: i64 = 1_000_000_000;

    // Each variant must group exactly like its flattened index does.
    let stamps = super::DatetimeIndex::new(vec![NS, 2 * NS, NS]);
    assert_eq!(stamps.groupby(), stamps.to_flat_index().groupby());

    let deltas = super::TimedeltaIndex::new(vec![5, 10, 5]);
    assert_eq!(deltas.groupby(), deltas.to_flat_index().groupby());

    let periods = super::PeriodIndex::new(vec![
        Period::new(1, PeriodFreq::Monthly),
        Period::new(2, PeriodFreq::Monthly),
        Period::new(1, PeriodFreq::Monthly),
    ]);
    assert_eq!(periods.groupby(), periods.to_flat_index().groupby());

    let rows = super::RangeIndex::new(2, 8, 2).unwrap();
    assert_eq!(rows.groupby(), rows.to_flat_index().groupby());

    let categorical = super::CategoricalIndex::from_values(
        vec!["a".to_owned(), "b".to_owned(), "a".to_owned()],
        false,
    );
    assert_eq!(
        categorical.groupby(),
        categorical.to_flat_index().groupby()
    );

    // The bucket for "a" holds the positions of both occurrences.
    let key_a = super::IndexLabel::Utf8("a".to_owned());
    let buckets = categorical.groupby();
    assert_eq!(buckets.get(&key_a).cloned(), Some(vec![0, 2]));
}
19553
#[test]
fn index_variants_map_forward_flat_and_preserve_name_vxlfs() {
    use fp_types::{Period, PeriodFreq};

    const NS: i64 = 1_000_000_000;

    /// Doubles a timedelta label into an Int64 label; other labels pass through.
    fn double_timedelta(label: &super::IndexLabel) -> super::IndexLabel {
        match label {
            super::IndexLabel::Timedelta64(nanos) => super::IndexLabel::Int64(*nanos * 2),
            other => other.clone(),
        }
    }

    /// Adds 10 to an Int64 label; other labels pass through.
    fn plus_ten(label: &super::IndexLabel) -> super::IndexLabel {
        match label {
            super::IndexLabel::Int64(v) => super::IndexLabel::Int64(*v + 10),
            other => other.clone(),
        }
    }

    // Datetime: nanoseconds mapped to whole seconds; the name is kept.
    let stamps = super::DatetimeIndex::new(vec![NS, 2 * NS]).set_name("ts");
    let stamps_as_seconds = stamps.map(|label| match label {
        super::IndexLabel::Datetime64(nanos) => super::IndexLabel::Int64(*nanos / NS),
        other => other.clone(),
    });
    let expected_seconds = [super::IndexLabel::Int64(1), super::IndexLabel::Int64(2)];
    assert_eq!(stamps_as_seconds.labels(), &expected_seconds);
    assert_eq!(stamps_as_seconds.name(), Some("ts"));

    // The remaining variants must map exactly like their flattened index.
    let deltas = super::TimedeltaIndex::new(vec![5, 10]).set_name("delta");
    assert_eq!(
        deltas.map(double_timedelta),
        deltas.to_flat_index().map(double_timedelta)
    );

    let periods = super::PeriodIndex::new(vec![Period::new(1, PeriodFreq::Monthly)]);
    assert_eq!(
        periods.map(|label| super::IndexLabel::Utf8(format!("p:{label}"))),
        periods
            .to_flat_index()
            .map(|label| super::IndexLabel::Utf8(format!("p:{label}")))
    );

    let rows = super::RangeIndex::new(1, 4, 1).unwrap();
    assert_eq!(rows.map(plus_ten), rows.to_flat_index().map(plus_ten));

    let categorical = super::CategoricalIndex::from_values(vec!["a".to_owned()], false);
    assert_eq!(
        categorical.map(|label| super::IndexLabel::Utf8(label.to_string().to_uppercase())),
        categorical
            .to_flat_index()
            .map(|label| super::IndexLabel::Utf8(label.to_string().to_uppercase()))
    );
}
19608
#[test]
fn index_variants_astype_forward_flat_and_preserve_name_o5pyg() {
    use fp_types::{Period, PeriodFreq};

    const NS: i64 = 1_000_000_000;

    // Datetime -> int64 matches the flat index and keeps the name; float64 is rejected.
    let stamps = super::DatetimeIndex::new(vec![NS, 2 * NS]).set_name("ts");
    let stamps_flat_ints = stamps.to_flat_index().astype("int64").unwrap();
    assert_eq!(stamps.astype("int64").unwrap(), stamps_flat_ints);
    assert_eq!(stamps.astype("int64").unwrap().name(), Some("ts"));
    assert!(stamps.astype("float64").is_err());

    // Timedelta -> string.
    let deltas = super::TimedeltaIndex::new(vec![5, 10]).set_name("delta");
    let deltas_flat_strings = deltas.to_flat_index().astype("string").unwrap();
    assert_eq!(deltas.astype("string").unwrap(), deltas_flat_strings);
    assert_eq!(deltas.astype("string").unwrap().name(), Some("delta"));

    // Period -> object.
    let periods = super::PeriodIndex::new(vec![Period::new(1, PeriodFreq::Monthly)]);
    let periods_flat_objects = periods.to_flat_index().astype("object").unwrap();
    assert_eq!(periods.astype("object").unwrap(), periods_flat_objects);

    // Range -> str.
    let rows = super::RangeIndex::new(1, 4, 1).unwrap().set_name("r");
    let rows_flat_strs = rows.to_flat_index().astype("str").unwrap();
    assert_eq!(rows.astype("str").unwrap(), rows_flat_strs);
    assert_eq!(rows.astype("str").unwrap().name(), Some("r"));

    // Categorical "7" -> int matches the flat index; datetime64[ns] is rejected.
    let categorical = super::CategoricalIndex::from_values(vec!["7".to_owned()], false);
    let categorical_flat_ints = categorical.to_flat_index().astype("int").unwrap();
    assert_eq!(categorical.astype("int").unwrap(), categorical_flat_ints);
    assert!(categorical.astype("datetime64[ns]").is_err());
}
19649
#[test]
fn index_variants_asof_forward_flat_and_mask_locs_955dj() {
    use fp_types::{Period, PeriodFreq};

    const NS: i64 = 1_000_000_000;

    // Datetime: a probe between two entries agrees with the flat index; a
    // probe before the first entry yields `None`.
    let stamps = super::DatetimeIndex::new(vec![NS, 3 * NS, 5 * NS]);
    let between_probe = super::IndexLabel::Datetime64(4 * NS);
    assert_eq!(
        stamps.asof(&between_probe),
        stamps.to_flat_index().asof(&between_probe)
    );
    assert_eq!(stamps.asof(&super::IndexLabel::Datetime64(0)), None);

    // Timedelta: `asof_locs` with a mask that disables the first entry.
    let deltas = super::TimedeltaIndex::new(vec![10, 20, 30]);
    let probes = super::Index::new(vec![
        super::IndexLabel::Timedelta64(5),
        super::IndexLabel::Timedelta64(20),
        super::IndexLabel::Timedelta64(25),
    ]);
    let usable = [false, true, true];
    assert_eq!(
        deltas.asof_locs(&probes, Some(&usable)),
        deltas.to_flat_index().asof_locs(&probes, Some(&usable))
    );
    assert_eq!(
        deltas.asof_locs(&probes, Some(&usable)),
        vec![None, Some(1), Some(1)]
    );

    // Period: probe with a label taken from the flattened index itself.
    let periods = super::PeriodIndex::new(vec![
        Period::new(1, PeriodFreq::Monthly),
        Period::new(2, PeriodFreq::Monthly),
    ]);
    let period_probe = periods.to_flat_index().labels()[1].clone();
    assert_eq!(
        periods.asof(&period_probe),
        periods.to_flat_index().asof(&period_probe)
    );

    // Range 2, 4, 6 probed with a value between two entries.
    let rows = super::RangeIndex::new(2, 8, 2).unwrap();
    let row_probe = super::IndexLabel::Int64(5);
    assert_eq!(rows.asof(&row_probe), rows.to_flat_index().asof(&row_probe));

    // Categorical probed with a label that is not one of its values.
    let categorical = super::CategoricalIndex::from_values(
        vec!["a".to_owned(), "c".to_owned(), "e".to_owned()],
        false,
    );
    let cat_probe = super::IndexLabel::Utf8("d".to_owned());
    assert_eq!(
        categorical.asof(&cat_probe),
        categorical.to_flat_index().asof(&cat_probe)
    );
}
19697
#[test]
fn index_variants_drop_join_sortlevel_forward_flat_gr6kj() {
    use fp_types::{Period, PeriodFreq};

    const NS: i64 = 1_000_000_000;

    // Datetime: `drop` matches the flat index and keeps the name.
    let stamps = super::DatetimeIndex::new(vec![NS, 3 * NS, 5 * NS]).set_name("ts");
    let stamps_to_drop = [super::IndexLabel::Datetime64(3 * NS)];
    assert_eq!(
        stamps.drop(&stamps_to_drop),
        stamps.to_flat_index().drop(&stamps_to_drop)
    );
    assert_eq!(stamps.drop(&stamps_to_drop).name(), Some("ts"));

    // Timedelta: `sortlevel` returns the same sorted index and permutation.
    let deltas = super::TimedeltaIndex::new(vec![30, 10, 20]);
    let (deltas_sorted, deltas_order) = deltas.sortlevel();
    let (deltas_flat_sorted, deltas_flat_order) = deltas.to_flat_index().sortlevel();
    assert_eq!(deltas_sorted, deltas_flat_sorted);
    assert_eq!(deltas_order, deltas_flat_order);

    // Period: an outer join with its own flattened form gives that form back.
    let periods = super::PeriodIndex::new(vec![
        Period::new(2, PeriodFreq::Monthly),
        Period::new(1, PeriodFreq::Monthly),
    ]);
    assert_eq!(
        periods.join(&periods.to_flat_index(), "outer").unwrap(),
        periods.to_flat_index()
    );

    // Range: an inner join matches the flat index; an unknown join kind is rejected.
    let rows = super::RangeIndex::new(2, 8, 2).unwrap();
    let join_target = super::Index::new(vec![
        super::IndexLabel::Int64(4),
        super::IndexLabel::Int64(6),
        super::IndexLabel::Int64(9),
    ]);
    assert_eq!(
        rows.join(&join_target, "inner").unwrap(),
        rows.to_flat_index().join(&join_target, "inner").unwrap()
    );
    assert!(rows.join(&join_target, "sideways").is_err());

    // Categorical: both `sortlevel` and `drop` match the flat index.
    let categorical = super::CategoricalIndex::from_values(
        vec!["b".to_owned(), "a".to_owned(), "b".to_owned()],
        false,
    );
    let (cat_sorted, cat_order) = categorical.sortlevel();
    let (cat_flat_sorted, cat_flat_order) = categorical.to_flat_index().sortlevel();
    assert_eq!(cat_sorted, cat_flat_sorted);
    assert_eq!(cat_order, cat_flat_order);
    let cats_to_drop = [super::IndexLabel::Utf8("b".to_owned())];
    assert_eq!(
        categorical.drop(&cats_to_drop),
        categorical.to_flat_index().drop(&cats_to_drop)
    );
}
19746
#[test]
fn index_variants_temporal_rounding_forwarders_dznxu() {
    use fp_types::{Period, PeriodFreq};

    let one_hour = fp_types::Timedelta::NANOS_PER_HOUR;
    let one_minute = fp_types::Timedelta::NANOS_PER_MIN;
    let missing_td = fp_types::Timedelta::NAT;

    // Datetime entries: 00:30, 01:31, and i64::MIN, which every operation
    // below must pass through unchanged.
    let stamps =
        super::DatetimeIndex::new(vec![one_hour / 2, one_hour + 31 * one_minute, i64::MIN])
            .set_name("ts");

    let floored = stamps.floor("h").unwrap();
    assert_eq!(floored.asi8(), vec![0, one_hour, i64::MIN]);
    assert_eq!(floored.name(), Some("ts"));

    let ceiled = stamps.ceil("h").unwrap();
    assert_eq!(ceiled.asi8(), vec![one_hour, 2 * one_hour, i64::MIN]);

    // The exact half-hour rounds down to 0 here; 01:31 rounds up to 02:00.
    let rounded = stamps.round("h").unwrap();
    assert_eq!(rounded.asi8(), vec![0, 2 * one_hour, i64::MIN]);

    // `snap` leaves the stored values unchanged for this input.
    let snapped = stamps.snap("h").unwrap();
    assert_eq!(snapped.asi8(), stamps.asi8());
    assert!(stamps.floor("not-a-frequency").is_err());
    assert!(stamps.snap("not-a-frequency").is_err());

    // Timedelta: same expectations, with NaT passed through unchanged.
    let deltas =
        super::TimedeltaIndex::new(vec![one_hour / 2, one_hour + 31 * one_minute, missing_td])
            .set_name("d");
    assert_eq!(
        deltas.floor("h").unwrap().asi8(),
        vec![0, one_hour, missing_td]
    );
    assert_eq!(
        deltas.ceil("h").unwrap().asi8(),
        vec![one_hour, 2 * one_hour, missing_td]
    );
    assert_eq!(
        deltas.round("h").unwrap().asi8(),
        vec![0, 2 * one_hour, missing_td]
    );
    assert_eq!(deltas.round("h").unwrap().name(), Some("d"));
    assert!(deltas.ceil("not-a-frequency").is_err());

    // Period: `round` returns the index directly (no Result) and leaves the
    // values and name untouched even for an unrecognised frequency string.
    let periods = super::PeriodIndex::new(vec![
        Period::new(10, PeriodFreq::Monthly),
        Period::new(11, PeriodFreq::Monthly),
    ])
    .set_name("p");
    let periods_rounded = periods.round("not-a-frequency");
    assert_eq!(periods_rounded.values(), periods.values());
    assert_eq!(periods_rounded.name(), Some("p"));
}
19787
/// Checks `diff` on every index variant for positive, negative and zero lags.
///
/// Datetime/Timedelta results are compared through `asi8`, Period/Range results are
/// compared as `Option` vectors, and the categorical variant must fail with an
/// `InvalidArgument` error mentioning the missing `diff` method.
#[test]
fn index_variants_diff_forwarders_lqs0a() {
    use fp_types::{Period, PeriodFreq, Timedelta};

    let one_day = Timedelta::NANOS_PER_DAY;
    let missing = Timedelta::NAT;

    let stamps = super::DatetimeIndex::new(vec![one_day, 3 * one_day, i64::MIN, 10 * one_day])
        .set_name("ts");
    assert_eq!(
        stamps.diff(1).asi8(),
        vec![missing, 2 * one_day, missing, missing]
    );
    assert_eq!(
        stamps.diff(-1).asi8(),
        vec![-2 * one_day, missing, missing, missing]
    );
    assert_eq!(stamps.diff(0).asi8(), vec![0, 0, missing, 0]);
    assert_eq!(stamps.diff(1).name(), Some("ts"));

    let deltas = super::TimedeltaIndex::new(vec![one_day, 4 * one_day, missing, 9 * one_day])
        .set_name("delta");
    assert_eq!(
        deltas.diff(2).asi8(),
        vec![missing, missing, missing, 5 * one_day]
    );
    assert_eq!(
        deltas.diff(-1).asi8(),
        vec![-3 * one_day, missing, missing, missing]
    );
    assert_eq!(deltas.diff(0).asi8(), vec![0, 0, missing, 0]);
    assert_eq!(deltas.diff(1).name(), Some("delta"));

    // Two monthly periods followed by two quarterly ones: the expected vectors have no
    // difference across the frequency change.
    let mixed_periods = super::PeriodIndex::new(vec![
        Period::new(10, PeriodFreq::Monthly),
        Period::new(12, PeriodFreq::Monthly),
        Period::new(13, PeriodFreq::Quarterly),
        Period::new(15, PeriodFreq::Quarterly),
    ]);
    assert_eq!(mixed_periods.diff(1), vec![None, Some(2), None, Some(2)]);
    assert_eq!(mixed_periods.diff(-1), vec![Some(-2), None, Some(-2), None]);
    assert_eq!(
        mixed_periods.diff(0),
        vec![Some(0), Some(0), Some(0), Some(0)]
    );

    let evens = super::RangeIndex::new(2, 10, 2).unwrap().set_name("r");
    assert_eq!(evens.diff(1), vec![None, Some(2), Some(2), Some(2)]);
    assert_eq!(evens.diff(-2), vec![Some(-4), Some(-4), None, None]);
    assert_eq!(evens.diff(0), vec![Some(0), Some(0), Some(0), Some(0)]);
    assert_eq!(evens.name(), Some("r"));

    let categories =
        super::CategoricalIndex::from_values(vec!["a".to_owned(), "b".to_owned()], false);
    assert!(matches!(
        categories.diff(1).unwrap_err(),
        super::IndexError::InvalidArgument(message)
            if message.contains("Categorical has no 'diff' method")
    ));
}
19830
/// Converts three timestamps (one second before the epoch, the epoch itself, and
/// 2024-02-29 12:34:56) to every supported period frequency and pins the ordinals.
///
/// Also asserts that the name survives the conversion and that NaT input or an
/// unknown frequency code yields `IndexError::InvalidArgument`.
#[test]
fn datetime_index_to_period_matches_pandas_ordinals_002sq()
-> Result<(), Box<dyn std::error::Error>> {
    use fp_types::{Period, PeriodFreq};

    fn nanos(text: &str) -> Result<i64, super::DateRangeError> {
        super::parse_datetime_to_nanos(text)
    }

    // Expands to the same `assert_eq!(<index>.values(), &[Period::new(..), ..])` form
    // for each frequency, so only the frequency variant and the ordinals vary per call.
    macro_rules! assert_ordinals {
        ($converted:expr, $freq:ident, [$first:expr, $second:expr, $third:expr]) => {
            assert_eq!(
                $converted.values(),
                &[
                    Period::new($first, PeriodFreq::$freq),
                    Period::new($second, PeriodFreq::$freq),
                    Period::new($third, PeriodFreq::$freq),
                ]
            )
        };
    }

    let stamps = super::DatetimeIndex::new(vec![
        nanos("1969-12-31 23:59:59")?,
        nanos("1970-01-01 00:00:00")?,
        nanos("2024-02-29 12:34:56")?,
    ])
    .set_name("ts");

    assert_ordinals!(stamps.to_period("Y")?, Annual, [-1, 0, 54]);
    assert_ordinals!(stamps.to_period("Q")?, Quarterly, [-1, 0, 216]);
    assert_ordinals!(stamps.to_period("M")?, Monthly, [-1, 0, 649]);
    assert_ordinals!(stamps.to_period("D")?, Daily, [-1, 0, 19_782]);
    // The first two timestamps are expected to land in the same weekly period.
    assert_ordinals!(stamps.to_period("W")?, Weekly, [1, 1, 2_827]);
    assert_ordinals!(stamps.to_period("B")?, Business, [-1, 0, 14_130]);
    assert_ordinals!(stamps.to_period("H")?, Hourly, [-1, 0, 474_780]);

    let by_minute = stamps.to_period("min")?;
    assert_ordinals!(by_minute, Minutely, [-1, 0, 28_486_834]);
    assert_eq!(by_minute.name(), Some("ts"));

    assert_ordinals!(stamps.to_period("S")?, Secondly, [-1, 0, 1_709_210_096]);

    assert!(matches!(
        super::DatetimeIndex::new(vec![i64::MIN]).to_period("M"),
        Err(super::IndexError::InvalidArgument(message))
            if message.contains("invalid or NaT datetime nanos")
    ));
    assert!(matches!(
        stamps.to_period("fortnight"),
        Err(super::IndexError::InvalidArgument(message))
            if message.contains("unsupported frequency")
    ));

    Ok(())
}
19935
/// Pins `PeriodIndex::asfreq` / `asfreq_with_how` ordinals when converting to a finer
/// frequency.
///
/// The plain `asfreq` expectations are the last sub-period of each source period, while
/// the "start"/"s"/"begin" spellings are expected to give the first one. Invalid `how`
/// values and unknown frequency codes must be rejected.
#[test]
fn period_index_asfreq_boundary_conversion_h1zia() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    // Builds a fixed-size array of periods sharing one frequency.
    macro_rules! periods {
        ($freq:ident; $($ordinal:expr),+ $(,)?) => {
            [$(Period::new($ordinal, PeriodFreq::$freq)),+]
        };
    }

    let yearly = super::PeriodIndex::new(vec![
        Period::new(0, PeriodFreq::Annual),
        Period::new(1, PeriodFreq::Annual),
    ])
    .set_name("p");
    assert_eq!(yearly.asfreq("M")?.values(), &periods!(Monthly; 11, 23));
    let yearly_start = yearly.asfreq_with_how("M", "start")?;
    assert_eq!(yearly_start.values(), &periods!(Monthly; 0, 12));
    assert_eq!(yearly_start.name(), Some("p"));

    let quarters = super::PeriodIndex::new(vec![
        Period::new(0, PeriodFreq::Quarterly),
        Period::new(1, PeriodFreq::Quarterly),
    ]);
    assert_eq!(quarters.asfreq("D")?.values(), &periods!(Daily; 89, 180));
    assert_eq!(
        quarters.asfreq_with_how("D", "s")?.values(),
        &periods!(Daily; 0, 90)
    );

    let months = super::PeriodIndex::new(vec![
        Period::new(0, PeriodFreq::Monthly),
        Period::new(1, PeriodFreq::Monthly),
    ]);
    assert_eq!(
        months.asfreq("S")?.values(),
        &periods!(Secondly; 2_678_399, 5_097_599)
    );
    assert_eq!(
        months.asfreq_with_how("S", "begin")?.values(),
        &periods!(Secondly; 0, 2_678_400)
    );
    assert_eq!(months.asfreq("B")?.values(), &periods!(Business; 21, 41));
    assert_eq!(
        months.asfreq_with_how("W", "start")?.values(),
        &periods!(Weekly; 1, 5)
    );

    // Error paths: bad `how` keyword, then bad frequency code.
    assert!(matches!(
        months.asfreq_with_how("D", "middle"),
        Err(super::IndexError::InvalidArgument(message))
            if message.contains("asfreq how must be 'start' or 'end'")
    ));
    assert!(matches!(
        months.asfreq("fortnight"),
        Err(super::IndexError::InvalidArgument(message))
            if message.contains("unsupported frequency")
    ));

    Ok(())
}
20026
/// Verifies `start_time`, `end_time`, `to_timestamp` and `qyear` on `PeriodIndex`.
///
/// Each expected end time is written as the next period's start minus one nanosecond.
/// `to_timestamp` must agree with `start_time`/`end_time` for "start"/"end" and reject
/// any other keyword ("middle" here), while the empty string is accepted.
#[test]
fn period_index_timestamp_boundaries_d44wh() -> Result<(), Box<dyn std::error::Error>> {
    use fp_types::{Period, PeriodFreq};

    fn nanos(text: &str) -> Result<i64, super::DateRangeError> {
        super::parse_datetime_to_nanos(text)
    }

    let months = super::PeriodIndex::new(vec![
        Period::new(0, PeriodFreq::Monthly),
        Period::new(1, PeriodFreq::Monthly),
    ])
    .set_name("period");
    assert_eq!(
        months.start_time()?.asi8(),
        vec![nanos("1970-01-01 00:00:00")?, nanos("1970-02-01 00:00:00")?]
    );
    assert_eq!(
        months.end_time()?.asi8(),
        vec![
            nanos("1970-02-01 00:00:00")? - 1,
            nanos("1970-03-01 00:00:00")? - 1
        ]
    );
    assert_eq!(
        months.to_timestamp("start")?.asi8(),
        months.start_time()?.asi8()
    );
    assert_eq!(
        months.to_timestamp("end")?.asi8(),
        months.end_time()?.asi8()
    );
    assert_eq!(months.to_timestamp("")?.name(), Some("period"));
    assert_eq!(months.qyear()?, vec![1970, 1970]);
    assert!(matches!(
        months.to_timestamp("middle"),
        Err(super::IndexError::InvalidArgument(message))
            if message.contains("to_timestamp how must be 'start' or 'end'")
    ));

    // Quarter -1 is expected to start on 1969-10-01, i.e. before the epoch.
    let quarters = super::PeriodIndex::new(vec![
        Period::new(-1, PeriodFreq::Quarterly),
        Period::new(0, PeriodFreq::Quarterly),
    ]);
    assert_eq!(
        quarters.start_time()?.asi8(),
        vec![nanos("1969-10-01 00:00:00")?, nanos("1970-01-01 00:00:00")?]
    );
    assert_eq!(
        quarters.end_time()?.asi8(),
        vec![
            nanos("1970-01-01 00:00:00")? - 1,
            nanos("1970-04-01 00:00:00")? - 1
        ]
    );
    assert_eq!(quarters.qyear()?, vec![1969, 1970]);

    // One index holding weekly, business-day and hourly periods side by side.
    let one_hour = fp_types::Timedelta::NANOS_PER_HOUR;
    let assorted = super::PeriodIndex::new(vec![
        Period::new(1, PeriodFreq::Weekly),
        Period::new(2, PeriodFreq::Business),
        Period::new(1, PeriodFreq::Hourly),
    ]);
    assert_eq!(
        assorted.start_time()?.asi8(),
        vec![
            nanos("1969-12-29 00:00:00")?,
            nanos("1970-01-05 00:00:00")?,
            one_hour
        ]
    );
    assert_eq!(
        assorted.end_time()?.asi8(),
        vec![
            nanos("1970-01-05 00:00:00")? - 1,
            nanos("1970-01-06 00:00:00")? - 1,
            2 * one_hour - 1
        ]
    );
    assert_eq!(assorted.qyear()?, vec![1970, 1970, 1970]);

    Ok(())
}
20109
/// Confirms that `view`, `transpose`/`T`, `ravel`, `nlevels` (and `infer_objects` for the
/// datetime variant) leave each index variant's contents unchanged and report one level.
#[test]
fn index_variants_view_transpose_ravel_nlevels_infer_objects_match_pandas_d0ph1() {
    use fp_types::{Period, PeriodFreq};

    const NANOS_PER_SECOND: i64 = 1_000_000_000;

    let stamps = super::DatetimeIndex::new(vec![1_704_067_200_i64 * NANOS_PER_SECOND, i64::MIN])
        .set_name("ts");
    assert!(stamps.view().equals(&stamps));
    assert!(stamps.transpose().equals(&stamps));
    assert!(stamps.T().identical(&stamps));
    assert_eq!(stamps.ravel(), stamps.values());
    assert_eq!(stamps.nlevels(), 1);
    assert!(stamps.infer_objects().equals(&stamps));

    let deltas =
        super::TimedeltaIndex::new(vec![100_i64, fp_types::Timedelta::NAT]).set_name("d");
    assert!(deltas.view().equals(&deltas));
    assert!(deltas.T().identical(&deltas));
    assert_eq!(deltas.ravel(), deltas.values());
    assert_eq!(deltas.nlevels(), 1);

    let months = super::PeriodIndex::new(vec![
        Period::new(10, PeriodFreq::Monthly),
        Period::new(11, PeriodFreq::Monthly),
    ]);
    assert_eq!(months.view().values(), months.values());
    assert!(months.T().identical(&months));
    assert_eq!(months.ravel(), months.values().to_vec());
    assert_eq!(months.nlevels(), 1);

    let span = super::RangeIndex::new(0, 5, 1).unwrap();
    assert!(span.view().equals(&span));
    assert!(span.T().identical(&span));
    assert_eq!(span.ravel(), span.values());
    assert_eq!(span.nlevels(), 1);

    let categories =
        super::CategoricalIndex::from_values(vec!["a".to_owned(), "b".to_owned()], false);
    assert_eq!(categories.view().labels(), categories.labels());
    assert!(categories.T().identical(&categories));
    assert_eq!(categories.ravel(), categories.labels().to_vec());
    assert_eq!(categories.nlevels(), 1);
}
20149
/// Set operations on two overlapping `DatetimeIndex` values `{t0, t1, t2}` and
/// `{t1, t2, t3}`, plus the expectation that differing names collapse to `None`.
#[test]
fn datetime_index_set_ops_match_pandas_ik8if() {
    const NANOS_PER_SECOND: i64 = 1_000_000_000;
    let t0 = 1_704_067_200_i64 * NANOS_PER_SECOND;
    let t1 = 1_705_276_800_i64 * NANOS_PER_SECOND;
    let t2 = 1_706_140_800_i64 * NANOS_PER_SECOND;
    let t3 = 1_707_350_400_i64 * NANOS_PER_SECOND;

    let lhs = super::DatetimeIndex::new(vec![t0, t1, t2]).set_name("ts");
    let rhs = super::DatetimeIndex::new(vec![t1, t2, t3]).set_name("ts");

    // Shared elements only; the common name is kept.
    let common = lhs.intersection(&rhs);
    assert_eq!(common.values(), vec![Some(t1), Some(t2)]);
    assert_eq!(common.name(), Some("ts"));

    // Every distinct element from both sides.
    let combined = lhs.union(&rhs);
    assert_eq!(
        combined.values(),
        vec![Some(t0), Some(t1), Some(t2), Some(t3)]
    );

    // Elements present on the left only.
    let left_only = lhs.difference(&rhs);
    assert_eq!(left_only.values(), vec![Some(t0)]);

    // Elements present on exactly one side.
    let exclusive = lhs.symmetric_difference(&rhs);
    assert_eq!(exclusive.values(), vec![Some(t0), Some(t3)]);

    // Operands with different names produce an unnamed result.
    let renamed = super::DatetimeIndex::new(vec![t1]).set_name("other");
    assert_eq!(lhs.intersection(&renamed).name(), None);
    assert_eq!(lhs.union(&renamed).name(), None);
}
20182
/// Set operations on two overlapping `TimedeltaIndex` values `{100, 200, 300}` and
/// `{200, 300, 400}`; the shared name "d" must survive the intersection.
#[test]
fn timedelta_index_set_ops_match_pandas_ik8if() {
    let lhs = super::TimedeltaIndex::new(vec![100_i64, 200, 300]).set_name("d");
    let rhs = super::TimedeltaIndex::new(vec![200_i64, 300, 400]).set_name("d");

    let common = lhs.intersection(&rhs);
    assert_eq!(common.values(), vec![Some(200), Some(300)]);
    assert_eq!(common.name(), Some("d"));

    let combined = lhs.union(&rhs);
    assert_eq!(
        combined.values(),
        vec![Some(100), Some(200), Some(300), Some(400)]
    );

    let left_only = lhs.difference(&rhs);
    assert_eq!(left_only.values(), vec![Some(100)]);

    let exclusive = lhs.symmetric_difference(&rhs);
    assert_eq!(exclusive.values(), vec![Some(100), Some(400)]);
}
20204
/// `TimedeltaIndex::sum` is expected to skip NaT entries and to return `Some(0)` when
/// nothing remains to add (all-NaT or empty input).
#[test]
fn timedelta_index_sum_match_pandas_qi04e() {
    let missing = fp_types::Timedelta::NAT;

    let with_gap = super::TimedeltaIndex::new(vec![10_i64, 20, 30, missing]);
    assert_eq!(with_gap.sum(), Some(60));

    let all_missing = super::TimedeltaIndex::new(vec![missing, missing]);
    assert_eq!(all_missing.sum(), Some(0));

    let no_values = super::TimedeltaIndex::new(vec![]);
    assert_eq!(no_values.sum(), Some(0));
}
20217
/// Variance checks for the timedelta and datetime index variants.
#[test]
fn datetime_timedelta_var_match_pandas_pw5sn() {
    // [10, 20, 30]: squared deviations sum to 200, and the asserted result is 100.
    let deltas = super::TimedeltaIndex::new(vec![10_i64, 20, 30]);
    let variance = deltas.var().unwrap();
    assert!((variance - 100.0).abs() < 1e-9);

    // A single observation is expected to have no variance value.
    let lone = super::TimedeltaIndex::new(vec![5_i64]);
    assert_eq!(lone.var(), None);

    // The datetime variant only has to produce some value here.
    const NANOS_PER_SECOND: i64 = 1_000_000_000;
    let stamps = super::DatetimeIndex::new(vec![
        10 * NANOS_PER_SECOND,
        20 * NANOS_PER_SECOND,
        30 * NANOS_PER_SECOND,
    ]);
    assert!(stamps.var().is_some());
}
20233
/// Standard-deviation checks for the timedelta and datetime index variants.
#[test]
fn datetime_timedelta_std_match_pandas_3hb3t() {
    // [10, 20, 30] is asserted to have a standard deviation of exactly 10.
    let three = super::TimedeltaIndex::new(vec![10_i64, 20, 30]);
    assert_eq!(three.std(), Some(10));

    // [10, 30]: the expectation is sqrt(200) truncated by the `as i64` cast.
    let pair = super::TimedeltaIndex::new(vec![10_i64, 30]);
    let truncated_root = 200f64.sqrt() as i64;
    assert_eq!(pair.std(), Some(truncated_root));

    // One value, or only NaT, is expected to give no result.
    let lone = super::TimedeltaIndex::new(vec![5_i64]);
    assert_eq!(lone.std(), None);
    let only_missing = super::TimedeltaIndex::new(vec![fp_types::Timedelta::NAT]);
    assert_eq!(only_missing.std(), None);

    // The datetime variant only has to produce some value here.
    const NANOS_PER_SECOND: i64 = 1_000_000_000;
    let stamps = super::DatetimeIndex::new(vec![
        10 * NANOS_PER_SECOND,
        20 * NANOS_PER_SECOND,
        30 * NANOS_PER_SECOND,
    ]);
    assert!(stamps.std().is_some());
}
20256
/// `shift(periods, freq)` is expected to add `periods * freq` to each present value,
/// leave missing entries as `None`, and keep the index name.
#[test]
fn datetime_timedelta_shift_match_pandas_1y3sx() {
    const NANOS_PER_SECOND: i64 = 1_000_000_000;
    let one_day = 86_400 * NANOS_PER_SECOND;
    let origin = 1_704_067_200_i64 * NANOS_PER_SECOND;
    let stamps = super::DatetimeIndex::new(vec![origin, i64::MIN]).set_name("ts");

    // Two days forward.
    let forward = stamps.shift(2, one_day);
    assert_eq!(forward.values()[0], Some(origin + 2 * one_day));
    assert_eq!(forward.values()[1], None);
    assert_eq!(forward.name(), Some("ts"));

    // One day backward via a negative period count.
    let backward = stamps.shift(-1, one_day);
    assert_eq!(backward.values()[0], Some(origin - one_day));

    // Timedelta variant: 100 + 3 * 50 = 250, NaT stays missing.
    let deltas = super::TimedeltaIndex::new(vec![100_i64, fp_types::Timedelta::NAT]);
    let moved = deltas.shift(3, 50);
    assert_eq!(moved.values()[0], Some(250));
    assert_eq!(moved.values()[1], None);
}
20282
/// Mean/median checks: missing entries are expected to be ignored, all-missing input
/// gives `None`, and an even-length median is the midpoint of the two values.
#[test]
fn datetime_timedelta_mean_median_match_pandas_wp0gr() {
    const NANOS_PER_SECOND: i64 = 1_000_000_000;
    let low = 1_000_000_000_i64 * NANOS_PER_SECOND;
    let mid = 2_000_000_000_i64 * NANOS_PER_SECOND;
    let high = 3_000_000_000_i64 * NANOS_PER_SECOND;

    // Three evenly spaced stamps plus one missing entry: both statistics equal `mid`.
    let stamps = super::DatetimeIndex::new(vec![low, mid, high, i64::MIN]);
    assert_eq!(stamps.mean(), Some(mid));
    assert_eq!(stamps.median(), Some(mid));

    // Even length: the midpoint is computed in i128 so the sum cannot overflow i64.
    let pair = super::DatetimeIndex::new(vec![low, mid]);
    let widened_sum = i128::from(low) + i128::from(mid);
    let midpoint = i64::try_from(widened_sum / 2).unwrap();
    assert_eq!(pair.median(), Some(midpoint));

    let all_missing = super::DatetimeIndex::new(vec![i64::MIN; 3]);
    assert_eq!(all_missing.mean(), None);
    assert_eq!(all_missing.median(), None);

    let deltas = super::TimedeltaIndex::new(vec![10_i64, 20, 30]);
    assert_eq!(deltas.mean(), Some(20));
    assert_eq!(deltas.median(), Some(20));
}
20311
/// `min`/`max`/`sort_values`/`sort` on `DatetimeIndex`, including all-missing and empty
/// inputs. The missing entry is expected first in the sorted output.
#[test]
fn datetime_index_min_max_sort_values_match_pandas_kastf() {
    const NANOS_PER_SECOND: i64 = 1_000_000_000;
    let earliest = 1_704_067_200_i64 * NANOS_PER_SECOND;
    let middle = 1_705_276_800_i64 * NANOS_PER_SECOND;
    let latest = 1_706_140_800_i64 * NANOS_PER_SECOND;
    let stamps =
        super::DatetimeIndex::new(vec![middle, latest, i64::MIN, earliest]).set_name("ts");

    assert_eq!(stamps.min(), Some(earliest));
    assert_eq!(stamps.max(), Some(latest));

    let ordered = stamps.sort_values();
    let ordered_via_alias = stamps.sort();
    assert_eq!(
        ordered.values(),
        vec![None, Some(earliest), Some(middle), Some(latest)]
    );
    // `sort` must agree with `sort_values` on both values and name.
    assert_eq!(ordered_via_alias.values(), ordered.values());
    assert_eq!(ordered.name(), Some("ts"));
    assert_eq!(ordered_via_alias.name(), Some("ts"));

    let only_missing = super::DatetimeIndex::new(vec![i64::MIN, i64::MIN]);
    assert_eq!(only_missing.min(), None);
    assert_eq!(only_missing.max(), None);

    let no_values = super::DatetimeIndex::new(vec![]);
    assert_eq!(no_values.min(), None);
    assert_eq!(no_values.max(), None);
    assert!(no_values.sort_values().is_empty());
    assert!(no_values.sort().is_empty());
}
20341
/// `min`/`max`/`sort_values`/`sort` on `TimedeltaIndex`, including all-NaT and empty
/// inputs. The NaT entry is expected first in the sorted output.
#[test]
fn timedelta_index_min_max_sort_values_match_pandas_kastf() {
    let missing = fp_types::Timedelta::NAT;
    let deltas = super::TimedeltaIndex::new(vec![300_i64, missing, 100, 200]).set_name("d");

    assert_eq!(deltas.min(), Some(100));
    assert_eq!(deltas.max(), Some(300));

    let ordered = deltas.sort_values();
    let ordered_via_alias = deltas.sort();
    assert_eq!(
        ordered.values(),
        vec![None, Some(100), Some(200), Some(300)]
    );
    // `sort` must agree with `sort_values` on both values and name.
    assert_eq!(ordered_via_alias.values(), ordered.values());
    assert_eq!(ordered.name(), Some("d"));
    assert_eq!(ordered_via_alias.name(), Some("d"));

    let only_missing = super::TimedeltaIndex::new(vec![missing, missing]);
    assert_eq!(only_missing.min(), None);
    assert_eq!(only_missing.max(), None);

    let no_values = super::TimedeltaIndex::new(vec![]);
    assert_eq!(no_values.min(), None);
    assert_eq!(no_values.max(), None);
    assert!(no_values.sort().is_empty());
}
20366
/// `append` concatenates and keeps a shared name (dropping it on mismatch); `delete`
/// removes one position and reports `OutOfBounds` for an invalid one.
#[test]
fn datetime_index_append_delete_match_pandas_834v9() -> Result<(), super::IndexError> {
    const NANOS_PER_SECOND: i64 = 1_000_000_000;
    let t0 = 1_704_067_200_i64 * NANOS_PER_SECOND;
    let t1 = 1_705_276_800_i64 * NANOS_PER_SECOND;
    let t2 = 1_706_140_800_i64 * NANOS_PER_SECOND;
    let head = super::DatetimeIndex::new(vec![t0, t1]).set_name("ts");
    let tail = super::DatetimeIndex::new(vec![t2]).set_name("ts");

    let joined = head.append(&tail);
    assert_eq!(joined.values(), vec![Some(t0), Some(t1), Some(t2)]);
    assert_eq!(joined.name(), Some("ts"));

    let renamed_tail = super::DatetimeIndex::new(vec![t2]).set_name("other");
    assert_eq!(head.append(&renamed_tail).name(), None);

    // Drop the middle element of a freshly appended index.
    let without_middle = head.append(&tail).delete(1)?;
    assert_eq!(without_middle.values(), vec![Some(t0), Some(t2)]);
    assert_eq!(without_middle.name(), Some("ts"));

    // Position 5 does not exist in a two-element index.
    assert!(matches!(
        head.delete(5).unwrap_err(),
        super::IndexError::OutOfBounds {
            position: 5,
            length: 2
        }
    ));
    Ok(())
}
20397
/// `append`/`delete` on `TimedeltaIndex`: concatenation keeps the shared name, deleting
/// position 0 drops the first value, and an invalid position yields `OutOfBounds`.
#[test]
fn timedelta_index_append_delete_match_pandas_834v9() -> Result<(), super::IndexError> {
    let head = super::TimedeltaIndex::new(vec![1_i64, 2]).set_name("d");
    let tail = super::TimedeltaIndex::new(vec![3_i64]).set_name("d");

    let joined = head.append(&tail);
    assert_eq!(joined.values(), vec![Some(1), Some(2), Some(3)]);
    assert_eq!(joined.name(), Some("d"));

    let without_first = joined.delete(0)?;
    assert_eq!(without_first.values(), vec![Some(2), Some(3)]);

    let out_of_range = head.delete(7).unwrap_err();
    assert!(matches!(
        out_of_range,
        super::IndexError::OutOfBounds {
            position: 7,
            length: 2
        }
    ));
    Ok(())
}
20418
/// `append`/`delete` on `PeriodIndex`: concatenation keeps a shared name and drops a
/// mismatched one; `delete` removes one position or fails with `OutOfBounds`.
#[test]
fn period_index_append_delete_match_pandas_834v9() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let first = Period::new(10, PeriodFreq::Monthly);
    let second = Period::new(11, PeriodFreq::Monthly);
    let third = Period::new(12, PeriodFreq::Monthly);
    let head = super::PeriodIndex::new(vec![first, second]).set_name("p");
    let tail = super::PeriodIndex::new(vec![third]).set_name("p");

    let joined = head.append(&tail);
    assert_eq!(joined.values(), &[first, second, third]);
    assert_eq!(joined.name(), Some("p"));

    let renamed_tail = super::PeriodIndex::new(vec![third]).set_name("other");
    assert_eq!(head.append(&renamed_tail).name(), None);

    let without_middle = joined.delete(1)?;
    assert_eq!(without_middle.values(), &[first, third]);

    let out_of_range = head.delete(5).unwrap_err();
    assert!(matches!(
        out_of_range,
        super::IndexError::OutOfBounds {
            position: 5,
            length: 2
        }
    ));
    Ok(())
}
20447
/// `append`/`delete` on `RangeIndex`, with results read back through the
/// `int64_labels` test helper.
#[test]
fn range_index_append_delete_match_pandas_834v9() -> Result<(), super::IndexError> {
    let head = super::RangeIndex::new(0, 3, 1).unwrap();
    let tail = super::RangeIndex::new(10, 12, 1).unwrap();

    // 0..3 followed by 10..12.
    let joined = head.append(&tail);
    assert_eq!(int64_labels(&joined), vec![0, 1, 2, 10, 11]);

    let without_middle = head.delete(1)?;
    assert_eq!(int64_labels(&without_middle), vec![0, 2]);

    let out_of_range = head.delete(99).unwrap_err();
    assert!(matches!(
        out_of_range,
        super::IndexError::OutOfBounds {
            position: 99,
            length: 3
        }
    ));
    Ok(())
}
20469
/// `take`, `repeat` and `isin` on `DatetimeIndex`, including the out-of-bounds error
/// from `take` and membership of the i64::MIN entry in `isin`.
#[test]
fn datetime_index_take_repeat_isin_match_pandas_bbgg3() -> Result<(), super::IndexError> {
    const NANOS_PER_SECOND: i64 = 1_000_000_000;
    let t0 = 1_704_067_200_i64 * NANOS_PER_SECOND;
    let t1 = 1_705_276_800_i64 * NANOS_PER_SECOND;
    let t2 = 1_706_140_800_i64 * NANOS_PER_SECOND;
    let stamps = super::DatetimeIndex::new(vec![t0, t1, t2]).set_name("ts");

    // Positions may repeat and appear in any order.
    let picked = stamps.take(&[2, 0, 0])?;
    assert_eq!(picked.values(), vec![Some(t2), Some(t0), Some(t0)]);
    assert_eq!(picked.name(), Some("ts"));

    assert!(matches!(
        stamps.take(&[3]).unwrap_err(),
        super::IndexError::OutOfBounds {
            position: 3,
            length: 3
        }
    ));

    // Each element is duplicated in place.
    let doubled = stamps.repeat(2);
    assert_eq!(
        doubled.values(),
        vec![Some(t0), Some(t0), Some(t1), Some(t1), Some(t2), Some(t2)]
    );
    assert_eq!(doubled.name(), Some("ts"));

    let membership = stamps.isin(&[t0, t2]);
    assert_eq!(membership, vec![true, false, true]);

    // The i64::MIN entry is matched when i64::MIN is among the candidates.
    let with_missing = super::DatetimeIndex::new(vec![i64::MIN, t0]);
    assert_eq!(with_missing.isin(&[i64::MIN]), vec![true, false]);
    Ok(())
}
20505
/// `take`, `repeat` and `isin` on `TimedeltaIndex`, including the out-of-bounds error
/// from `take` and a candidate (999) that is not in the index.
#[test]
fn timedelta_index_take_repeat_isin_match_pandas_bbgg3() -> Result<(), super::IndexError> {
    let deltas = super::TimedeltaIndex::new(vec![100_i64, 200, 300]).set_name("d");

    let picked = deltas.take(&[2, 0])?;
    assert_eq!(picked.values(), vec![Some(300), Some(100)]);
    assert_eq!(picked.name(), Some("d"));

    let out_of_range = deltas.take(&[7]).unwrap_err();
    assert!(matches!(
        out_of_range,
        super::IndexError::OutOfBounds {
            position: 7,
            length: 3
        }
    ));

    let doubled = deltas.repeat(2);
    assert_eq!(
        doubled.values(),
        vec![
            Some(100),
            Some(100),
            Some(200),
            Some(200),
            Some(300),
            Some(300)
        ]
    );

    let membership = deltas.isin(&[200, 999]);
    assert_eq!(membership, vec![false, true, false]);
    Ok(())
}
20538
/// `take`, `repeat` and `isin` on `PeriodIndex`, including the out-of-bounds error
/// from `take`.
#[test]
fn period_index_take_repeat_isin_match_pandas_bbgg3() -> Result<(), super::IndexError> {
    use fp_types::{Period, PeriodFreq};

    let first = Period::new(10, PeriodFreq::Monthly);
    let second = Period::new(11, PeriodFreq::Monthly);
    let third = Period::new(12, PeriodFreq::Monthly);
    let months = super::PeriodIndex::new(vec![first, second, third]).set_name("pp");

    let picked = months.take(&[2, 1])?;
    assert_eq!(picked.values(), &[third, second]);
    assert_eq!(picked.name(), Some("pp"));

    let out_of_range = months.take(&[5]).unwrap_err();
    assert!(matches!(
        out_of_range,
        super::IndexError::OutOfBounds {
            position: 5,
            length: 3
        }
    ));

    let doubled = months.repeat(2);
    assert_eq!(
        doubled.values(),
        &[first, first, second, second, third, third]
    );

    let membership = months.isin(&[first, third]);
    assert_eq!(membership, vec![true, false, true]);
    Ok(())
}
20566
/// `take`, `repeat` and `isin` on `RangeIndex` (0..5), with results read back through
/// the `int64_labels` test helper.
#[test]
fn range_index_take_repeat_isin_match_pandas_bbgg3() -> Result<(), super::IndexError> {
    let span = super::RangeIndex::new(0, 5, 1).unwrap();

    let picked = span.take(&[2, 4, 0])?;
    assert_eq!(int64_labels(&picked), vec![2, 4, 0]);

    let out_of_range = span.take(&[10]).unwrap_err();
    assert!(matches!(
        out_of_range,
        super::IndexError::OutOfBounds {
            position: 10,
            length: 5
        }
    ));

    let doubled = span.repeat(2);
    assert_eq!(int64_labels(&doubled), vec![0, 0, 1, 1, 2, 2, 3, 3, 4, 4]);

    // 99 lies outside the range and must not match anything.
    let membership = span.isin(&[1, 3, 99]);
    assert_eq!(membership, vec![false, true, false, true, false]);
    Ok(())
}
20590
/// `unique`, `duplicated`, `drop_duplicates`, `value_counts` and `factorize` on a
/// `PeriodIndex` laid out as `[p1, p2, p1, p3, p2, p1]`.
#[test]
fn period_index_forwarder_methods_match_pandas_zke9k() {
    use fp_types::{Period, PeriodFreq};

    let first = Period::new(10, PeriodFreq::Monthly);
    let second = Period::new(11, PeriodFreq::Monthly);
    let third = Period::new(12, PeriodFreq::Monthly);
    let months = super::PeriodIndex::new(vec![first, second, first, third, second, first])
        .set_name("p");

    // Distinct values in first-seen order, name preserved.
    let distinct = months.unique();
    assert_eq!(distinct.values(), &[first, second, third]);
    assert_eq!(distinct.name(), Some("p"));

    // First: every occurrence after the first one is flagged.
    assert_eq!(
        months.duplicated(super::DuplicateKeep::First),
        vec![false, false, true, false, true, true]
    );
    // Last: every occurrence before the last one is flagged.
    assert_eq!(
        months.duplicated(super::DuplicateKeep::Last),
        vec![true, true, true, false, false, false]
    );
    // None: every value that occurs more than once is flagged everywhere.
    assert_eq!(
        months.duplicated(super::DuplicateKeep::None),
        vec![true, true, true, false, true, true]
    );

    let deduplicated = months.drop_duplicates();
    assert_eq!(deduplicated.values(), &[first, second, third]);

    // Counts must add up to the index length, lead with a count of 3, and report 3
    // for `first`.
    let tallies = months.value_counts();
    let tally_sum = tallies.iter().map(|entry| entry.1).sum::<usize>();
    assert_eq!(tally_sum, months.len());
    assert_eq!(tallies[0].1, 3);
    let first_tally = tallies
        .iter()
        .find(|(period, _)| *period == first)
        .map(|(_, n)| *n)
        .expect("p1 should be counted");
    assert_eq!(first_tally, 3);

    let (codes, code_book) = months.factorize();
    assert_eq!(codes, vec![0, 1, 0, 2, 1, 0]);
    assert_eq!(code_book.values(), &[first, second, third]);
}
20631
/// An empty `PeriodIndex` must give empty results from `unique`, `drop_duplicates`,
/// `value_counts` and both halves of `factorize`.
#[test]
fn period_index_unique_handles_empty_zke9k() {
    let no_periods = super::PeriodIndex::new(Vec::new());

    assert!(no_periods.unique().is_empty());
    assert!(no_periods.drop_duplicates().is_empty());
    assert!(no_periods.value_counts().is_empty());

    let (codes, code_book) = no_periods.factorize();
    assert!(codes.is_empty());
    assert!(code_book.is_empty());
}
20642
/// Missingness helpers on a `CategoricalIndex` built from three plain strings:
/// nothing is reported as null, and `dropna`/`fillna` leave the labels unchanged.
#[test]
fn categorical_index_missingness_methods_are_closed_form_c0knj() {
    let letters = super::CategoricalIndex::from_values(
        vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
        false,
    );
    assert_eq!(letters.isnull(), vec![false, false, false]);
    assert_eq!(letters.notnull(), vec![true, true, true]);
    assert!(!letters.hasnans());

    let after_dropna = letters.dropna();
    assert_eq!(after_dropna.labels(), letters.labels());
    let after_fillna = letters.fillna("z");
    assert_eq!(after_fillna.labels(), letters.labels());

    // Empty input: an empty mask and no NaNs.
    let no_labels = super::CategoricalIndex::from_values(Vec::<String>::new(), false);
    assert_eq!(no_labels.isnull(), Vec::<bool>::new());
    assert!(!no_labels.hasnans());
}
20661
/// `append`, `delete`, `insert` and `repeat` on `CategoricalIndex`.
///
/// The base index holds labels a/b/c with categories a/b/c/d. Appending an index with
/// categories d/e must make "e" visible in the merged categories, and inserting a label
/// that is not a known category ("zzz") must fail.
#[test]
fn categorical_index_append_delete_insert_repeat_match_pandas_tns52()
-> Result<(), super::IndexError> {
    // Turns string literals into the owned `Vec<String>` the constructors take.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_owned()).collect()
    }

    let base = super::CategoricalIndex::with_categories(
        strings(&["a", "b", "c"]),
        strings(&["a", "b", "c", "d"]),
        false,
    )?
    .set_name("level");

    // The appended index brings one label ("d") and one new category ("e").
    let extra = super::CategoricalIndex::with_categories(
        strings(&["d"]),
        strings(&["d", "e"]),
        false,
    )?
    .set_name("level");
    let joined = base.append(&extra);
    assert_eq!(joined.labels(), strings(&["a", "b", "c", "d"]).as_slice());
    assert_eq!(joined.name(), Some("level"));
    let new_category = "e".to_owned();
    assert!(joined.categories().contains(&new_category));

    // Deleting an invalid position fails; deleting position 0 drops "a".
    let out_of_range = base.delete(99).unwrap_err();
    assert!(matches!(
        out_of_range,
        super::IndexError::OutOfBounds {
            position: 99,
            length: 3
        }
    ));
    let without_first = base.delete(0)?;
    assert_eq!(without_first.labels(), strings(&["b", "c"]).as_slice());

    // "d" is a declared category, so it can be inserted; "zzz" is not.
    let with_d = base.insert(1, "d")?;
    assert_eq!(with_d.labels(), strings(&["a", "d", "b", "c"]).as_slice());
    assert!(base.insert(1, "zzz").is_err());

    // Each label is duplicated in place.
    let doubled = base.repeat(2);
    assert_eq!(doubled.labels().len(), 6);
    assert_eq!(doubled.labels()[0], "a");
    assert_eq!(doubled.labels()[1], "a");
    assert_eq!(doubled.labels()[2], "b");
    Ok(())
}
20734
/// `slice_locs`/`slice_indexer` on a `CategoricalIndex` whose labels are in order,
/// and the error expected when the labels are not in order.
#[test]
fn categorical_index_slice_locs_indexer_match_pandas_y93vb() -> Result<(), super::IndexError> {
    // Turns string literals into the owned `Vec<String>` the constructors take.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_owned()).collect()
    }

    let in_order = super::CategoricalIndex::with_categories(
        strings(&["a", "b", "c", "d"]),
        strings(&["a", "b", "c", "d"]),
        true,
    )?;
    // The asserted end position is one past the last matching label.
    assert_eq!(in_order.slice_locs("b", "c")?, (1, 3));
    assert_eq!(in_order.slice_indexer("b", "c")?, 1..3);
    assert_eq!(in_order.slice_locs("a", "d")?, (0, 4));

    // Labels out of order: slicing by label is expected to be rejected.
    let out_of_order = super::CategoricalIndex::from_values(strings(&["c", "a", "b"]), false);
    assert!(out_of_order.slice_locs("a", "c").is_err());
    Ok(())
}
20764
20765 #[test]
20766 fn categorical_index_searchsorted_set_ops_match_pandas_cmvs7() -> Result<(), super::IndexError>
20767 {
20768 let cat = super::CategoricalIndex::with_categories(
20769 vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20770 vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20771 true,
20772 )?;
20773
20774 assert_eq!(cat.searchsorted("b", "left")?, 1);
20776 assert_eq!(cat.searchsorted("c", "right")?, 3);
20777 assert!(cat.searchsorted("b", "middle").is_err());
20778
20779 let other = super::CategoricalIndex::from_values(
20780 vec!["b".to_owned(), "c".to_owned(), "d".to_owned()],
20781 false,
20782 );
20783 assert_eq!(
20784 cat.intersection(&other).labels(),
20785 vec!["b".to_owned(), "c".to_owned()].as_slice()
20786 );
20787 assert_eq!(
20788 cat.union(&other).labels(),
20789 vec![
20790 "a".to_owned(),
20791 "b".to_owned(),
20792 "c".to_owned(),
20793 "d".to_owned(),
20794 ]
20795 .as_slice()
20796 );
20797 assert_eq!(
20798 cat.difference(&other).labels(),
20799 vec!["a".to_owned()].as_slice()
20800 );
20801 assert_eq!(
20803 cat.symmetric_difference(&other).labels(),
20804 vec!["a".to_owned(), "d".to_owned()].as_slice()
20805 );
20806 Ok(())
20807 }
20808
20809 #[test]
20810 fn categorical_index_argmax_argmin_match_pandas_d46wi() -> Result<(), super::IndexError> {
20811 let cat = super::CategoricalIndex::with_categories(
20812 vec!["b".to_owned(), "a".to_owned(), "c".to_owned()],
20813 vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20814 true,
20815 )?;
20816 assert_eq!(cat.argmax()?, 2);
20817 assert_eq!(cat.argmin()?, 1);
20818
20819 let empty = super::CategoricalIndex::from_values(Vec::<String>::new(), false);
20820 assert!(empty.argmax().is_err());
20821 assert!(empty.argmin().is_err());
20822 Ok(())
20823 }
20824
20825 #[test]
20826 fn categorical_index_forwarders_match_pandas_e2p82() -> Result<(), super::IndexError> {
20827 let cat = super::CategoricalIndex::with_categories(
20828 vec![
20829 "b".to_owned(),
20830 "a".to_owned(),
20831 "c".to_owned(),
20832 "a".to_owned(),
20833 ],
20834 vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20835 true,
20836 )?;
20837
20838 let positions = cat.argsort();
20840 let labels: Vec<&str> = positions
20841 .iter()
20842 .map(|&p| cat.labels()[p].as_str())
20843 .collect();
20844 for w in labels.windows(2) {
20845 assert!(w[0] <= w[1]);
20846 }
20847
20848 let taken = cat.take(&[2, 0, 0])?;
20850 assert_eq!(
20851 taken.labels(),
20852 vec!["c".to_owned(), "b".to_owned(), "b".to_owned()].as_slice()
20853 );
20854 assert!(matches!(
20855 cat.take(&[7]).unwrap_err(),
20856 super::IndexError::OutOfBounds {
20857 position: 7,
20858 length: 4
20859 }
20860 ));
20861
20862 assert_eq!(
20864 cat.isin(&["a".to_owned(), "z".to_owned()]),
20865 vec![false, true, false, true]
20866 );
20867
20868 assert_eq!(cat.get_loc("c")?, 2);
20870 assert!(cat.get_loc("zzz").is_err());
20871
20872 assert_eq!(cat.min(), Some("a"));
20874 assert_eq!(cat.max(), Some("c"));
20875
20876 let empty = super::CategoricalIndex::from_values(Vec::<String>::new(), false);
20878 assert_eq!(empty.min(), None);
20879 assert_eq!(empty.max(), None);
20880 assert!(empty.argsort().is_empty());
20881 Ok(())
20882 }
20883
20884 #[test]
20885 fn categorical_index_category_management_match_pandas_zy2vd() -> Result<(), super::IndexError> {
20886 let cat = super::CategoricalIndex::with_categories(
20887 vec!["a".to_owned(), "b".to_owned()],
20888 vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
20889 false,
20890 )?;
20891
20892 assert!(cat.as_ordered().ordered());
20894 assert!(!cat.as_ordered().as_unordered().ordered());
20895
20896 let added = cat.add_categories(vec!["d".to_owned()])?;
20898 assert_eq!(
20899 added.categories(),
20900 vec![
20901 "a".to_owned(),
20902 "b".to_owned(),
20903 "c".to_owned(),
20904 "d".to_owned()
20905 ]
20906 .as_slice()
20907 );
20908 assert!(cat.add_categories(vec!["a".to_owned()]).is_err());
20910
20911 let pruned = cat.remove_categories(&["c".to_owned()])?;
20913 assert_eq!(
20914 pruned.categories(),
20915 vec!["a".to_owned(), "b".to_owned()].as_slice()
20916 );
20917 assert!(cat.remove_categories(&["a".to_owned()]).is_err());
20919 assert!(cat.remove_categories(&["zzz".to_owned()]).is_err());
20921
20922 let trimmed = cat.remove_unused_categories();
20924 assert_eq!(
20925 trimmed.categories(),
20926 vec!["a".to_owned(), "b".to_owned()].as_slice()
20927 );
20928
20929 let extended = cat.set_categories(vec![
20931 "a".to_owned(),
20932 "b".to_owned(),
20933 "c".to_owned(),
20934 "d".to_owned(),
20935 ])?;
20936 assert_eq!(extended.categories().len(), 4);
20937 assert!(
20939 cat.set_categories(vec!["b".to_owned(), "c".to_owned()])
20940 .is_err()
20941 );
20942
20943 let renamed =
20945 cat.rename_categories(vec!["A".to_owned(), "B".to_owned(), "C".to_owned()])?;
20946 assert_eq!(
20947 renamed.labels(),
20948 vec!["A".to_owned(), "B".to_owned()].as_slice()
20949 );
20950 assert_eq!(
20951 renamed.categories(),
20952 vec!["A".to_owned(), "B".to_owned(), "C".to_owned()].as_slice()
20953 );
20954 assert!(cat.rename_categories(vec!["X".to_owned()]).is_err());
20956
20957 let reordered =
20959 cat.reorder_categories(vec!["c".to_owned(), "b".to_owned(), "a".to_owned()], true)?;
20960 assert!(reordered.ordered());
20961 assert_eq!(
20962 reordered.categories(),
20963 vec!["c".to_owned(), "b".to_owned(), "a".to_owned()].as_slice()
20964 );
20965 assert!(
20967 cat.reorder_categories(vec!["a".to_owned(), "b".to_owned(), "x".to_owned()], false)
20968 .is_err()
20969 );
20970 assert!(
20972 cat.reorder_categories(vec!["a".to_owned(), "a".to_owned(), "b".to_owned()], false)
20973 .is_err()
20974 );
20975
20976 Ok(())
20977 }
20978
20979 #[test]
20980 fn categorical_index_forwarder_methods_match_pandas_i1q1c() {
20981 let labels = vec![
20982 "low".to_owned(),
20983 "high".to_owned(),
20984 "low".to_owned(),
20985 "med".to_owned(),
20986 "high".to_owned(),
20987 "low".to_owned(),
20988 ];
20989 let categorical =
20990 super::CategoricalIndex::from_values(labels.clone(), false).set_name("level");
20991
20992 let unique = categorical.unique();
20994 assert_eq!(
20995 unique.labels(),
20996 vec!["low".to_owned(), "high".to_owned(), "med".to_owned()].as_slice()
20997 );
20998 assert_eq!(unique.name(), Some("level"));
20999
21000 let dup_first = categorical.duplicated(super::DuplicateKeep::First);
21002 assert_eq!(dup_first, vec![false, false, true, false, true, true]);
21003
21004 let dropped = categorical.drop_duplicates();
21006 assert_eq!(dropped.labels(), unique.labels());
21007
21008 let counts = categorical.value_counts();
21010 let total: usize = counts.iter().map(|(_, n)| n).sum();
21011 assert_eq!(total, categorical.len());
21012 let low_count = counts
21013 .iter()
21014 .find_map(|(label, n)| (label == "low").then_some(*n))
21015 .expect("low should be counted");
21016 assert_eq!(low_count, 3);
21017 assert_eq!(counts[0].1, 3);
21019
21020 let (codes, factor_uniques) = categorical.factorize();
21022 assert_eq!(codes.len(), categorical.len());
21023 assert_eq!(codes, vec![0, 1, 0, 2, 1, 0]);
21024 assert_eq!(factor_uniques.labels(), unique.labels());
21025 }
21026
21027 #[test]
21028 fn categorical_index_unique_preserves_categories_and_ordered_i1q1c() {
21029 let labels = vec!["a".to_owned(), "b".to_owned(), "a".to_owned()];
21030 let categories = vec!["a".to_owned(), "b".to_owned(), "c".to_owned()];
21031 let cat = super::CategoricalIndex::with_categories(labels, categories.clone(), true)
21032 .expect("with_categories");
21033 let unique = cat.unique();
21034 assert_eq!(unique.categories(), categories.as_slice());
21035 assert!(unique.ordered());
21036 }
21037
21038 #[test]
21039 fn timedelta_index_forwarder_methods_match_index_vq4pf() -> Result<(), super::IndexError> {
21040 let a: i64 = 1_000;
21041 let b: i64 = 2_000;
21042 let c: i64 = 3_000;
21043 let nat = fp_types::Timedelta::NAT;
21044
21045 let td = super::TimedeltaIndex::new(vec![a, c, b, a, nat, c]);
21047
21048 assert_eq!(td.argmax()?, 1);
21049 assert_eq!(td.argmin()?, 0);
21050
21051 let positions = td.argsort();
21052 assert_eq!(positions.len(), td.len());
21053
21054 let unique = td.unique()?;
21055 assert_eq!(unique.values(), vec![Some(a), Some(c), Some(b), None]);
21056
21057 let (codes, uniques) = td.factorize()?;
21058 assert_eq!(codes.len(), td.len());
21059 assert_eq!(uniques.values(), vec![Some(a), Some(c), Some(b)]);
21060 assert_eq!(codes[4], -1);
21061
21062 let counts = td.value_counts();
21063 let total: usize = counts.iter().map(|(_, n)| n).sum();
21064 assert_eq!(total, 5); let dup_first = td.duplicated(super::DuplicateKeep::First);
21067 assert_eq!(dup_first, vec![false, false, false, true, false, true]);
21068
21069 let deduped = td.drop_duplicates()?;
21070 assert_eq!(deduped.values(), vec![Some(a), Some(c), Some(b), None]);
21071
21072 let dropped = td.dropna();
21073 assert_eq!(
21074 dropped.values(),
21075 vec![Some(a), Some(c), Some(b), Some(a), Some(c)]
21076 );
21077 Ok(())
21078 }
21079
21080 #[test]
21081 fn timedelta_index_argmax_argmin_reject_empty_vq4pf() {
21082 let empty = super::TimedeltaIndex::new(vec![]);
21083 let err_max = empty.argmax().unwrap_err();
21084 assert!(matches!(
21085 err_max,
21086 super::IndexError::InvalidArgument(ref message)
21087 if message == "attempt to get argmax of an empty sequence"
21088 ));
21089 let err_min = empty.argmin().unwrap_err();
21090 assert!(matches!(
21091 err_min,
21092 super::IndexError::InvalidArgument(ref message)
21093 if message == "attempt to get argmin of an empty sequence"
21094 ));
21095
21096 let only_nat =
21097 super::TimedeltaIndex::new(vec![fp_types::Timedelta::NAT, fp_types::Timedelta::NAT]);
21098 assert!(only_nat.argmax().is_err());
21099 assert!(only_nat.argmin().is_err());
21100 }
21101
21102 #[test]
21103 fn timedelta_index_dropna_preserves_name_vq4pf() {
21104 let td =
21105 super::TimedeltaIndex::new(vec![fp_types::Timedelta::NAT, 0_i64]).set_name("delta");
21106 let dropped = td.dropna();
21107 assert_eq!(dropped.values(), vec![Some(0)]);
21108 assert_eq!(dropped.name(), Some("delta"));
21109 }
21110
21111 #[test]
21112 fn datetime_index_forwarder_methods_match_index_z9guv() -> Result<(), super::IndexError> {
21113 const NS: i64 = 1_000_000_000;
21114 let a = 1_704_067_200_i64 * NS;
21115 let b = 1_705_276_800_i64 * NS;
21116 let c = 1_706_140_800_i64 * NS;
21117
21118 let dt = super::DatetimeIndex::new(vec![a, c, b, a, i64::MIN, c]);
21120
21121 assert_eq!(dt.argmax()?, 1); assert_eq!(dt.argmin()?, 0); let positions = dt.argsort();
21128 assert_eq!(positions.len(), dt.len());
21129 let sorted_labels: Vec<&super::IndexLabel> = positions
21130 .iter()
21131 .map(|&p| &dt.as_index().labels()[p])
21132 .collect();
21133 for w in sorted_labels.windows(2) {
21134 assert!(w[0].cmp(w[1]).is_le());
21135 }
21136
21137 let unique = dt.unique()?;
21138 assert_eq!(unique.values(), vec![Some(a), Some(c), Some(b), None]);
21140
21141 let (codes, uniques) = dt.factorize()?;
21142 assert_eq!(codes.len(), dt.len());
21143 assert_eq!(uniques.values(), vec![Some(a), Some(c), Some(b)]);
21146 assert_eq!(codes[4], -1);
21148
21149 let counts = dt.value_counts();
21150 let total_count: usize = counts.iter().map(|(_, n)| n).sum();
21153 assert_eq!(total_count, 5);
21154 let a_count = counts
21155 .iter()
21156 .find_map(|(label, n)| match label {
21157 super::IndexLabel::Datetime64(nanos) if *nanos == a => Some(*n),
21158 _ => None,
21159 })
21160 .expect("a should be counted");
21161 assert_eq!(a_count, 2);
21162
21163 let dup_first = dt.duplicated(super::DuplicateKeep::First);
21164 assert_eq!(dup_first, vec![false, false, false, true, false, true]);
21166
21167 let deduped = dt.drop_duplicates()?;
21168 assert_eq!(deduped.values(), vec![Some(a), Some(c), Some(b), None]);
21170
21171 let dropped = dt.dropna();
21172 assert_eq!(
21173 dropped.values(),
21174 vec![Some(a), Some(c), Some(b), Some(a), Some(c)]
21175 );
21176 Ok(())
21177 }
21178
21179 #[test]
21180 fn datetime_index_argmax_argmin_reject_empty_z9guv() {
21181 let empty = super::DatetimeIndex::new(vec![]);
21182 let err_max = empty.argmax().unwrap_err();
21183 assert!(matches!(
21184 err_max,
21185 super::IndexError::InvalidArgument(ref message)
21186 if message == "attempt to get argmax of an empty sequence"
21187 ));
21188 let err_min = empty.argmin().unwrap_err();
21189 assert!(matches!(
21190 err_min,
21191 super::IndexError::InvalidArgument(ref message)
21192 if message == "attempt to get argmin of an empty sequence"
21193 ));
21194 assert!(empty.argsort().is_empty());
21195 assert!(empty.dropna().is_empty());
21196 }
21197
21198 #[test]
21199 fn datetime_index_dropna_preserves_name_z9guv() {
21200 let dt = super::DatetimeIndex::new(vec![i64::MIN, 0_i64, i64::MIN]).set_name("ts");
21201 let dropped = dt.dropna();
21202 assert_eq!(dropped.values(), vec![Some(0)]);
21203 assert_eq!(dropped.name(), Some("ts"));
21204 }
21205
21206 #[test]
21207 fn datetime_index_asi8_round_trips_nanos_teeck() {
21208 const NS: i64 = 1_000_000_000;
21209 let total: i64 = 1_704_067_200_i64 * NS + 123;
21210 let dt = super::DatetimeIndex::new(vec![total, i64::MIN, 0]);
21211 assert_eq!(dt.asi8(), vec![total, i64::MIN, 0]);
21212
21213 let empty = super::DatetimeIndex::new(vec![]);
21214 assert!(empty.asi8().is_empty());
21215 }
21216
21217 #[test]
21218 fn datetime_index_strftime_formats_each_label_teeck() {
21219 const NS: i64 = 1_000_000_000;
21220 let with_ms: i64 = 1_705_322_096_i64 * NS + 789_000_000;
21226 let dt = super::DatetimeIndex::new(vec![with_ms, i64::MIN]);
21227 let formatted = dt.strftime("%Y-%m-%dT%H:%M:%S%.3f");
21228 assert_eq!(
21229 formatted,
21230 vec![Some("2024-01-15T12:34:56.789".to_owned()), None]
21231 );
21232 }
21233
21234 #[test]
21235 fn timedelta_index_asi8_microseconds_nanoseconds_match_pandas_teeck() -> Result<(), &'static str>
21236 {
21237 let one_day = fp_types::Timedelta::NANOS_PER_DAY;
21239 let extra = 2 * fp_types::Timedelta::NANOS_PER_HOUR
21240 + 34 * fp_types::Timedelta::NANOS_PER_MIN
21241 + 56 * fp_types::Timedelta::NANOS_PER_SEC
21242 + 789_012_345;
21243 let total = one_day + extra;
21244 let td = super::TimedeltaIndex::new(vec![total, fp_types::Timedelta::NAT, 0, -1]);
21245
21246 assert_eq!(td.asi8(), vec![total, fp_types::Timedelta::NAT, 0, -1]);
21247 assert_eq!(
21249 td.microseconds(),
21250 vec![Some(789_012), None, Some(0), Some(999_999)]
21251 );
21252 assert_eq!(td.nanoseconds(), vec![Some(345), None, Some(0), Some(999)]);
21254
21255 let components = td.components();
21256 let positive = components
21257 .first()
21258 .copied()
21259 .flatten()
21260 .ok_or("positive components")?;
21261 assert_eq!(positive.days, 1);
21262 assert_eq!(positive.hours, 2);
21263 assert_eq!(positive.minutes, 34);
21264 assert_eq!(positive.seconds, 56);
21265 assert_eq!(positive.milliseconds, 789);
21266 assert_eq!(positive.microseconds, 12);
21267 assert_eq!(positive.nanoseconds, 345);
21268
21269 assert_eq!(
21270 components.get(1).copied().flatten().map(|row| row.days),
21271 None
21272 );
21273
21274 let zero = components
21275 .get(2)
21276 .copied()
21277 .flatten()
21278 .ok_or("zero components")?;
21279 assert_eq!(zero.days, 0);
21280 assert_eq!(zero.hours, 0);
21281 assert_eq!(zero.minutes, 0);
21282 assert_eq!(zero.seconds, 0);
21283 assert_eq!(zero.milliseconds, 0);
21284 assert_eq!(zero.microseconds, 0);
21285 assert_eq!(zero.nanoseconds, 0);
21286
21287 let negative = components
21288 .get(3)
21289 .copied()
21290 .flatten()
21291 .ok_or("negative components")?;
21292 assert_eq!(negative.days, -1);
21293 assert_eq!(negative.hours, 23);
21294 assert_eq!(negative.minutes, 59);
21295 assert_eq!(negative.seconds, 59);
21296 assert_eq!(negative.milliseconds, 999);
21297 assert_eq!(negative.microseconds, 999);
21298 assert_eq!(negative.nanoseconds, 999);
21299
21300 Ok(())
21301 }
21302
21303 #[test]
21304 fn datetime_index_month_name_and_day_name_match_pandas_fqkiu() {
21305 const NS: i64 = 1_000_000_000;
21308 let mon_jan: i64 = 1_705_276_800_i64 * NS;
21309 let tue_dec: i64 = 1_735_603_200_i64 * NS;
21310 let dt = super::DatetimeIndex::new(vec![mon_jan, tue_dec, i64::MIN]);
21311
21312 assert_eq!(
21313 dt.month_name(),
21314 vec![
21315 Some("January".to_owned()),
21316 Some("December".to_owned()),
21317 None
21318 ]
21319 );
21320 assert_eq!(
21321 dt.day_name(),
21322 vec![Some("Monday".to_owned()), Some("Tuesday".to_owned()), None]
21323 );
21324 }
21325
21326 #[test]
21327 fn datetime_index_normalize_truncates_to_midnight_utc_fqkiu() {
21328 const NS: i64 = 1_000_000_000;
21329 let mid_day: i64 = 1_705_276_800_i64 * NS + 12 * 3600 * NS + 34 * 60 * NS + 56 * NS + 789;
21331 let midnight: i64 = 1_705_795_200_i64 * NS;
21332 let nat = i64::MIN;
21333
21334 let dt = super::DatetimeIndex::new(vec![mid_day, midnight, nat]).set_name("when");
21335 let normed = dt.normalize();
21336
21337 assert_eq!(
21339 normed.values(),
21340 vec![Some(1_705_276_800_i64 * NS), Some(midnight), None]
21341 );
21342 assert_eq!(normed.name(), Some("when"));
21343 assert!(normed.is_normalized());
21344 }
21345
21346 #[test]
21347 fn datetime_index_is_normalized_returns_false_when_any_non_midnight_fqkiu() {
21348 const NS: i64 = 1_000_000_000;
21349 let mid_day: i64 = 1_705_276_800_i64 * NS + 1; let midnight: i64 = 1_705_795_200_i64 * NS;
21351 let mixed = super::DatetimeIndex::new(vec![midnight, mid_day]);
21352 assert!(!mixed.is_normalized());
21353
21354 let only_midnight = super::DatetimeIndex::new(vec![midnight]);
21355 assert!(only_midnight.is_normalized());
21356
21357 let only_nat = super::DatetimeIndex::new(vec![i64::MIN, i64::MIN]);
21358 assert!(only_nat.is_normalized());
21359
21360 let empty = super::DatetimeIndex::new(vec![]);
21361 assert!(empty.is_normalized());
21362 }
21363
21364 #[test]
21365 fn datetime_index_feb_28_in_non_leap_year_is_month_end_qy7yd() {
21366 let feb_28_2023: i64 = 1_677_542_400_i64 * 1_000_000_000;
21368 let dt = super::DatetimeIndex::new(vec![feb_28_2023]);
21369 assert_eq!(dt.is_month_end(), vec![Some(true)]);
21370 }
21371
21372 #[test]
21373 fn datetime_index_leap_year_century_rule_k860x() {
21374 let y2000: i64 = 960_076_800 * 1_000_000_000;
21377 let y2100: i64 = 4_117_046_400 * 1_000_000_000;
21378 let y2024feb: i64 = 1_708_002_000 * 1_000_000_000;
21379 let y2023feb: i64 = 1_676_466_000 * 1_000_000_000;
21380
21381 let dt = super::DatetimeIndex::new(vec![y2000, y2100, y2024feb, y2023feb]);
21382 assert_eq!(
21383 dt.is_leap_year(),
21384 vec![Some(true), Some(false), Some(true), Some(false)]
21385 );
21386 let dim = dt.days_in_month();
21388 assert_eq!(dim[2], Some(29));
21389 assert_eq!(dim[3], Some(28));
21390 }
21391
21392 #[test]
21393 fn datetime_index_time_of_day_accessors_handle_empty_znejf() {
21394 let dt = super::DatetimeIndex::new(vec![]);
21395 assert!(dt.hour().is_empty());
21396 assert!(dt.minute().is_empty());
21397 assert!(dt.second().is_empty());
21398 assert!(dt.microsecond().is_empty());
21399 assert!(dt.nanosecond().is_empty());
21400 }
21401
21402 #[test]
21403 fn range_index_missingness_methods_are_closed_form_a4fih() {
21404 let asc = super::RangeIndex::new(0, 5, 1).unwrap();
21405 assert_eq!(asc.isna(), vec![false; 5]);
21406 assert_eq!(asc.isnull(), vec![false; 5]);
21407 assert_eq!(asc.notna(), vec![true; 5]);
21408 assert_eq!(asc.notnull(), vec![true; 5]);
21409 assert!(!asc.hasnans());
21410 assert!(asc.dropna().equals(&asc));
21411 assert!(asc.fillna(99).equals(&asc));
21412
21413 let desc = super::RangeIndex::new(10, 0, -2).unwrap();
21414 assert_eq!(desc.isna().len(), desc.len());
21415 assert!(!desc.hasnans());
21416 assert!(desc.dropna().equals(&desc));
21417
21418 let empty = super::RangeIndex::new(0, 0, 1).unwrap();
21419 assert_eq!(empty.isna(), Vec::<bool>::new());
21420 assert_eq!(empty.notna(), Vec::<bool>::new());
21421 assert!(!empty.hasnans());
21422 assert!(empty.dropna().is_empty());
21423 assert!(empty.fillna(0).is_empty());
21424 }
21425
21426 #[test]
21427 fn range_index_format_stringifies_each_value_a4fih() {
21428 let asc = super::RangeIndex::new(0, 4, 1).unwrap();
21429 assert_eq!(asc.format(), vec!["0", "1", "2", "3"]);
21430
21431 let desc = super::RangeIndex::new(5, 0, -2).unwrap();
21432 assert_eq!(desc.format(), vec!["5", "3", "1"]);
21433
21434 let empty = super::RangeIndex::new(0, 0, 1).unwrap();
21435 assert_eq!(empty.format(), Vec::<String>::new());
21436 }
21437
21438 #[test]
21439 fn range_index_factorize_is_identity_a4fih() {
21440 let asc = super::RangeIndex::new(0, 5, 1).unwrap();
21441 let (codes, uniques) = asc.factorize();
21442 assert_eq!(codes, vec![0, 1, 2, 3, 4]);
21443 assert!(uniques.equals(&asc));
21444
21445 let desc = super::RangeIndex::new(10, 0, -2).unwrap();
21446 let (desc_codes, desc_uniques) = desc.factorize();
21447 assert_eq!(desc_codes, (0..desc.len()).collect::<Vec<_>>());
21448 assert!(desc_uniques.equals(&desc));
21449
21450 let empty = super::RangeIndex::new(0, 0, 1).unwrap();
21451 let (empty_codes, empty_uniques) = empty.factorize();
21452 assert!(empty_codes.is_empty());
21453 assert!(empty_uniques.is_empty());
21454 }
21455
21456 #[test]
21457 fn range_index_duplicated_drop_duplicates_are_no_ops_mrchb() {
21458 let asc = super::RangeIndex::new(0, 5, 1).unwrap();
21459 for keep in [
21460 super::DuplicateKeep::First,
21461 super::DuplicateKeep::Last,
21462 super::DuplicateKeep::None,
21463 ] {
21464 assert_eq!(asc.duplicated(keep), vec![false; asc.len()]);
21465 }
21466 let cloned = asc.drop_duplicates();
21467 assert!(cloned.equals(&asc));
21468 assert_eq!(cloned.len(), asc.len());
21469
21470 let empty = super::RangeIndex::new(0, 0, 1).unwrap();
21471 assert_eq!(
21472 empty.duplicated(super::DuplicateKeep::First),
21473 Vec::<bool>::new()
21474 );
21475 assert!(empty.drop_duplicates().is_empty());
21476 }
21477
21478 #[test]
21479 fn multi_index_asof_rejects_tuple_comparison_d89fe13() -> Result<(), super::IndexError> {
21480 let string_level = MultiIndex::from_tuples(vec![
21481 vec!["a".into(), 1_i64.into()],
21482 vec!["b".into(), 2_i64.into()],
21483 ])?;
21484 let int_level = MultiIndex::from_tuples(vec![
21485 vec![1_i64.into(), "a".into()],
21486 vec![2_i64.into(), "b".into()],
21487 ])?;
21488
21489 let string_err = string_level
21490 .asof(&[IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)])
21491 .unwrap_err();
21492 let int_err = int_level
21493 .asof(&[IndexLabel::Int64(1), IndexLabel::Utf8("a".into())])
21494 .unwrap_err();
21495
21496 assert!(matches!(
21497 string_err,
21498 super::IndexError::InvalidArgument(message)
21499 if message == "'<' not supported between instances of 'tuple' and 'str'"
21500 ));
21501 assert!(matches!(
21502 int_err,
21503 super::IndexError::InvalidArgument(message)
21504 if message == "'<' not supported between instances of 'tuple' and 'int'"
21505 ));
21506 assert_eq!(MultiIndex::from_tuples(Vec::new())?.asof(&[])?, None);
21507
21508 Ok(())
21509 }
21510
21511 #[test]
21512 fn multi_index_asof_locs_rejects_mask_and_broadcast_paths_d89fe14()
21513 -> Result<(), super::IndexError> {
21514 let source = MultiIndex::from_tuples(vec![
21515 vec!["a".into(), 1_i64.into()],
21516 vec!["a".into(), 3_i64.into()],
21517 vec!["b".into(), 2_i64.into()],
21518 ])?;
21519 let where_index = MultiIndex::from_tuples(vec![
21520 vec!["a".into(), 0_i64.into()],
21521 vec!["a".into(), 2_i64.into()],
21522 vec!["b".into(), 2_i64.into()],
21523 ])?;
21524
21525 let no_mask = source.asof_locs(&where_index, None).unwrap_err();
21526 let mismatched_mask = source
21527 .asof_locs(&where_index, Some(&[true, true]))
21528 .unwrap_err();
21529 let empty_take = source
21530 .asof_locs(&where_index, Some(&[false, false, false]))
21531 .unwrap_err();
21532 let broadcast = source
21533 .asof_locs(&where_index, Some(&[true, false, true]))
21534 .unwrap_err();
21535 let empty_source = MultiIndex::from_arrays(vec![Vec::new(), Vec::new()])?;
21536 let empty_mask = empty_source
21537 .asof_locs(&empty_source, Some(&[]))
21538 .unwrap_err();
21539
21540 assert!(matches!(
21541 no_mask,
21542 super::IndexError::InvalidArgument(message)
21543 if message == "object too deep for desired array"
21544 ));
21545 assert!(matches!(
21546 mismatched_mask,
21547 super::IndexError::InvalidArgument(message)
21548 if message == "boolean index did not match indexed array along axis 0; size of axis is 3 but size of corresponding boolean axis is 2"
21549 ));
21550 assert!(matches!(
21551 empty_take,
21552 super::IndexError::InvalidArgument(message)
21553 if message == "cannot do a non-empty take from an empty axes."
21554 ));
21555 assert!(matches!(
21556 broadcast,
21557 super::IndexError::InvalidArgument(message)
21558 if message == "operands could not be broadcast together with shapes (3,) (2,)"
21559 ));
21560 assert!(matches!(
21561 empty_mask,
21562 super::IndexError::InvalidArgument(message)
21563 if message == "attempt to get argmax of an empty sequence"
21564 ));
21565
21566 Ok(())
21567 }
21568
21569 #[test]
21570 fn multi_index_drop_duplicates_append_repeat_and_dropna() {
21571 let left = MultiIndex::from_tuples(vec![
21572 vec!["a".into(), 1_i64.into()],
21573 vec!["a".into(), 1_i64.into()],
21574 vec!["b".into(), 2_i64.into()],
21575 vec![IndexLabel::Datetime64(i64::MIN), 3_i64.into()],
21576 vec![
21577 IndexLabel::Datetime64(i64::MIN),
21578 IndexLabel::Timedelta64(Timedelta::NAT),
21579 ],
21580 ])
21581 .unwrap()
21582 .set_names(vec![Some("letter".into()), Some("number".into())]);
21583
21584 assert_eq!(
21585 left.drop_duplicates().to_list(),
21586 vec![
21587 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
21588 vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)],
21589 vec![IndexLabel::Datetime64(i64::MIN), IndexLabel::Int64(3)],
21590 vec![
21591 IndexLabel::Datetime64(i64::MIN),
21592 IndexLabel::Timedelta64(Timedelta::NAT),
21593 ],
21594 ]
21595 );
21596 assert_eq!(
21597 left.dropna().to_list(),
21598 vec![
21599 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
21600 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
21601 vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)],
21602 ]
21603 );
21604 assert_eq!(left.dropna_all().len(), 4);
21605
21606 let right = MultiIndex::from_tuples(vec![vec!["c".into(), 3_i64.into()]])
21607 .unwrap()
21608 .set_names(vec![Some("letter".into()), Some("other".into())]);
21609 let appended = left.append(&right).unwrap();
21610 assert_eq!(appended.len(), 6);
21611 assert_eq!(appended.names(), &[Some("letter".into()), None]);
21612
21613 let repeated = right.repeat(2);
21614 assert_eq!(
21615 repeated.to_list(),
21616 vec![
21617 vec![IndexLabel::Utf8("c".into()), IndexLabel::Int64(3)],
21618 vec![IndexLabel::Utf8("c".into()), IndexLabel::Int64(3)],
21619 ]
21620 );
21621 assert_eq!(right.repeat(0).len(), 0);
21622 }
21623
21624 #[test]
21625 fn multi_index_insert_delete_and_drop_tuples() {
21626 let mi = MultiIndex::from_tuples(vec![
21627 vec!["b".into(), 2_i64.into()],
21628 vec!["a".into(), 2_i64.into()],
21629 vec!["a".into(), 1_i64.into()],
21630 vec!["b".into(), 2_i64.into()],
21631 ])
21632 .unwrap()
21633 .set_names(vec![Some("letter".into()), Some("number".into())]);
21634
21635 let inserted = mi.insert(1, vec!["z".into(), 9_i64.into()]).unwrap();
21636 assert_eq!(
21637 inserted.to_list(),
21638 vec![
21639 vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)],
21640 vec![IndexLabel::Utf8("z".into()), IndexLabel::Int64(9)],
21641 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(2)],
21642 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
21643 vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)],
21644 ]
21645 );
21646 assert_eq!(inserted.names(), mi.names());
21647
21648 let deleted = inserted.delete(1).unwrap();
21649 assert_eq!(deleted, mi);
21650 assert!(mi.insert(0, vec!["short".into()]).is_err());
21651 assert!(mi.delete(99).is_err());
21652
21653 let dropped = mi
21654 .drop(&[vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)]])
21655 .unwrap();
21656 assert_eq!(
21657 dropped.to_list(),
21658 vec![
21659 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(2)],
21660 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
21661 ]
21662 );
21663 assert!(
21664 mi.drop(&[vec![
21665 IndexLabel::Utf8("missing".into()),
21666 IndexLabel::Int64(0)
21667 ]])
21668 .is_err()
21669 );
21670 }
21671
21672 #[test]
21673 fn multi_index_factorize_sort_and_reduce_tuples() {
21674 let mi = MultiIndex::from_tuples(vec![
21675 vec!["b".into(), 2_i64.into()],
21676 vec!["a".into(), 2_i64.into()],
21677 vec!["a".into(), 1_i64.into()],
21678 vec!["b".into(), 2_i64.into()],
21679 vec!["a".into(), 2_i64.into()],
21680 vec!["c".into(), 3_i64.into()],
21681 ])
21682 .unwrap()
21683 .set_names(vec![Some("letter".into()), Some("number".into())]);
21684
21685 let (codes, uniques) = mi.factorize();
21686 assert_eq!(codes, vec![0, 1, 2, 0, 1, 3]);
21687 assert_eq!(
21688 uniques.to_list(),
21689 vec![
21690 vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)],
21691 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(2)],
21692 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
21693 vec![IndexLabel::Utf8("c".into()), IndexLabel::Int64(3)],
21694 ]
21695 );
21696 assert_eq!(uniques.names(), mi.names());
21697 assert_eq!(mi.unique(), uniques);
21698 assert_eq!(mi.nunique(), 4);
21699 assert_eq!(
21700 mi.value_counts(),
21701 vec![
21702 (vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(2)], 2),
21703 (vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)], 2),
21704 (vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)], 1),
21705 (vec![IndexLabel::Utf8("c".into()), IndexLabel::Int64(3)], 1),
21706 ]
21707 );
21708
21709 let sorted = mi.sort_values();
21710 assert_eq!(
21711 sorted.to_list(),
21712 vec![
21713 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)],
21714 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(2)],
21715 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(2)],
21716 vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)],
21717 vec![IndexLabel::Utf8("b".into()), IndexLabel::Int64(2)],
21718 vec![IndexLabel::Utf8("c".into()), IndexLabel::Int64(3)],
21719 ]
21720 );
21721 let (sortlevel, order) = mi.sortlevel();
21722 assert_eq!(sortlevel, sorted);
21723 assert_eq!(order, vec![2, 1, 4, 0, 3, 5]);
21724 assert_eq!(mi.sort(), sorted);
21725 assert_eq!(
21726 mi.min().unwrap(),
21727 vec![IndexLabel::Utf8("a".into()), IndexLabel::Int64(1)]
21728 );
21729 assert_eq!(
21730 mi.max().unwrap(),
21731 vec![IndexLabel::Utf8("c".into()), IndexLabel::Int64(3)]
21732 );
21733 assert_eq!(mi.argmin(), Some(2));
21734 assert_eq!(mi.argmax(), Some(5));
21735
21736 let empty = MultiIndex::from_tuples(Vec::new()).unwrap();
21737 assert_eq!(empty.min(), None);
21738 assert_eq!(empty.max(), None);
21739 assert_eq!(empty.argmin(), None);
21740 assert_eq!(empty.argmax(), None);
21741 }
21742
/// Runs `intersection`, `union`, `difference` and `symmetric_difference` on two
/// two-level indexes and checks the resulting rows, plus the level names of the
/// intersection.
#[test]
fn multi_index_tuple_set_ops_preserve_order_and_shared_names() {
    // Expected `[letter, number]` row used in the comparisons below.
    let row = |letter: &str, number: i64| {
        vec![IndexLabel::Utf8(letter.into()), IndexLabel::Int64(number)]
    };

    // The left side lists ("a", 1) twice; the expected rows below contain it once.
    let left_rows = vec![
        vec!["a".into(), 1_i64.into()],
        vec!["a".into(), 2_i64.into()],
        vec!["b".into(), 1_i64.into()],
        vec!["a".into(), 1_i64.into()],
    ];
    let left = MultiIndex::from_tuples(left_rows)
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("number".into())]);

    // The right side shares only the first level name with the left side.
    let right_rows = vec![
        vec!["a".into(), 2_i64.into()],
        vec!["c".into(), 3_i64.into()],
    ];
    let right = MultiIndex::from_tuples(right_rows)
        .unwrap()
        .set_names(vec![Some("letter".into()), Some("other".into())]);

    let intersection = left.intersection(&right).unwrap();
    assert_eq!(intersection.to_list(), vec![row("a", 2)]);
    // "letter" is named identically on both sides; the second level name differs.
    assert_eq!(intersection.names(), &[Some("letter".into()), None]);

    let merged = left.union(&right).unwrap();
    assert_eq!(
        merged.to_list(),
        vec![row("a", 1), row("a", 2), row("b", 1), row("c", 3)]
    );

    let left_only = left.difference(&right).unwrap();
    assert_eq!(left_only.to_list(), vec![row("a", 1), row("b", 1)]);

    let exclusive = left.symmetric_difference(&right).unwrap();
    assert_eq!(
        exclusive.to_list(),
        vec![row("a", 1), row("b", 1), row("c", 3)]
    );
}
21792
/// Reorders a three-level index with `[2, 0, 1]` and checks that the level count,
/// the level names and the first row all follow the requested order.
#[test]
fn multi_index_reorder_levels() {
    let rows = vec![
        vec!["a".into(), 1_i64.into(), "x".into()],
        vec!["b".into(), 2_i64.into(), "y".into()],
    ];
    let level_names = vec![
        Some("letter".into()),
        Some("number".into()),
        Some("code".into()),
    ];
    let mi = MultiIndex::from_tuples(rows).unwrap().set_names(level_names);

    let reordered = mi.reorder_levels(&[2, 0, 1]).unwrap();
    assert_eq!(reordered.nlevels(), 3);
    assert_eq!(
        reordered.names(),
        &[
            Some("code".into()),
            Some("letter".into()),
            Some("number".into())
        ]
    );

    // First row ("a", 1, "x") is expected to come back as ("x", "a", 1).
    let first_row = reordered.get_tuple(0).unwrap();
    let expected_first_row = [
        IndexLabel::Utf8("x".into()),
        IndexLabel::Utf8("a".into()),
        IndexLabel::Int64(1),
    ];
    for (position, expected_label) in expected_first_row.iter().enumerate() {
        assert_eq!(first_row[position], expected_label);
    }
}
21824
/// Reordering with the order `[0, 1]` must yield an index equal to the original.
#[test]
fn multi_index_reorder_levels_identity() {
    let single_row = vec![vec!["a".into(), 1_i64.into()]];
    let original = MultiIndex::from_tuples(single_row).unwrap();

    let identity_order = [0, 1];
    let round_tripped = original.reorder_levels(&identity_order).unwrap();

    assert_eq!(round_tripped, original);
}
21833
/// An order with fewer or more entries than the index has levels must be rejected.
#[test]
fn multi_index_reorder_levels_wrong_length_errors() {
    let single_row = vec![vec!["a".into(), 1_i64.into()]];
    let two_level = MultiIndex::from_tuples(single_row).unwrap();

    // One entry for a two-level index.
    let too_short = two_level.reorder_levels(&[0]);
    assert!(too_short.is_err());

    // Three entries for a two-level index.
    let too_long = two_level.reorder_levels(&[0, 1, 2]);
    assert!(too_long.is_err());
}
21841
/// Naming the same level position twice in the order must be rejected.
#[test]
fn multi_index_reorder_levels_duplicate_index_errors() {
    let single_row = vec![vec!["a".into(), 1_i64.into()]];
    let two_level = MultiIndex::from_tuples(single_row).unwrap();

    let outcome = two_level.reorder_levels(&[0, 0]);

    assert!(outcome.is_err());
}
21848
/// A level position beyond the available levels (5 for a two-level index) must be rejected.
#[test]
fn multi_index_reorder_levels_out_of_bounds_errors() {
    let single_row = vec![vec!["a".into(), 1_i64.into()]];
    let two_level = MultiIndex::from_tuples(single_row).unwrap();

    let outcome = two_level.reorder_levels(&[0, 5]);

    assert!(outcome.is_err());
}
21855
/// Rows given in ascending order are expected to be increasing and lexsorted,
/// and not decreasing.
#[test]
fn multi_index_is_monotonic_increasing_on_sorted() {
    let ascending_rows = vec![
        vec!["A".into(), 1_i64.into()],
        vec!["A".into(), 2_i64.into()],
        vec!["B".into(), 1_i64.into()],
    ];
    let index = MultiIndex::from_tuples(ascending_rows).unwrap();

    // Flags in order: (increasing, lexsorted, decreasing).
    let observed = (
        index.is_monotonic_increasing(),
        index.is_lexsorted(),
        index.is_monotonic_decreasing(),
    );
    assert_eq!(observed, (true, true, false));
}
21871
/// Rows given in descending order are expected to be decreasing and not increasing.
#[test]
fn multi_index_is_monotonic_decreasing_on_reverse_sorted() {
    let descending_rows = vec![
        vec!["B".into(), 2_i64.into()],
        vec!["B".into(), 1_i64.into()],
        vec!["A".into(), 1_i64.into()],
    ];
    let index = MultiIndex::from_tuples(descending_rows).unwrap();

    // Flags in order: (decreasing, increasing).
    let observed = (
        index.is_monotonic_decreasing(),
        index.is_monotonic_increasing(),
    );
    assert_eq!(observed, (true, false));
}
21884
/// Two identical rows are expected to count as both increasing and decreasing.
#[test]
fn multi_index_is_monotonic_both_directions_on_constant_inner() {
    let repeated_rows = vec![
        vec!["A".into(), 1_i64.into()],
        vec!["A".into(), 1_i64.into()],
    ];
    let index = MultiIndex::from_tuples(repeated_rows).unwrap();

    let increasing = index.is_monotonic_increasing();
    assert!(increasing);

    let decreasing = index.is_monotonic_decreasing();
    assert!(decreasing);
}
21896
/// An index built from no rows is expected to be increasing, decreasing and lexsorted.
#[test]
fn multi_index_empty_is_monotonic() {
    let empty = MultiIndex::from_tuples(Vec::new()).unwrap();

    // Flags in order: (increasing, decreasing, lexsorted).
    let observed = (
        empty.is_monotonic_increasing(),
        empty.is_monotonic_decreasing(),
        empty.is_lexsorted(),
    );
    assert_eq!(observed, (true, true, true));
}
21904
/// A one-row index is expected to be increasing, decreasing and lexsorted.
#[test]
fn multi_index_single_row_is_monotonic() {
    let single_row = vec![vec!["A".into(), 1_i64.into()]];
    let index = MultiIndex::from_tuples(single_row).unwrap();

    let increasing = index.is_monotonic_increasing();
    assert!(increasing);

    let decreasing = index.is_monotonic_decreasing();
    assert!(decreasing);

    let lexsorted = index.is_lexsorted();
    assert!(lexsorted);
}
21912
/// Rows whose outer level goes B, A, B are expected to be neither increasing,
/// nor decreasing, nor lexsorted.
#[test]
fn multi_index_unsorted_is_neither() {
    let shuffled_rows = vec![
        vec!["B".into(), 1_i64.into()],
        vec!["A".into(), 2_i64.into()],
        vec!["B".into(), 2_i64.into()],
    ];
    let index = MultiIndex::from_tuples(shuffled_rows).unwrap();

    // Flags in order: (increasing, decreasing, lexsorted).
    let observed = (
        index.is_monotonic_increasing(),
        index.is_monotonic_decreasing(),
        index.is_lexsorted(),
    );
    assert_eq!(observed, (false, false, false));
}
21926
/// The outer level ascends (A, A, B) while the inner level drops from 5 to 1 within "A";
/// the index is expected to be neither increasing nor decreasing.
#[test]
fn multi_index_outer_ascending_inner_descending_is_not_monotonic() {
    let mixed_rows = vec![
        vec!["A".into(), 5_i64.into()],
        vec!["A".into(), 1_i64.into()],
        vec!["B".into(), 3_i64.into()],
    ];
    let index = MultiIndex::from_tuples(mixed_rows).unwrap();

    // Flags in order: (increasing, decreasing).
    let observed = (
        index.is_monotonic_increasing(),
        index.is_monotonic_decreasing(),
    );
    assert_eq!(observed, (false, false));
}
21941
/// Checks `get_indexer`, `searchsorted`, `asof` and `factorize` on small flat indexes
/// against fixed expected results.
#[test]
fn index_lookup_methods_match_pandas() {
    use super::{Index, IndexLabel};

    // Builds an `Index` whose labels are all `Int64`.
    let int_index = |values: &[i64]| {
        Index::new(
            values
                .iter()
                .copied()
                .map(IndexLabel::Int64)
                .collect::<Vec<_>>(),
        )
    };

    let index = int_index(&[1, 3, 5, 7]);

    // Only 3 and 7 from the target occur in `index`, at positions 1 and 3.
    let target = int_index(&[2, 3, 6, 7]);
    let positions = index.get_indexer(&target);
    assert_eq!(
        positions,
        vec![None, Some(1), None, Some(3)],
        "get_indexer exact"
    );

    // (needle, side, expected insertion position)
    let searchsorted_cases = [
        (3, "left", 1),
        (3, "right", 2),
        (4, "left", 2),
        (8, "left", 4),
        (0, "left", 0),
    ];
    for (needle, side, expected_position) in searchsorted_cases {
        let found = index.searchsorted(&IndexLabel::Int64(needle), side).unwrap();
        assert_eq!(found, expected_position);
    }

    // (query, expected label value, assertion message)
    let asof_cases = [
        (4, Some(3), "asof 4"),
        (0, None, "asof before all"),
        (7, Some(7), "asof exact"),
        (10, Some(7), "asof after all"),
    ];
    for (query, expected_value, note) in asof_cases {
        assert_eq!(
            index.asof(&IndexLabel::Int64(query)),
            expected_value.map(IndexLabel::Int64),
            "{note}"
        );
    }

    // "b" occurs twice; the expected codes reuse 0 for it and the uniques list it once.
    let letters = Index::new(vec![
        IndexLabel::Utf8("b".into()),
        IndexLabel::Utf8("a".into()),
        IndexLabel::Utf8("b".into()),
        IndexLabel::Utf8("c".into()),
    ]);
    let (codes, uniques) = letters.factorize();
    assert_eq!(codes, vec![0_isize, 1, 0, 2], "factorize codes");

    let expected_uniques = ["b", "a", "c"].map(|text| IndexLabel::Utf8(text.into()));
    assert_eq!(uniques.labels(), &expected_uniques, "factorize uniques");
}
22010 }
22011}