1#![forbid(unsafe_code)]
2#![warn(rustdoc::broken_intra_doc_links)]
3
4use std::sync::{Arc, OnceLock};
58
59use fp_types::{
60 DType, Interval, IntervalClosed, NullKind, Scalar, SparseDType, Timedelta, Timestamp,
61 TypeError, cast_scalar, cast_scalar_owned, common_dtype, infer_dtype, nanall, nanany,
62 nanargmax, nanargmin, nancummax, nancummin, nancumprod, nancumsum, nankurt, nanmax, nanmean,
63 nanmedian, nanmin, nannunique, nanprod, nanptp, nanquantile, nansem, nanskew, nanstd, nansum,
64 nanvar,
65};
66use rustc_hash::{FxHashMap, FxHashSet};
67use serde::{Deserialize, Serialize};
68use thiserror::Error;
69
70#[derive(Debug, Clone, Eq)]
71pub struct ValidityMask {
72 words: Vec<u64>,
73 len: usize,
74}
75
76impl ValidityMask {
77 fn is_all_valid_sentinel(&self) -> bool {
78 self.len > 0 && self.words.is_empty()
79 }
80
81 fn materialized_all_valid_words(len: usize) -> Vec<u64> {
82 let word_count = len.div_ceil(64);
83 let mut words = vec![u64::MAX; word_count];
84 let remainder = len % 64;
85 if remainder > 0
86 && let Some(last) = words.last_mut()
87 {
88 *last = (1_u64 << remainder) - 1;
89 }
90 words
91 }
92
93 fn words_are_all_valid(words: &[u64], len: usize) -> bool {
94 if len == 0 {
95 return words.is_empty();
96 }
97 let word_count = len.div_ceil(64);
98 if words.len() != word_count {
99 return false;
100 }
101 let full_words = len / 64;
102 if words.iter().take(full_words).any(|&word| word != u64::MAX) {
103 return false;
104 }
105 let remainder = len % 64;
106 if remainder == 0 {
107 return true;
108 }
109 words.get(full_words).copied() == Some((1_u64 << remainder) - 1)
110 }
111
112 fn materialize_if_all_valid_sentinel(&mut self) {
113 if self.is_all_valid_sentinel() {
114 self.words = Self::materialized_all_valid_words(self.len);
115 }
116 }
117
118 #[must_use]
119 pub fn from_values(values: &[Scalar]) -> Self {
120 let len = values.len();
121 let word_count = len.div_ceil(64);
122 let mut words = vec![0_u64; word_count];
123 let mut all_valid = true;
124 for (idx, value) in values.iter().enumerate() {
125 if !value.is_missing() {
126 words[idx / 64] |= 1_u64 << (idx % 64);
127 } else {
128 all_valid = false;
129 }
130 }
131 if all_valid {
132 return Self::all_valid(len);
133 }
134 Self { words, len }
135 }
136
137 #[must_use]
143 pub fn from_f64(data: &[f64]) -> Self {
144 let len = data.len();
145 let word_count = len.div_ceil(64);
146 let mut words = vec![0_u64; word_count];
147 let mut all_valid = true;
148 for (idx, &v) in data.iter().enumerate() {
149 if !v.is_nan() {
150 words[idx / 64] |= 1_u64 << (idx % 64);
151 } else {
152 all_valid = false;
153 }
154 }
155 if all_valid {
156 return Self::all_valid(len);
157 }
158 Self { words, len }
159 }
160
161 #[must_use]
162 pub fn all_valid(len: usize) -> Self {
163 Self {
164 words: Vec::new(),
165 len,
166 }
167 }
168
169 #[must_use]
174 #[doc(hidden)]
175 pub fn from_words(words: Vec<u64>, len: usize) -> Self {
176 debug_assert_eq!(words.len(), len.div_ceil(64));
177 debug_assert!(
178 len.is_multiple_of(64) || words.last().is_none_or(|w| w >> (len % 64) == 0),
179 "validity bits beyond len must be zero"
180 );
181 if Self::words_are_all_valid(&words, len) {
182 return Self::all_valid(len);
183 }
184 Self { words, len }
185 }
186
187 #[must_use]
188 pub fn all_invalid(len: usize) -> Self {
189 let word_count = len.div_ceil(64);
190 Self {
191 words: vec![0_u64; word_count],
192 len,
193 }
194 }
195
196 #[must_use]
197 pub fn get(&self, idx: usize) -> bool {
198 if idx >= self.len {
199 return false;
200 }
201 if self.is_all_valid_sentinel() {
202 return true;
203 }
204 (self.words[idx / 64] >> (idx % 64)) & 1 == 1
205 }
206
207 pub fn set(&mut self, idx: usize, value: bool) {
208 if idx >= self.len {
209 return;
210 }
211 if self.is_all_valid_sentinel() {
212 if value {
213 return;
214 }
215 self.materialize_if_all_valid_sentinel();
216 }
217 if value {
218 self.words[idx / 64] |= 1_u64 << (idx % 64);
219 } else {
220 self.words[idx / 64] &= !(1_u64 << (idx % 64));
221 }
222 }
223
224 #[must_use]
225 pub fn count_valid(&self) -> usize {
226 if self.is_all_valid_sentinel() {
227 return self.len;
228 }
229 let full_words = self.len / 64;
230 let mut count: u32 = self.words[..full_words]
231 .iter()
232 .map(|w| w.count_ones())
233 .sum();
234 let remainder = self.len % 64;
235 if remainder > 0 && full_words < self.words.len() {
236 let mask = (1_u64 << remainder) - 1;
237 count += (self.words[full_words] & mask).count_ones();
238 }
239 count as usize
240 }
241
242 #[must_use]
243 pub fn len(&self) -> usize {
244 self.len
245 }
246
247 #[must_use]
248 pub fn is_empty(&self) -> bool {
249 self.len == 0
250 }
251
252 #[must_use]
253 pub fn and_mask(&self, other: &Self) -> Self {
254 let len = self.len.min(other.len);
255 if len == 0 {
256 return Self::all_invalid(0);
257 }
258 if self.is_all_valid_sentinel() && other.is_all_valid_sentinel() {
259 return Self::all_valid(len);
260 }
261 if self.is_all_valid_sentinel() {
262 return other.slice(0, len);
263 }
264 if other.is_all_valid_sentinel() {
265 return self.slice(0, len);
266 }
267 let word_count = len.div_ceil(64);
268 let words = self.words[..word_count]
269 .iter()
270 .zip(&other.words[..word_count])
271 .map(|(a, b)| a & b)
272 .collect();
273 Self { words, len }
274 }
275
276 #[must_use]
277 pub fn or_mask(&self, other: &Self) -> Self {
278 let len = self.len.min(other.len);
279 if len == 0 {
280 return Self::all_invalid(0);
281 }
282 if self.is_all_valid_sentinel() || other.is_all_valid_sentinel() {
283 return Self::all_valid(len);
284 }
285 let word_count = len.div_ceil(64);
286 let words = self.words[..word_count]
287 .iter()
288 .zip(&other.words[..word_count])
289 .map(|(a, b)| a | b)
290 .collect();
291 Self { words, len }
292 }
293
294 #[must_use]
295 pub fn not_mask(&self) -> Self {
296 if self.is_all_valid_sentinel() {
297 return Self::all_invalid(self.len);
298 }
299 let mut words: Vec<u64> = self.words.iter().map(|w| !w).collect();
300 let remainder = self.len % 64;
301 if remainder > 0 && !words.is_empty() {
302 let last = words.len() - 1;
303 words[last] &= (1_u64 << remainder) - 1;
304 }
305 Self {
306 words,
307 len: self.len,
308 }
309 }
310
311 pub fn bits(&self) -> impl Iterator<Item = bool> + '_ {
314 (0..self.len).map(|idx| self.get(idx))
315 }
316
317 #[must_use]
319 pub fn count_invalid(&self) -> usize {
320 self.len.saturating_sub(self.count_valid())
321 }
322
323 #[must_use]
325 pub fn any(&self) -> bool {
326 if self.is_all_valid_sentinel() {
327 return true;
328 }
329 self.count_valid() > 0
330 }
331
332 #[must_use]
334 pub fn all(&self) -> bool {
335 if self.is_all_valid_sentinel() {
336 return true;
337 }
338 self.count_valid() == self.len
339 }
340
341 #[must_use]
345 pub fn xor_mask(&self, other: &Self) -> Self {
346 let len = self.len.min(other.len);
347 if len == 0 {
348 return Self::all_invalid(0);
349 }
350 if self.is_all_valid_sentinel() && other.is_all_valid_sentinel() {
351 return Self::all_invalid(len);
352 }
353 if self.is_all_valid_sentinel() {
354 return other.slice(0, len).not_mask();
355 }
356 if other.is_all_valid_sentinel() {
357 return self.slice(0, len).not_mask();
358 }
359 let word_count = len.div_ceil(64);
360 let mut words: Vec<u64> = self.words[..word_count]
361 .iter()
362 .zip(&other.words[..word_count])
363 .map(|(a, b)| a ^ b)
364 .collect();
365 let remainder = len % 64;
366 if remainder > 0 && !words.is_empty() {
367 let last = words.len() - 1;
368 words[last] &= (1_u64 << remainder) - 1;
369 }
370 Self { words, len }
371 }
372
373 #[must_use]
378 pub fn slice(&self, start: usize, len: usize) -> Self {
379 if start >= self.len {
380 return Self::all_invalid(0);
381 }
382 let effective_len = len.min(self.len - start);
383 if self.is_all_valid_sentinel() {
384 return Self::all_valid(effective_len);
385 }
386 let mut out = Self::all_invalid(effective_len);
387 for i in 0..effective_len {
388 if self.get(start + i) {
389 out.set(i, true);
390 }
391 }
392 out
393 }
394
395 #[must_use]
397 pub fn concat(&self, other: &Self) -> Self {
398 let total = self.len + other.len;
399 if self.all() && other.all() {
400 return Self::all_valid(total);
401 }
402 let mut out = Self::all_invalid(total);
403 for i in 0..self.len {
404 if self.get(i) {
405 out.set(i, true);
406 }
407 }
408 for i in 0..other.len {
409 if other.get(i) {
410 out.set(self.len + i, true);
411 }
412 }
413 out
414 }
415
416 #[must_use]
418 pub fn first_valid(&self) -> Option<usize> {
419 if self.is_all_valid_sentinel() {
420 return Some(0);
421 }
422 (0..self.len).find(|&i| self.get(i))
423 }
424
425 #[must_use]
427 pub fn last_valid(&self) -> Option<usize> {
428 if self.is_all_valid_sentinel() {
429 return Some(self.len - 1);
430 }
431 (0..self.len).rev().find(|&i| self.get(i))
432 }
433}
434
435impl PartialEq for ValidityMask {
436 fn eq(&self, other: &Self) -> bool {
437 self.len == other.len && self.bits().eq(other.bits())
438 }
439}
440
441impl Serialize for ValidityMask {
442 fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
443 use serde::ser::SerializeStruct;
444 let bits: Vec<bool> = self.bits().collect();
445 let mut state = serializer.serialize_struct("ValidityMask", 1)?;
446 state.serialize_field("bits", &bits)?;
447 state.end()
448 }
449}
450
451impl<'de> Deserialize<'de> for ValidityMask {
452 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
453 #[derive(Deserialize)]
454 struct Raw {
455 bits: Vec<bool>,
456 }
457 let raw = Raw::deserialize(deserializer)?;
458 let len = raw.bits.len();
459 let word_count = len.div_ceil(64);
460 let mut words = vec![0_u64; word_count];
461 for (idx, &valid) in raw.bits.iter().enumerate() {
462 if valid {
463 words[idx / 64] |= 1_u64 << (idx % 64);
464 }
465 }
466 Ok(Self::from_words(words, len))
467 }
468}
469
470#[derive(Debug, Clone, PartialEq)]
479pub enum ColumnData {
480 Float64(Vec<f64>),
481 Int64(Vec<i64>),
482 Bool(Vec<bool>),
483 Utf8(Vec<String>),
484 Timedelta64(Vec<i64>),
485 Datetime64(Vec<i64>),
486 Period(Vec<i64>),
487 Interval(Vec<Interval>),
488}
489
490impl ColumnData {
491 #[must_use]
497 pub fn from_scalars(values: &[Scalar], dtype: DType) -> Self {
498 match dtype {
499 DType::Float64 => {
500 let data: Vec<f64> = values
501 .iter()
502 .map(|v| match v {
503 Scalar::Float64(f) => *f,
504 Scalar::Int64(i) => *i as f64,
505 Scalar::Bool(true) => 1.0,
506 Scalar::Bool(false) => 0.0,
507 _ => 0.0, })
509 .collect();
510 Self::Float64(data)
511 }
512 DType::Int64 | DType::Int64Nullable => {
513 let data: Vec<i64> = values
514 .iter()
515 .map(|v| match v {
516 Scalar::Int64(i) => *i,
517 Scalar::Bool(b) => i64::from(*b),
518 _ => 0, })
520 .collect();
521 Self::Int64(data)
522 }
523 DType::Categorical => {
524 let data: Vec<i64> = values
525 .iter()
526 .map(|v| match v {
527 Scalar::Int64(i) => *i,
528 _ => -1,
529 })
530 .collect();
531 Self::Int64(data)
532 }
533 DType::Bool | DType::BoolNullable => {
534 let data: Vec<bool> = values
535 .iter()
536 .map(|v| match v {
537 Scalar::Bool(b) => *b,
538 _ => false,
539 })
540 .collect();
541 Self::Bool(data)
542 }
543 DType::Utf8 => {
544 let data: Vec<String> = values
545 .iter()
546 .map(|v| match v {
547 Scalar::Utf8(s) => s.clone(),
548 _ => String::new(),
549 })
550 .collect();
551 Self::Utf8(data)
552 }
553 DType::Null => Self::Float64(vec![0.0; values.len()]),
554 DType::Sparse => Self::Utf8(vec![String::new(); values.len()]),
555 DType::Timedelta64 => {
556 let data: Vec<i64> = values
557 .iter()
558 .map(|v| match v {
559 Scalar::Timedelta64(n) => *n,
560 Scalar::Int64(i) => *i,
561 _ => Timedelta::NAT,
562 })
563 .collect();
564 Self::Timedelta64(data)
565 }
566 DType::Datetime64 => {
567 let data: Vec<i64> = values
568 .iter()
569 .map(|v| match v {
570 Scalar::Datetime64(n) => *n,
571 Scalar::Int64(i) => *i,
572 _ => Timestamp::NAT,
573 })
574 .collect();
575 Self::Datetime64(data)
576 }
577 DType::Period => {
578 let data: Vec<i64> = values
579 .iter()
580 .map(|v| match v {
581 Scalar::Period(n) => *n,
582 Scalar::Int64(i) => *i,
583 _ => i64::MIN, })
585 .collect();
586 Self::Period(data)
587 }
588 DType::Interval => {
589 let data: Vec<Interval> = values
590 .iter()
591 .map(|v| match v {
592 Scalar::Interval(interval) => *interval,
593 _ => Interval::new(0.0, 0.0, IntervalClosed::Right),
594 })
595 .collect();
596 Self::Interval(data)
597 }
598 }
599 }
600
601 #[must_use]
603 pub fn to_scalars(&self, dtype: DType, validity: &ValidityMask) -> Vec<Scalar> {
604 match self {
605 Self::Float64(data) => data
606 .iter()
607 .enumerate()
608 .map(|(i, v)| {
609 if !validity.get(i) {
610 Scalar::missing_for_dtype(dtype)
611 } else {
612 Scalar::Float64(*v)
613 }
614 })
615 .collect(),
616 Self::Int64(data) => data
617 .iter()
618 .enumerate()
619 .map(|(i, v)| {
620 if !validity.get(i) {
621 Scalar::missing_for_dtype(dtype)
622 } else {
623 Scalar::Int64(*v)
624 }
625 })
626 .collect(),
627 Self::Bool(data) => data
628 .iter()
629 .enumerate()
630 .map(|(i, v)| {
631 if !validity.get(i) {
632 Scalar::missing_for_dtype(dtype)
633 } else {
634 Scalar::Bool(*v)
635 }
636 })
637 .collect(),
638 Self::Utf8(data) => data
639 .iter()
640 .enumerate()
641 .map(|(i, v)| {
642 if !validity.get(i) {
643 Scalar::missing_for_dtype(dtype)
644 } else {
645 Scalar::Utf8(v.clone())
646 }
647 })
648 .collect(),
649 Self::Timedelta64(data) => data
650 .iter()
651 .enumerate()
652 .map(|(i, v)| {
653 if !validity.get(i) || *v == Timedelta::NAT {
654 Scalar::Timedelta64(Timedelta::NAT)
655 } else {
656 Scalar::Timedelta64(*v)
657 }
658 })
659 .collect(),
660 Self::Datetime64(data) => data
661 .iter()
662 .enumerate()
663 .map(|(i, v)| {
664 if !validity.get(i) || *v == Timestamp::NAT {
665 Scalar::Datetime64(Timestamp::NAT)
666 } else {
667 Scalar::Datetime64(*v)
668 }
669 })
670 .collect(),
671 Self::Period(data) => data
672 .iter()
673 .enumerate()
674 .map(|(i, v)| {
675 if !validity.get(i) || *v == i64::MIN {
676 Scalar::Period(i64::MIN)
677 } else {
678 Scalar::Period(*v)
679 }
680 })
681 .collect(),
682 Self::Interval(data) => data
683 .iter()
684 .enumerate()
685 .map(|(i, v)| {
686 if !validity.get(i) {
687 Scalar::missing_for_dtype(dtype)
688 } else {
689 Scalar::Interval(*v)
690 }
691 })
692 .collect(),
693 }
694 }
695
696 #[must_use]
697 pub fn len(&self) -> usize {
698 match self {
699 Self::Float64(d) => d.len(),
700 Self::Int64(d) => d.len(),
701 Self::Bool(d) => d.len(),
702 Self::Utf8(d) => d.len(),
703 Self::Timedelta64(d) => d.len(),
704 Self::Datetime64(d) => d.len(),
705 Self::Period(d) => d.len(),
706 Self::Interval(d) => d.len(),
707 }
708 }
709
710 #[must_use]
711 pub fn is_empty(&self) -> bool {
712 self.len() == 0
713 }
714}
715
716fn scalar_compare(left: &Scalar, right: &Scalar, op: ComparisonOp) -> Result<bool, ColumnError> {
721 let left_dtype = left.dtype();
723 let right_dtype = right.dtype();
724 if left_dtype != right_dtype
725 && let Ok(common) = fp_types::common_dtype(left_dtype, right_dtype)
726 && common == DType::Int64
727 {
728 let l_cast = fp_types::cast_scalar(left, common)?;
729 let r_cast = fp_types::cast_scalar(right, common)?;
730 if let (Scalar::Int64(a), Scalar::Int64(b)) = (&l_cast, &r_cast) {
732 return Ok(match op {
733 ComparisonOp::Gt => a > b,
734 ComparisonOp::Lt => a < b,
735 ComparisonOp::Eq => a == b,
736 ComparisonOp::Ne => a != b,
737 ComparisonOp::Ge => a >= b,
738 ComparisonOp::Le => a <= b,
739 });
740 }
741 }
742
743 if let (Scalar::Utf8(a), Scalar::Utf8(b)) = (left, right) {
745 return Ok(match op {
746 ComparisonOp::Gt => a > b,
747 ComparisonOp::Lt => a < b,
748 ComparisonOp::Eq => a == b,
749 ComparisonOp::Ne => a != b,
750 ComparisonOp::Ge => a >= b,
751 ComparisonOp::Le => a <= b,
752 });
753 }
754
755 if let (Scalar::Bool(a), Scalar::Bool(b)) = (left, right) {
757 return Ok(match op {
758 ComparisonOp::Gt => *a && !*b,
759 ComparisonOp::Lt => !*a && *b,
760 ComparisonOp::Eq => a == b,
761 ComparisonOp::Ne => a != b,
762 ComparisonOp::Ge => *a >= *b,
763 ComparisonOp::Le => *a <= *b,
764 });
765 }
766
767 if let (Scalar::Int64(a), Scalar::Int64(b)) = (left, right) {
769 return Ok(match op {
770 ComparisonOp::Gt => a > b,
771 ComparisonOp::Lt => a < b,
772 ComparisonOp::Eq => a == b,
773 ComparisonOp::Ne => a != b,
774 ComparisonOp::Ge => a >= b,
775 ComparisonOp::Le => a <= b,
776 });
777 }
778
779 let lhs = left.to_f64()?;
781 let rhs = right.to_f64()?;
782
783 Ok(match op {
784 ComparisonOp::Gt => lhs > rhs,
785 ComparisonOp::Lt => lhs < rhs,
786 ComparisonOp::Eq => lhs == rhs,
787 ComparisonOp::Ne => lhs != rhs,
788 ComparisonOp::Ge => lhs >= rhs,
789 ComparisonOp::Le => lhs <= rhs,
790 })
791}
792
793fn vectorized_binary_f64(
799 left: &[f64],
800 right: &[f64],
801 left_validity: &ValidityMask,
802 right_validity: &ValidityMask,
803 op: ArithmeticOp,
804) -> (Vec<f64>, ValidityMask) {
805 let combined = left_validity.and_mask(right_validity);
806
807 let apply = binary_f64_apply(op);
809
810 let out: Vec<f64> = left
811 .iter()
812 .zip(right.iter())
813 .enumerate()
814 .map(|(i, (&l, &r))| {
815 if combined.get(i) {
816 apply(l, r)
817 } else {
818 0.0 }
820 })
821 .collect();
822
823 (out, combined)
824}
825
826fn binary_f64_apply(op: ArithmeticOp) -> fn(f64, f64) -> f64 {
827 match op {
828 ArithmeticOp::Add => |a, b| a + b,
829 ArithmeticOp::Sub => |a, b| a - b,
830 ArithmeticOp::Mul => |a, b| a * b,
831 ArithmeticOp::Div => |a, b| a / b,
832 ArithmeticOp::Mod => python_mod_f64,
833 ArithmeticOp::Pow => |a, b| a.powf(b),
834 ArithmeticOp::FloorDiv => python_floor_div_f64,
835 }
836}
837
838#[inline]
845fn apply_f64_slices(op: ArithmeticOp, a: &[f64], b: &[f64]) -> Vec<f64> {
846 match op {
847 ArithmeticOp::Add => a.iter().zip(b).map(|(x, y)| x + y).collect(),
848 ArithmeticOp::Sub => a.iter().zip(b).map(|(x, y)| x - y).collect(),
849 ArithmeticOp::Mul => a.iter().zip(b).map(|(x, y)| x * y).collect(),
850 ArithmeticOp::Div => a.iter().zip(b).map(|(x, y)| x / y).collect(),
851 ArithmeticOp::Mod => a
852 .iter()
853 .zip(b)
854 .map(|(x, y)| python_mod_f64(*x, *y))
855 .collect(),
856 ArithmeticOp::Pow => a.iter().zip(b).map(|(x, y)| x.powf(*y)).collect(),
857 ArithmeticOp::FloorDiv => a
858 .iter()
859 .zip(b)
860 .map(|(x, y)| python_floor_div_f64(*x, *y))
861 .collect(),
862 }
863}
864
/// Number of integers in the inclusive range `start..=end`.
///
/// Returns `None` when `end - start + 1` overflows `i64` or is negative
/// (that is, when `end < start - 1`). `end == start - 1` yields `Some(0)`.
fn unit_range_len(start: i64, end: i64) -> Option<usize> {
    let span = end.checked_sub(start)?;
    let inclusive = span.checked_add(1)?;
    match usize::try_from(inclusive) {
        Ok(count) => Some(count),
        Err(_) => None,
    }
}
868
/// Floored modulo on `f64`, following Python's `float.__mod__`.
///
/// The result takes the sign of `rhs`. It is derived from the exact truncated
/// remainder (`%` on `f64`, i.e. C `fmod`) rather than from
/// `lhs - floor(lhs / rhs) * rhs`, which loses precision or overflows once the
/// quotient is large or inexact (`1e17 % 3.0` must be `1.0`, `1.0 % 0.1` must
/// be `0.09999999999999995`).
///
/// Special cases:
/// * any NaN operand gives NaN;
/// * an infinite `rhs` gives NaN for infinite `lhs`, a zero signed like `rhs`
///   for zero `lhs`, `lhs` when the signs agree and `rhs` otherwise;
/// * a zero `rhs` or an infinite `lhs` gives NaN;
/// * an exact multiple gives a zero carrying the sign of `rhs`.
fn python_mod_f64(lhs: f64, rhs: f64) -> f64 {
    if lhs.is_nan() || rhs.is_nan() {
        return f64::NAN;
    }

    if rhs.is_infinite() {
        if lhs.is_infinite() {
            return f64::NAN;
        }
        if lhs == 0.0 {
            return 0.0_f64.copysign(rhs);
        }
        return if lhs.is_sign_positive() == rhs.is_sign_positive() {
            lhs
        } else {
            rhs
        };
    }

    // Exact truncated remainder; NaN when `rhs` is zero or `lhs` is infinite.
    let truncated = lhs % rhs;
    if truncated == 0.0 {
        // Exact multiple: normalise the zero to the divisor's sign.
        return 0.0_f64.copysign(rhs);
    }
    if (truncated < 0.0) != (rhs < 0.0) {
        // Remainder and divisor disagree in sign: shift into the divisor's range.
        truncated + rhs
    } else {
        truncated
    }
}

/// Floored division on `f64`, following Python's `float.__floordiv__`.
///
/// The quotient is computed from the exact truncated remainder, so it stays
/// consistent with [`python_mod_f64`] (`1.0 // 0.1` is `9.0`, matching a
/// remainder of `0.0999…`).
///
/// Special cases:
/// * any NaN operand gives NaN;
/// * an infinite `rhs` gives NaN for infinite `lhs`, a signed zero for zero
///   `lhs`, `0.0` when the signs agree and `-1.0` otherwise;
/// * a zero `rhs` gives the IEEE quotient `lhs / rhs` (±infinity or NaN);
/// * an infinite `lhs` with a finite non-zero `rhs` gives NaN.
fn python_floor_div_f64(lhs: f64, rhs: f64) -> f64 {
    if lhs.is_nan() || rhs.is_nan() {
        return f64::NAN;
    }

    if rhs.is_infinite() {
        if lhs.is_infinite() {
            return f64::NAN;
        }
        if lhs == 0.0 {
            return (lhs / rhs).floor();
        }
        return if lhs.is_sign_positive() == rhs.is_sign_positive() {
            0.0
        } else {
            -1.0
        };
    }

    if rhs == 0.0 {
        // Flooring ±infinity or NaN is a no-op, so return the quotient as is.
        return lhs / rhs;
    }
    if lhs.is_infinite() {
        return f64::NAN;
    }

    let truncated_rem = lhs % rhs;
    // `lhs - truncated_rem` is a multiple of `rhs` up to rounding, so this
    // quotient is close to an integer.
    let mut quotient = (lhs - truncated_rem) / rhs;
    if truncated_rem != 0.0 && (truncated_rem < 0.0) != (rhs < 0.0) {
        // Truncation rounded toward zero; floor needs one step further down.
        quotient -= 1.0;
    }
    if quotient == 0.0 {
        // Keep the sign of the true quotient on a zero result.
        return 0.0_f64.copysign(lhs / rhs);
    }
    let floored = quotient.floor();
    if quotient - floored > 0.5 {
        // Rounding left the quotient just below the integer it represents.
        floored + 1.0
    } else {
        floored
    }
}
914
/// Floored integer division (rounds toward negative infinity), as Python's
/// `//` does for ints.
///
/// `rhs` must be non-zero; this is checked with a debug assertion. The single
/// overflowing case, `i64::MIN / -1`, wraps to `i64::MIN`.
fn python_floor_div_i64(lhs: i64, rhs: i64) -> i64 {
    debug_assert_ne!(rhs, 0);

    // Wrapping forms cover `i64::MIN / -1` (quotient `i64::MIN`, remainder 0).
    let truncated = lhs.wrapping_div(rhs);
    let leftover = lhs.wrapping_rem(rhs);

    // Truncation rounds toward zero; when the remainder's sign differs from
    // the divisor's, the floored quotient is one lower.
    let rounds_down = leftover != 0 && (leftover < 0) != (rhs < 0);
    if rounds_down { truncated - 1 } else { truncated }
}
929
/// Floored integer modulo: the result has the sign of `rhs` (or is zero), as
/// Python's `%` does for ints.
///
/// `rhs` must be non-zero; this is checked with a debug assertion.
/// `i64::MIN % -1` is `0`.
///
/// The value is derived directly from the truncated remainder. That
/// remainder is strictly smaller in magnitude than `rhs`, so adding `rhs` to
/// a remainder of the opposite sign cannot overflow and no wider integer type
/// or fallback value is needed.
fn python_mod_i64(lhs: i64, rhs: i64) -> i64 {
    debug_assert_ne!(rhs, 0);

    // `wrapping_rem` yields 0 for `i64::MIN % -1` instead of overflowing.
    let truncated = lhs.wrapping_rem(rhs);
    if truncated != 0 && (truncated < 0) != (rhs < 0) {
        truncated + rhs
    } else {
        truncated
    }
}
943
944fn vectorized_binary_i64(
949 left: &[i64],
950 right: &[i64],
951 left_validity: &ValidityMask,
952 right_validity: &ValidityMask,
953 op: ArithmeticOp,
954) -> Option<(Vec<i64>, ValidityMask)> {
955 let combined = left_validity.and_mask(right_validity);
956
957 if matches!(op, ArithmeticOp::Div | ArithmeticOp::Pow) {
959 return None;
960 }
961
962 if matches!(op, ArithmeticOp::Mod | ArithmeticOp::FloorDiv) {
972 let has_zero_divisor = right
973 .iter()
974 .enumerate()
975 .any(|(i, &r)| right_validity.get(i) && r == 0);
976 if has_zero_divisor {
977 return None;
978 }
979 }
980
981 let apply: fn(i64, i64) -> i64 = match op {
982 ArithmeticOp::Add => |a, b| a.wrapping_add(b),
983 ArithmeticOp::Sub => |a, b| a.wrapping_sub(b),
984 ArithmeticOp::Mul => |a, b| a.wrapping_mul(b),
985 ArithmeticOp::Mod => python_mod_i64,
986 ArithmeticOp::FloorDiv => python_floor_div_i64,
987 ArithmeticOp::Div | ArithmeticOp::Pow => unreachable!("handled by early return above"),
988 };
989
990 let out: Vec<i64> = left
991 .iter()
992 .zip(right.iter())
993 .enumerate()
994 .map(|(i, (&l, &r))| {
995 if combined.get(i) {
996 apply(l, r)
997 } else {
998 0 }
1000 })
1001 .collect();
1002
1003 Some((out, combined))
1004}
1005
1006enum ScalarValues {
1007 Eager(Vec<Scalar>),
1008 LazyAllValidInt64 {
1009 data: Arc<[i64]>,
1010 values: OnceLock<Vec<Scalar>>,
1011 },
1012 LazyAllValidFloat64 {
1013 data: Arc<[f64]>,
1014 values: OnceLock<Vec<Scalar>>,
1015 },
1016 LazyNullableFloat64 {
1017 data: Vec<f64>,
1018 validity: ValidityMask,
1019 values: OnceLock<Vec<Scalar>>,
1020 },
1021 LazyNullableInt64 {
1027 data: Vec<i64>,
1028 validity: ValidityMask,
1029 values: OnceLock<Vec<Scalar>>,
1030 },
1031 LazyAllValidBool {
1032 data: Arc<[bool]>,
1033 values: OnceLock<Vec<Scalar>>,
1034 },
1035 LazyContiguousUtf8 {
1049 bytes: Arc<[u8]>,
1050 offsets: Arc<[usize]>,
1051 strictly_increasing: OnceLock<bool>,
1052 fixed_width: OnceLock<Option<usize>>,
1053 values: OnceLock<Vec<Scalar>>,
1054 },
1055 LazyRepeatRunsInt64 {
1062 runs: Vec<(i64, usize)>,
1063 total_len: usize,
1064 data: OnceLock<Vec<i64>>,
1065 values: OnceLock<Vec<Scalar>>,
1066 },
1067 LazyRepeatValuesInt64 {
1071 run_values: Vec<i64>,
1072 run_lens: Arc<[usize]>,
1073 total_len: usize,
1074 data: OnceLock<Vec<i64>>,
1075 values: OnceLock<Vec<Scalar>>,
1076 },
1077 LazyRepeatedSlicesInt64 {
1082 data: Vec<i64>,
1083 segments: Arc<[(usize, usize)]>,
1084 total_len: usize,
1085 expanded: OnceLock<Vec<i64>>,
1086 values: OnceLock<Vec<Scalar>>,
1087 },
1088 LazyUtf8Slice {
1098 bytes: Arc<[u8]>,
1099 offsets: Arc<[usize]>,
1100 start: usize,
1101 len: usize,
1102 values: OnceLock<Vec<Scalar>>,
1103 },
1104}
1105
/// Shared byte buffer and offsets table of an Arc-backed UTF-8 column, plus a
/// `usize`.
// NOTE(review): the trailing `usize` is presumably the first string index of
// the view (as in `LazyUtf8Slice::start`); confirm at the use sites.
type Utf8ArcViewSource = (Arc<[u8]>, Arc<[usize]>, usize);
1107
1108impl ScalarValues {
1109 fn from_vec(values: Vec<Scalar>) -> Self {
1110 Self::Eager(values)
1111 }
1112
1113 fn lazy_all_valid_int64(data: Vec<i64>) -> Self {
1114 Self::lazy_all_valid_int64_arc(Arc::from(data))
1115 }
1116
1117 fn lazy_all_valid_int64_arc(data: Arc<[i64]>) -> Self {
1121 Self::LazyAllValidInt64 {
1122 data,
1123 values: OnceLock::new(),
1124 }
1125 }
1126
1127 fn lazy_all_valid_float64(data: Vec<f64>) -> Self {
1128 Self::lazy_all_valid_float64_arc(Arc::from(data))
1129 }
1130
1131 fn lazy_all_valid_float64_arc(data: Arc<[f64]>) -> Self {
1133 Self::LazyAllValidFloat64 {
1134 data,
1135 values: OnceLock::new(),
1136 }
1137 }
1138
1139 fn lazy_nullable_float64(data: Vec<f64>, validity: ValidityMask) -> Self {
1140 Self::LazyNullableFloat64 {
1141 data,
1142 validity,
1143 values: OnceLock::new(),
1144 }
1145 }
1146
1147 fn lazy_nullable_int64(data: Vec<i64>, validity: ValidityMask) -> Self {
1148 Self::LazyNullableInt64 {
1149 data,
1150 validity,
1151 values: OnceLock::new(),
1152 }
1153 }
1154
1155 fn lazy_all_valid_bool(data: Vec<bool>) -> Self {
1156 Self::lazy_all_valid_bool_arc(Arc::from(data))
1157 }
1158
1159 fn lazy_all_valid_bool_arc(data: Arc<[bool]>) -> Self {
1161 Self::LazyAllValidBool {
1162 data,
1163 values: OnceLock::new(),
1164 }
1165 }
1166
1167 fn lazy_contiguous_utf8(bytes: Vec<u8>, offsets: Vec<usize>) -> Self {
1168 debug_assert!(!offsets.is_empty(), "offsets must hold n+1 entries");
1169 debug_assert_eq!(*offsets.last().expect("non-empty"), bytes.len());
1170 Self::lazy_contiguous_utf8_arc(Arc::from(bytes), Arc::from(offsets))
1171 }
1172
1173 fn lazy_contiguous_utf8_arc(bytes: Arc<[u8]>, offsets: Arc<[usize]>) -> Self {
1180 debug_assert!(!offsets.is_empty(), "offsets must hold n+1 entries");
1181 debug_assert_eq!(*offsets.last().expect("non-empty"), bytes.len());
1182 Self::LazyContiguousUtf8 {
1183 bytes,
1184 offsets,
1185 strictly_increasing: OnceLock::new(),
1186 fixed_width: OnceLock::new(),
1187 values: OnceLock::new(),
1188 }
1189 }
1190
1191 fn lazy_utf8_slice(bytes: Arc<[u8]>, offsets: Arc<[usize]>, start: usize, len: usize) -> Self {
1196 debug_assert!(
1197 start + len < offsets.len(),
1198 "view window must lie within the source offsets"
1199 );
1200 Self::LazyUtf8Slice {
1201 bytes,
1202 offsets,
1203 start,
1204 len,
1205 values: OnceLock::new(),
1206 }
1207 }
1208
1209 fn lazy_repeat_runs_int64(runs: Vec<(i64, usize)>, total_len: usize) -> Self {
1210 debug_assert_eq!(
1211 runs.iter().map(|&(_, run_len)| run_len).sum::<usize>(),
1212 total_len
1213 );
1214 Self::LazyRepeatRunsInt64 {
1215 runs,
1216 total_len,
1217 data: OnceLock::new(),
1218 values: OnceLock::new(),
1219 }
1220 }
1221
1222 fn lazy_repeat_values_int64(
1223 run_values: Vec<i64>,
1224 run_lens: Arc<[usize]>,
1225 total_len: usize,
1226 ) -> Self {
1227 debug_assert_eq!(run_values.len(), run_lens.len());
1228 debug_assert_eq!(run_lens.iter().sum::<usize>(), total_len);
1229 Self::LazyRepeatValuesInt64 {
1230 run_values,
1231 run_lens,
1232 total_len,
1233 data: OnceLock::new(),
1234 values: OnceLock::new(),
1235 }
1236 }
1237
1238 fn lazy_repeated_slices_int64(
1239 data: Vec<i64>,
1240 segments: Vec<(usize, usize)>,
1241 total_len: usize,
1242 ) -> Self {
1243 Self::lazy_repeated_slices_int64_shared(data, Arc::from(segments), total_len)
1244 }
1245
1246 fn lazy_repeated_slices_int64_shared(
1247 data: Vec<i64>,
1248 segments: Arc<[(usize, usize)]>,
1249 total_len: usize,
1250 ) -> Self {
1251 debug_assert_eq!(
1252 segments.iter().map(|&(_, len)| len).sum::<usize>(),
1253 total_len
1254 );
1255 debug_assert!(
1256 segments
1257 .iter()
1258 .all(|&(start, len)| start.checked_add(len).is_some_and(|end| end <= data.len()))
1259 );
1260 Self::LazyRepeatedSlicesInt64 {
1261 data,
1262 segments,
1263 total_len,
1264 expanded: OnceLock::new(),
1265 values: OnceLock::new(),
1266 }
1267 }
1268
1269 fn expand_repeat_values_i64(
1270 run_values: &[i64],
1271 run_lens: &[usize],
1272 total_len: usize,
1273 ) -> Vec<i64> {
1274 const PARALLEL_MIN_VALUES: usize = 1 << 18;
1275 const PARALLEL_MAX_CHUNKS: usize = 16;
1276
1277 debug_assert_eq!(run_values.len(), run_lens.len());
1278 let thread_count = std::thread::available_parallelism()
1279 .map_or(1, usize::from)
1280 .min(PARALLEL_MAX_CHUNKS);
1281 if total_len < PARALLEL_MIN_VALUES || thread_count < 2 || run_values.is_empty() {
1282 let mut out = Vec::with_capacity(total_len);
1283 for (&value, &run_len) in run_values.iter().zip(run_lens.iter()) {
1284 out.resize(out.len() + run_len, value);
1285 }
1286 return out;
1287 }
1288
1289 let target = total_len.div_ceil(thread_count).max(1);
1290 let mut boundaries = vec![(0usize, 0usize)];
1291 let mut cumulative = 0usize;
1292 let mut next_target = target;
1293 for (run_idx, &run_len) in run_lens.iter().enumerate() {
1294 cumulative += run_len;
1295 if cumulative >= next_target && run_idx + 1 < run_lens.len() {
1296 boundaries.push((run_idx + 1, cumulative));
1297 next_target = cumulative.saturating_add(target);
1298 }
1299 }
1300 debug_assert_eq!(cumulative, total_len);
1301 boundaries.push((run_lens.len(), total_len));
1302
1303 let mut out = vec![0i64; total_len];
1304 let mut chunk_slices = Vec::with_capacity(boundaries.len() - 1);
1305 let mut rest: &mut [i64] = out.as_mut_slice();
1306 let mut prev = 0usize;
1307 for window in boundaries.windows(2) {
1308 let (chunk_slice, tail) = rest.split_at_mut(window[1].1 - prev);
1309 prev = window[1].1;
1310 rest = tail;
1311 chunk_slices.push(chunk_slice);
1312 }
1313
1314 std::thread::scope(|scope| {
1315 let mut handles = Vec::with_capacity(chunk_slices.len());
1316 for (chunk_idx, chunk_slice) in chunk_slices.into_iter().enumerate() {
1317 let (run_start, _) = boundaries[chunk_idx];
1318 let (run_end, _) = boundaries[chunk_idx + 1];
1319 let run_values = &run_values[run_start..run_end];
1320 let run_lens = &run_lens[run_start..run_end];
1321 handles.push(scope.spawn(move || {
1322 let mut cursor = 0usize;
1323 for (&value, &run_len) in run_values.iter().zip(run_lens.iter()) {
1324 chunk_slice[cursor..cursor + run_len].fill(value);
1325 cursor += run_len;
1326 }
1327 debug_assert_eq!(cursor, chunk_slice.len());
1328 }));
1329 }
1330 for handle in handles {
1331 handle
1332 .join()
1333 .expect("repeat-value expansion worker must not panic");
1334 }
1335 });
1336 out
1337 }
1338
1339 fn expand_repeated_slices_i64(
1340 data: &[i64],
1341 segments: &[(usize, usize)],
1342 total_len: usize,
1343 ) -> Vec<i64> {
1344 const PARALLEL_MIN_VALUES: usize = 1 << 18;
1345 const PARALLEL_MAX_CHUNKS: usize = 16;
1346
1347 let thread_count = std::thread::available_parallelism()
1348 .map_or(1, usize::from)
1349 .min(PARALLEL_MAX_CHUNKS);
1350 if total_len < PARALLEL_MIN_VALUES || thread_count < 2 || segments.is_empty() {
1351 let mut out = Vec::with_capacity(total_len);
1352 for &(start, len) in segments {
1353 out.extend_from_slice(&data[start..start + len]);
1354 }
1355 return out;
1356 }
1357
1358 let target = total_len.div_ceil(thread_count).max(1);
1359 let mut boundaries = vec![(0usize, 0usize)];
1360 let mut cumulative = 0usize;
1361 let mut next_target = target;
1362 for (segment_idx, &(_, len)) in segments.iter().enumerate() {
1363 cumulative += len;
1364 if cumulative >= next_target && segment_idx + 1 < segments.len() {
1365 boundaries.push((segment_idx + 1, cumulative));
1366 next_target = cumulative.saturating_add(target);
1367 }
1368 }
1369 debug_assert_eq!(cumulative, total_len);
1370 boundaries.push((segments.len(), total_len));
1371
1372 let mut out = vec![0i64; total_len];
1373 let mut chunk_slices = Vec::with_capacity(boundaries.len() - 1);
1374 let mut rest: &mut [i64] = out.as_mut_slice();
1375 let mut prev = 0usize;
1376 for window in boundaries.windows(2) {
1377 let (chunk_slice, tail) = rest.split_at_mut(window[1].1 - prev);
1378 prev = window[1].1;
1379 rest = tail;
1380 chunk_slices.push(chunk_slice);
1381 }
1382
1383 std::thread::scope(|scope| {
1384 let mut handles = Vec::with_capacity(chunk_slices.len());
1385 for (chunk_idx, chunk_slice) in chunk_slices.into_iter().enumerate() {
1386 let (segment_start, _) = boundaries[chunk_idx];
1387 let (segment_end, _) = boundaries[chunk_idx + 1];
1388 let segments = &segments[segment_start..segment_end];
1389 handles.push(scope.spawn(move || {
1390 let mut cursor = 0usize;
1391 for &(start, len) in segments {
1392 chunk_slice[cursor..cursor + len]
1393 .copy_from_slice(&data[start..start + len]);
1394 cursor += len;
1395 }
1396 debug_assert_eq!(cursor, chunk_slice.len());
1397 }));
1398 }
1399 for handle in handles {
1400 handle
1401 .join()
1402 .expect("repeated-slice expansion worker must not panic");
1403 }
1404 });
1405 out
1406 }
1407
    /// Returns the fully expanded `i64` buffer for the run-length encoded Int64
    /// variants, or `None` for every other variant.
    ///
    /// The buffer is produced inside `data.get_or_init`, so the expansion closure
    /// only runs when that cell has not been filled yet.
    fn repeat_runs_i64_data(&self) -> Option<&[i64]> {
        // Outputs with fewer values than this are expanded on the calling thread.
        const PARALLEL_MIN_VALUES: usize = 1 << 18;
        // Upper bound on the number of worker threads (and output chunks).
        const PARALLEL_MAX_CHUNKS: usize = 16;

        if let Self::LazyRepeatRunsInt64 {
            runs,
            total_len,
            data,
            ..
        } = self
        {
            return Some(
                data.get_or_init(|| {
                    let thread_count = std::thread::available_parallelism()
                        .map_or(1, usize::from)
                        .min(PARALLEL_MAX_CHUNKS);
                    // Sequential path: small output, fewer than two usable threads,
                    // or nothing to expand.
                    if *total_len < PARALLEL_MIN_VALUES || thread_count < 2 || runs.is_empty() {
                        let mut out = Vec::with_capacity(*total_len);
                        for &(value, run_len) in runs {
                            out.resize(out.len() + run_len, value);
                        }
                        return out;
                    }

                    // Desired number of output values per chunk.
                    let target = total_len.div_ceil(thread_count).max(1);
                    // Each boundary is (index of the first run of the next chunk,
                    // output values produced before that run). Chunks always end on
                    // a run edge, so a single run is never split across workers.
                    let mut boundaries = vec![(0usize, 0usize)];
                    let mut cumulative = 0usize;
                    let mut next_target = target;
                    for (run_idx, &(_, run_len)) in runs.iter().enumerate() {
                        cumulative += run_len;
                        // The final run never opens a boundary here; the closing
                        // boundary is pushed once after the loop.
                        if cumulative >= next_target && run_idx + 1 < runs.len() {
                            boundaries.push((run_idx + 1, cumulative));
                            next_target = cumulative.saturating_add(target);
                        }
                    }
                    debug_assert_eq!(cumulative, *total_len);
                    boundaries.push((runs.len(), *total_len));

                    // Carve the output into disjoint mutable slices, one per chunk,
                    // so each worker can write without synchronisation.
                    let mut out = vec![0i64; *total_len];
                    let mut chunk_slices = Vec::with_capacity(boundaries.len() - 1);
                    let mut rest: &mut [i64] = out.as_mut_slice();
                    let mut prev = 0usize;
                    for window in boundaries.windows(2) {
                        let (chunk_slice, tail) = rest.split_at_mut(window[1].1 - prev);
                        prev = window[1].1;
                        rest = tail;
                        chunk_slices.push(chunk_slice);
                    }

                    // Scoped threads may borrow `runs` and the output slices directly.
                    std::thread::scope(|scope| {
                        let mut handles = Vec::with_capacity(chunk_slices.len());
                        for (chunk_idx, chunk_slice) in chunk_slices.into_iter().enumerate() {
                            let (run_start, _) = boundaries[chunk_idx];
                            let (run_end, _) = boundaries[chunk_idx + 1];
                            let runs = &runs[run_start..run_end];
                            handles.push(scope.spawn(move || {
                                // Fill this chunk run by run.
                                let mut cursor = 0usize;
                                for &(value, run_len) in runs {
                                    chunk_slice[cursor..cursor + run_len].fill(value);
                                    cursor += run_len;
                                }
                                debug_assert_eq!(cursor, chunk_slice.len());
                            }));
                        }
                        // A worker panic is re-raised here via `expect`.
                        for handle in handles {
                            handle
                                .join()
                                .expect("repeat-run expansion worker must not panic");
                        }
                    });
                    out
                })
                .as_slice(),
            );
        }
        // Parallel-array encoding (`run_values` + `run_lens`): expansion is
        // delegated to `expand_repeat_values_i64`.
        if let Self::LazyRepeatValuesInt64 {
            run_values,
            run_lens,
            total_len,
            data,
            ..
        } = self
        {
            return Some(
                data.get_or_init(|| {
                    Self::expand_repeat_values_i64(run_values, run_lens, *total_len)
                })
                .as_slice(),
            );
        }
        None
    }
1509
1510 fn repeated_slices_i64_data(&self) -> Option<&[i64]> {
1511 if let Self::LazyRepeatedSlicesInt64 {
1512 data,
1513 segments,
1514 total_len,
1515 expanded,
1516 ..
1517 } = self
1518 {
1519 return Some(
1520 expanded
1521 .get_or_init(|| Self::expand_repeated_slices_i64(data, segments, *total_len))
1522 .as_slice(),
1523 );
1524 }
1525 None
1526 }
1527
    /// Returns the values as a slice of `Scalar`.
    ///
    /// `Eager` storage is returned directly. Every lazy variant builds its
    /// `Vec<Scalar>` inside `values.get_or_init` and hands back that stored
    /// vector as a slice.
    fn as_slice(&self) -> &[Scalar] {
        match self {
            Self::Eager(values) => values,
            // All-valid primitive buffers: wrap each element in its scalar variant.
            Self::LazyAllValidInt64 { data, values } => values
                .get_or_init(|| data.iter().copied().map(Scalar::Int64).collect())
                .as_slice(),
            Self::LazyAllValidFloat64 { data, values } => values
                .get_or_init(|| data.iter().copied().map(Scalar::Float64).collect())
                .as_slice(),
            // Nullable floats: a row stays `Float64` when its validity bit is set
            // or when the stored payload is NaN; all other rows become `Null(NaN)`.
            Self::LazyNullableFloat64 {
                data,
                validity,
                values,
            } => values
                .get_or_init(|| {
                    data.iter()
                        .enumerate()
                        .map(|(idx, value)| {
                            if validity.get(idx) || value.is_nan() {
                                Scalar::Float64(*value)
                            } else {
                                Scalar::Null(NullKind::NaN)
                            }
                        })
                        .collect()
                })
                .as_slice(),
            Self::LazyAllValidBool { data, values } => values
                .get_or_init(|| data.iter().copied().map(Scalar::Bool).collect())
                .as_slice(),
            // Contiguous UTF-8: consecutive offset pairs delimit one string each.
            Self::LazyContiguousUtf8 {
                bytes,
                offsets,
                values,
                ..
            } => values
                .get_or_init(|| {
                    offsets
                        .windows(2)
                        .map(|w| {
                            Scalar::Utf8(
                                std::str::from_utf8(&bytes[w[0]..w[1]])
                                    .expect("contiguous utf8 buffer is valid by construction")
                                    .to_owned(),
                            )
                        })
                        .collect()
                })
                .as_slice(),
            // Nullable ints: rows without a validity bit become `Null(Null)`.
            Self::LazyNullableInt64 {
                data,
                validity,
                values,
            } => values
                .get_or_init(|| {
                    data.iter()
                        .enumerate()
                        .map(|(idx, value)| {
                            if validity.get(idx) {
                                Scalar::Int64(*value)
                            } else {
                                Scalar::Null(NullKind::Null)
                            }
                        })
                        .collect()
                })
                .as_slice(),
            // Run-length pairs: emit each value `run_len` times.
            Self::LazyRepeatRunsInt64 {
                runs,
                total_len,
                values,
                ..
            } => values
                .get_or_init(|| {
                    let mut out = Vec::with_capacity(*total_len);
                    for &(value, run_len) in runs {
                        out.resize(out.len() + run_len, Scalar::Int64(value));
                    }
                    out
                })
                .as_slice(),
            // Same expansion with values and run lengths held in separate arrays;
            // `zip` stops at the shorter of the two.
            Self::LazyRepeatValuesInt64 {
                run_values,
                run_lens,
                total_len,
                values,
                ..
            } => values
                .get_or_init(|| {
                    let mut out = Vec::with_capacity(*total_len);
                    for (&value, &run_len) in run_values.iter().zip(run_lens.iter()) {
                        out.resize(out.len() + run_len, Scalar::Int64(value));
                    }
                    out
                })
                .as_slice(),
            // Repeated slices: expand to a flat i64 buffer first, then wrap.
            // This calls the expansion helper directly rather than going
            // through `repeated_slices_i64_data`.
            Self::LazyRepeatedSlicesInt64 {
                data,
                segments,
                total_len,
                values,
                ..
            } => values
                .get_or_init(|| {
                    Self::expand_repeated_slices_i64(data, segments, *total_len)
                        .into_iter()
                        .map(Scalar::Int64)
                        .collect()
                })
                .as_slice(),
            // Window of `len` strings starting at string index `start` of a
            // contiguous UTF-8 buffer.
            Self::LazyUtf8Slice {
                bytes,
                offsets,
                start,
                len,
                values,
            } => values
                .get_or_init(|| {
                    (0..*len)
                        .map(|i| {
                            let lo = offsets[start + i];
                            let hi = offsets[start + i + 1];
                            Scalar::Utf8(
                                std::str::from_utf8(&bytes[lo..hi])
                                    .expect("contiguous utf8 buffer is valid by construction")
                                    .to_owned(),
                            )
                        })
                        .collect()
                })
                .as_slice(),
        }
    }
1661
1662 fn len(&self) -> usize {
1663 match self {
1664 Self::Eager(values) => values.len(),
1665 Self::LazyAllValidInt64 { data, .. } => data.len(),
1666 Self::LazyAllValidFloat64 { data, .. } => data.len(),
1667 Self::LazyNullableFloat64 { data, .. } => data.len(),
1668 Self::LazyAllValidBool { data, .. } => data.len(),
1669 Self::LazyContiguousUtf8 { offsets, .. } => offsets.len() - 1,
1670 Self::LazyNullableInt64 { data, .. } => data.len(),
1671 Self::LazyRepeatRunsInt64 { total_len, .. } => *total_len,
1672 Self::LazyRepeatValuesInt64 { total_len, .. } => *total_len,
1673 Self::LazyRepeatedSlicesInt64 { total_len, .. } => *total_len,
1674 Self::LazyUtf8Slice { len, .. } => *len,
1675 }
1676 }
1677
    /// Returns `true` when `len()` reports zero elements.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
1681}
1682
/// Reports whether the byte strings delimited by `offsets` inside `bytes` are
/// in strictly increasing lexicographic (byte-wise) order.
///
/// Consecutive offset pairs delimit one string each. An empty `offsets` slice
/// returns `false`; zero or one string returns `true` without touching `bytes`.
fn contiguous_utf8_offsets_are_strictly_increasing(bytes: &[u8], offsets: &[usize]) -> bool {
    if offsets.is_empty() {
        return false;
    }
    let string_count = offsets.len() - 1;
    if string_count < 2 {
        return true;
    }

    // Lazily slice each string so an early `false` stops any further slicing.
    let mut cells = offsets.windows(2).map(|pair| &bytes[pair[0]..pair[1]]);
    let Some(mut last) = cells.next() else {
        return true;
    };
    for cell in cells {
        // Equal neighbours also fail: the order must be strict.
        if last >= cell {
            return false;
        }
        last = cell;
    }
    true
}
1701
/// Returns the common byte width of all strings delimited by `offsets`.
///
/// Yields `None` when `offsets` is empty, when any pair of neighbouring
/// offsets is decreasing, or when two strings differ in width. A single
/// offset (zero strings) yields `Some(0)`.
fn contiguous_utf8_fixed_width(offsets: &[usize]) -> Option<usize> {
    if offsets.is_empty() {
        return None;
    }
    // Width of each string; `None` marks a decreasing offset pair.
    let mut widths = offsets.windows(2).map(|pair| pair[1].checked_sub(pair[0]));
    let Some(first) = widths.next() else {
        return Some(0);
    };
    let expected = first?;
    for width in widths {
        if width? != expected {
            return None;
        }
    }
    Some(expected)
}
1715
/// Returns the first position when `positions` is exactly
/// `first, first + 1, first + 2, ...`, and `None` otherwise.
///
/// An empty slice yields `None`. The expected position is computed with
/// `checked_add`, so a run that would step past `usize::MAX` is rejected
/// rather than overflowing (a panic in debug builds, a silent wrap that could
/// accept `[usize::MAX, 0]` in release builds).
fn contiguous_ascending_start(positions: &[usize]) -> Option<usize> {
    let first = *positions.first()?;
    let consecutive = positions
        .iter()
        .enumerate()
        .all(|(offset, &pos)| first.checked_add(offset) == Some(pos));
    consecutive.then_some(first)
}
1730
/// Clones by rebuilding each variant through its constructor.
///
/// The fields skipped with `..` in the patterns below are not passed to those
/// constructors.
impl Clone for ScalarValues {
    fn clone(&self) -> Self {
        match self {
            // Eager storage clones the scalar vector itself.
            Self::Eager(values) => Self::Eager(values.clone()),
            // Arc-backed buffers are shared by bumping the reference count.
            Self::LazyAllValidInt64 { data, .. } => Self::lazy_all_valid_int64_arc(Arc::clone(data)),
            Self::LazyAllValidFloat64 { data, .. } => {
                Self::lazy_all_valid_float64_arc(Arc::clone(data))
            }
            Self::LazyNullableFloat64 { data, validity, .. } => {
                Self::lazy_nullable_float64(data.clone(), validity.clone())
            }
            Self::LazyAllValidBool { data, .. } => Self::lazy_all_valid_bool_arc(Arc::clone(data)),
            Self::LazyContiguousUtf8 { bytes, offsets, .. } => {
                Self::lazy_contiguous_utf8_arc(Arc::clone(bytes), Arc::clone(offsets))
            }
            Self::LazyNullableInt64 { data, validity, .. } => {
                Self::lazy_nullable_int64(data.clone(), validity.clone())
            }
            Self::LazyRepeatRunsInt64 {
                runs, total_len, ..
            } => Self::lazy_repeat_runs_int64(runs.clone(), *total_len),
            Self::LazyRepeatValuesInt64 {
                run_values,
                run_lens,
                total_len,
                ..
            } => {
                Self::lazy_repeat_values_int64(run_values.clone(), Arc::clone(run_lens), *total_len)
            }
            Self::LazyRepeatedSlicesInt64 {
                data,
                segments,
                total_len,
                ..
            } => Self::lazy_repeated_slices_int64_shared(
                data.clone(),
                Arc::clone(segments),
                *total_len,
            ),
            Self::LazyUtf8Slice {
                bytes,
                offsets,
                start,
                len,
                ..
            } => Self::lazy_utf8_slice(Arc::clone(bytes), Arc::clone(offsets), *start, *len),
        }
    }
}
1780
/// Dereferences to `[Scalar]` through `as_slice`, so slice methods can be
/// called on a `ScalarValues` directly.
impl std::ops::Deref for ScalarValues {
    type Target = [Scalar];

    fn deref(&self) -> &Self::Target {
        self.as_slice()
    }
}
1788
/// Borrowing iteration over the scalars, backed by the slice from `as_slice`.
impl<'a> IntoIterator for &'a ScalarValues {
    type Item = &'a Scalar;
    type IntoIter = std::slice::Iter<'a, Scalar>;

    fn into_iter(self) -> Self::IntoIter {
        self.as_slice().iter()
    }
}
1797
/// Equality compares the `as_slice` views of both sides, so the storage
/// variant itself does not take part in the comparison.
impl PartialEq for ScalarValues {
    fn eq(&self, other: &Self) -> bool {
        self.as_slice() == other.as_slice()
    }
}
1803
/// Debug output is that of the `[Scalar]` slice returned by `as_slice`.
impl std::fmt::Debug for ScalarValues {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.as_slice().fmt(f)
    }
}
1809
/// Serializes as the `[Scalar]` slice returned by `as_slice`.
impl Serialize for ScalarValues {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        self.as_slice().serialize(serializer)
    }
}
1815
/// Deserializes a `Vec<Scalar>` and stores it as the `Eager` variant.
impl<'de> Deserialize<'de> for ScalarValues {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        Vec::<Scalar>::deserialize(deserializer).map(Self::Eager)
    }
}
1821
/// A typed column: scalar values, a validity mask and an optional typed cache.
#[derive(Serialize, Deserialize)]
pub struct Column {
    /// Logical dtype of the column.
    dtype: DType,
    /// The column's values (possibly stored lazily).
    values: ScalarValues,
    /// Validity mask for the rows of `values`.
    validity: ValidityMask,
    /// Optional typed buffer. Skipped by serde, ignored by `PartialEq`, and
    /// reset to `None` by `Clone`.
    #[serde(skip)]
    data: Option<ColumnData>,
}
1830
1831impl Clone for Column {
1832 fn clone(&self) -> Self {
1833 Self {
1834 dtype: self.dtype,
1835 values: self
1836 .clone_dense_values_from_cache()
1837 .unwrap_or_else(|| self.values.clone()),
1838 validity: self.validity.clone(),
1839 data: None,
1840 }
1841 }
1842}
1843
/// Two columns are equal when dtype, values and validity all match; the
/// `data` cache is not compared.
impl PartialEq for Column {
    fn eq(&self, other: &Self) -> bool {
        self.dtype == other.dtype && self.values == other.values && self.validity == other.validity
    }
}
1849
/// Debug output lists `dtype`, `values` and `validity`; the `data` cache is
/// left out.
impl std::fmt::Debug for Column {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Column")
            .field("dtype", &self.dtype)
            .field("values", &self.values)
            .field("validity", &self.validity)
            .finish()
    }
}
1859
/// Sparse column: only entries that differ from the dtype's fill value are stored.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SparseColumn {
    /// Sparse dtype, carrying the value dtype and the fill value.
    dtype: SparseDType,
    /// Length of the dense column this represents.
    len: usize,
    /// Dense positions of the stored entries, parallel to `values`.
    indices: Vec<usize>,
    /// Stored (non-fill) entries, parallel to `indices`.
    values: Vec<Scalar>,
}
1867
/// Selector for a binary arithmetic operation.
///
/// Variants (de)serialize under their snake_case names, e.g. `FloorDiv` as
/// `"floor_div"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ArithmeticOp {
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Pow,
    FloorDiv,
}
1879
/// Selector for a binary comparison.
///
/// Variants (de)serialize under their snake_case names, e.g. `Gt` as `"gt"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ComparisonOp {
    Gt,
    Lt,
    Eq,
    Ne,
    Ge,
    Le,
}
1894
1895fn nkeep_impl(col: &Column, n: usize, keep: &str, ascending: bool) -> Result<Column, ColumnError> {
1896 if !matches!(keep, "first" | "last" | "all") {
1897 return Err(ColumnError::Type(TypeError::NonNumericValue {
1898 value: keep.to_string(),
1899 dtype: col.dtype(),
1900 }));
1901 }
1902 let mut indexed: Vec<(usize, &Scalar)> = col.values().iter().enumerate().collect();
1906 indexed.sort_by(|a, b| {
1907 let primary = compare_scalars_na_last(a.1, b.1, ascending);
1908 match (primary, keep) {
1909 (std::cmp::Ordering::Equal, "last") => b.0.cmp(&a.0),
1911 (std::cmp::Ordering::Equal, _) => a.0.cmp(&b.0),
1912 _ => primary,
1913 }
1914 });
1915 let take = n.min(indexed.len());
1916 let mut end = take;
1917 if keep == "all" && take > 0 && take < indexed.len() {
1918 let boundary = indexed[take - 1].1;
1919 while end < indexed.len() {
1920 let same = compare_scalars_na_last(indexed[end].1, boundary, ascending).is_eq();
1921 if !same {
1922 break;
1923 }
1924 end += 1;
1925 }
1926 }
1927 let values: Vec<Scalar> = indexed[..end].iter().map(|(_, v)| (*v).clone()).collect();
1928 Column::new(col.dtype(), values)
1929}
1930
1931fn is_monotonic_in_direction(values: &[Scalar], increasing: bool) -> bool {
1932 let mut prev: Option<&Scalar> = None;
1933 for v in values {
1934 if v.is_missing() {
1935 continue;
1936 }
1937 if let Some(p) = prev {
1938 let ord = compare_scalars_na_last(p, v, true);
1939 let ok = matches!(
1943 (ord, increasing),
1944 (std::cmp::Ordering::Less, true)
1945 | (std::cmp::Ordering::Equal, _)
1946 | (std::cmp::Ordering::Greater, false)
1947 );
1948 if !ok {
1949 return false;
1950 }
1951 }
1952 prev = Some(v);
1953 }
1954 true
1955}
1956
1957fn compare_scalars_na_last(left: &Scalar, right: &Scalar, ascending: bool) -> std::cmp::Ordering {
1958 use std::cmp::Ordering;
1959 match (left.is_missing(), right.is_missing()) {
1960 (true, true) => Ordering::Equal,
1961 (true, false) => Ordering::Greater,
1963 (false, true) => Ordering::Less,
1964 (false, false) => {
1965 let ord = match (left, right) {
1966 (Scalar::Int64(a), Scalar::Int64(b)) => a.cmp(b),
1967 (Scalar::Float64(a), Scalar::Float64(b)) => {
1968 a.partial_cmp(b).unwrap_or(Ordering::Equal)
1969 }
1970 (Scalar::Bool(a), Scalar::Bool(b)) => a.cmp(b),
1971 (Scalar::Utf8(a), Scalar::Utf8(b)) => a.cmp(b),
1972 (Scalar::Timedelta64(a), Scalar::Timedelta64(b)) => a.cmp(b),
1973 (a, b) => match (a.to_f64(), b.to_f64()) {
1974 (Ok(af), Ok(bf)) => af.partial_cmp(&bf).unwrap_or(Ordering::Equal),
1975 _ => Ordering::Equal,
1976 },
1977 };
1978 if ascending { ord } else { ord.reverse() }
1979 }
1980 }
1981}
1982
/// Which occurrences of a repeated key are flagged as duplicates.
#[derive(Clone, Copy)]
enum DupPolicy {
    /// Flag every occurrence except the first one.
    First,
    /// Flag every occurrence except the last one.
    Last,
    /// Flag every occurrence of a key that appears more than once.
    None,
}
1990
1991fn duplicated_flags_typed<T>(keys: &[T], policy: DupPolicy) -> Vec<bool>
1997where
1998 T: std::hash::Hash + Eq + Copy,
1999{
2000 let n = keys.len();
2001 let mut flags = vec![false; n];
2002 match policy {
2003 DupPolicy::First => {
2004 let mut seen: FxHashSet<T> = FxHashSet::with_capacity_and_hasher(n, Default::default());
2005 for (idx, &k) in keys.iter().enumerate() {
2006 flags[idx] = !seen.insert(k);
2007 }
2008 }
2009 DupPolicy::Last => {
2010 let mut seen: FxHashSet<T> = FxHashSet::with_capacity_and_hasher(n, Default::default());
2011 for (idx, &k) in keys.iter().enumerate().rev() {
2012 flags[idx] = !seen.insert(k);
2013 }
2014 }
2015 DupPolicy::None => {
2016 let mut seen_once: FxHashSet<T> =
2017 FxHashSet::with_capacity_and_hasher(n, Default::default());
2018 let mut seen_multiple: FxHashSet<T> = FxHashSet::default();
2019 for &k in keys {
2020 if !seen_once.insert(k) {
2021 seen_multiple.insert(k);
2022 }
2023 }
2024 for (idx, &k) in keys.iter().enumerate() {
2025 flags[idx] = seen_multiple.contains(&k);
2026 }
2027 }
2028 }
2029 flags
2030}
2031
/// Largest value span (`max - min + 1`) for which `i64_direct_address_range`
/// still reports a direct-address table as usable.
const DUP_DIRECT_ADDRESS_CAP: u128 = 1 << 24;
2037
2038fn i64_direct_address_range(data: &[i64]) -> Option<(i64, usize)> {
2043 let mut min = data.first().copied()?;
2044 let mut max = min;
2045 for &v in &data[1..] {
2046 if v < min {
2047 min = v;
2048 } else if v > max {
2049 max = v;
2050 }
2051 }
2052 let range = (max as i128 - min as i128 + 1) as u128;
2053 if range <= DUP_DIRECT_ADDRESS_CAP && range <= (data.len() as u128).saturating_mul(16) {
2054 Some((min, range as usize))
2055 } else {
2056 None
2057 }
2058}
2059
2060fn duplicated_flags_i64_direct(
2065 data: &[i64],
2066 min: i64,
2067 range: usize,
2068 policy: DupPolicy,
2069) -> Vec<bool> {
2070 let n = data.len();
2071 let mut flags = vec![false; n];
2072 let slot = |v: i64| (v as i128 - min as i128) as usize;
2073 match policy {
2074 DupPolicy::First => {
2075 let mut seen = vec![false; range];
2076 for (idx, &v) in data.iter().enumerate() {
2077 let s = slot(v);
2078 flags[idx] = seen[s];
2079 seen[s] = true;
2080 }
2081 }
2082 DupPolicy::Last => {
2083 let mut seen = vec![false; range];
2084 for (idx, &v) in data.iter().enumerate().rev() {
2085 let s = slot(v);
2086 flags[idx] = seen[s];
2087 seen[s] = true;
2088 }
2089 }
2090 DupPolicy::None => {
2091 let mut count = vec![0u8; range];
2093 for &v in data {
2094 let s = slot(v);
2095 if count[s] < 2 {
2096 count[s] += 1;
2097 }
2098 }
2099 for (idx, &v) in data.iter().enumerate() {
2100 flags[idx] = count[slot(v)] > 1;
2101 }
2102 }
2103 }
2104 flags
2105}
2106
/// Maps an `i64` to a `u64` whose unsigned order matches the signed order of
/// the input, by flipping the sign bit.
#[inline]
fn i64_radix_key(value: i64) -> u64 {
    // `i64::MIN` has only the sign bit set.
    (value ^ i64::MIN) as u64
}
2113
/// Maps an `f64` to a `u64` sort key.
///
/// `-0.0` is folded onto `+0.0` first. Values whose sign bit is set have all
/// bits inverted; all others get the sign bit set. For non-NaN inputs the
/// unsigned order of the keys follows the numeric order of the floats.
#[inline]
fn f64_radix_key(value: f64) -> u64 {
    const SIGN_BIT: u64 = 1 << 63;
    let canonical = if value == 0.0 { 0.0_f64 } else { value };
    let raw = canonical.to_bits();
    if raw & SIGN_BIT == 0 {
        raw | SIGN_BIT
    } else {
        !raw
    }
}
2130
/// Stable ascending argsort of `u64` keys using a least-significant-digit
/// radix sort with one pass per byte.
///
/// Returns the permutation of `0..keys.len()` that orders the keys; equal
/// keys keep their original relative order.
fn radix_argsort_u64(keys: &[u64]) -> Vec<usize> {
    let len = keys.len();
    let mut order: Vec<usize> = (0..len).collect();
    if len < 2 {
        return order;
    }
    let mut buffer = vec![0usize; len];
    for pass in 0..8u32 {
        let shift = pass * 8;
        let digit = |key: u64| ((key >> shift) & 0xff) as usize;

        let mut histogram = [0usize; 256];
        for &key in keys {
            histogram[digit(key)] += 1;
        }
        // Every key shares this byte: the pass would not reorder anything.
        if histogram.iter().any(|&count| count == len) {
            continue;
        }
        // Turn counts into the starting offset of each bucket.
        let mut next_start = 0usize;
        for bucket in histogram.iter_mut() {
            let occupied = std::mem::replace(bucket, next_start);
            next_start += occupied;
        }
        // Stable scatter in the current order.
        for &row in &order {
            let cursor = &mut histogram[digit(keys[row])];
            buffer[*cursor] = row;
            *cursor += 1;
        }
        std::mem::swap(&mut order, &mut buffer);
    }
    order
}
2169
2170#[must_use]
2178pub fn radix_argsort_i64(values: &[i64], ascending: bool) -> Vec<usize> {
2179 let keys: Vec<u64> = if ascending {
2180 values.iter().map(|&v| i64_radix_key(v)).collect()
2181 } else {
2182 values.iter().map(|&v| !i64_radix_key(v)).collect()
2183 };
2184 radix_argsort_u64(&keys)
2185}
2186
/// Stable lexicographic argsort over several `u64` key columns.
///
/// `keys_by_col[0]` is the most significant key and the last column the least
/// significant. The row count `n` is the length of the first column, and the
/// result is a permutation of `0..n` ordering rows ascending by their key
/// tuples; rows with identical tuples keep their original order.
///
/// Only the first `n` keys of every column take part. Building the histogram
/// from a longer column while scattering just `n` rows would misplace rows or
/// wrongly skip a pass, so each column is trimmed to `n` before use.
///
/// # Panics
/// Panics when there are at least two rows and some column is shorter than
/// the first one.
pub fn radix_argsort_multi_u64(keys_by_col: &[Vec<u64>]) -> Vec<usize> {
    let n = keys_by_col.first().map_or(0, Vec::len);
    let mut idx: Vec<usize> = (0..n).collect();
    // Covers the empty-input case as well, since `n` is then 0.
    if n < 2 {
        return idx;
    }
    let mut scratch: Vec<usize> = vec![0; n];
    // Least significant column first; each stable pass refines the previous order.
    for column in keys_by_col.iter().rev() {
        let keys = column
            .get(..n)
            .expect("every key column must have at least as many rows as the first column");
        for shift in (0..64).step_by(8) {
            let mut count = [0usize; 256];
            for &k in keys {
                count[((k >> shift) & 0xff) as usize] += 1;
            }
            // All rows share this byte: the pass would be a no-op.
            if count.contains(&n) {
                continue;
            }
            // Counts -> starting offset of each bucket.
            let mut running = 0usize;
            for slot in &mut count {
                let c = *slot;
                *slot = running;
                running += c;
            }
            for &i in &idx {
                let bucket = ((keys[i] >> shift) & 0xff) as usize;
                scratch[count[bucket]] = i;
                count[bucket] += 1;
            }
            std::mem::swap(&mut idx, &mut scratch);
        }
    }
    idx
}
2228
2229fn utf8_msd_argsort(strs: &[&str], ascending: bool) -> Vec<usize> {
2241 let n = strs.len();
2242 let mut idx: Vec<usize> = (0..n).collect();
2243 if n > 1 {
2244 let mut aux: Vec<usize> = vec![0; n];
2245 utf8_msd_sort_range(strs, &mut idx, &mut aux, 0, n, 0, ascending);
2246 }
2247 idx
2248}
2249
/// Sorts the index range `idx[lo..hi]` by the bytes of the referenced strings
/// from byte position `depth` onwards (most-significant-digit radix sort).
///
/// * `strs` - the strings being ordered; `idx` holds indices into it.
/// * `aux` - scratch space, indexed with the same `lo..hi` window as `idx`.
/// * `depth` - byte position compared at this level.
/// * `ascending` - sort direction.
///
/// The suffix slicing `[depth..]` below requires every string in the range
/// to have at least `depth` bytes; the recursion upholds this by never
/// descending into the end-of-string bucket.
fn utf8_msd_sort_range(
    strs: &[&str],
    idx: &mut [usize],
    aux: &mut [usize],
    lo: usize,
    hi: usize,
    depth: usize,
    ascending: bool,
) {
    let n = hi - lo;
    if n <= 1 {
        return;
    }
    // Ranges at or below this size are finished with a comparison sort.
    const CUTOFF: usize = 48;
    // Recursion depth limit; beyond it the comparison sort takes over.
    const MAX_DEPTH: usize = 1024;
    if n <= CUTOFF || depth >= MAX_DEPTH {
        // Compare only the suffixes starting at `depth`.
        idx[lo..hi].sort_by(|&a, &b| {
            let ord = strs[a].as_bytes()[depth..].cmp(&strs[b].as_bytes()[depth..]);
            if ascending { ord } else { ord.reverse() }
        });
        return;
    }
    // Bucket number for a string at this depth (257 buckets, 0..=256):
    //   ascending:  end-of-string -> 0,   byte b -> b + 1
    //   descending: byte b -> 255 - b,    end-of-string -> 256
    let key = |s: &str| -> usize {
        let b = s.as_bytes();
        if depth < b.len() {
            if ascending {
                b[depth] as usize + 1
            } else {
                255 - b[depth] as usize
            }
        } else if ascending {
            0
        } else {
            256
        }
    };
    // Histogram shifted by one so that, after the prefix sum, `counts[k]` is
    // the start of bucket `k` and `counts[k + 1]` its end.
    let mut counts = [0usize; 258];
    for &i in idx[lo..hi].iter() {
        counts[key(strs[i]) + 1] += 1;
    }
    for k in 1..258 {
        counts[k] += counts[k - 1];
    }
    // Working copy used as per-bucket write cursors; `counts` keeps the bounds.
    let mut offsets = counts;
    for &i in idx[lo..hi].iter() {
        let k = key(strs[i]);
        aux[lo + offsets[k]] = i;
        offsets[k] += 1;
    }
    idx[lo..hi].copy_from_slice(&aux[lo..hi]);
    // Strings that ended at this depth are identical from here on and need no
    // further sorting.
    let eos_bucket = if ascending { 0 } else { 256 };
    for k in 0..257 {
        if k == eos_bucket {
            continue;
        }
        let b_lo = lo + counts[k];
        let b_hi = lo + counts[k + 1];
        if b_hi - b_lo > 1 {
            utf8_msd_sort_range(strs, idx, aux, b_lo, b_hi, depth + 1, ascending);
        }
    }
}
2323
/// Bit pattern of `value`, with `-0.0` folded onto `+0.0` so both zeros
/// produce the same bits. All other values (NaN included) keep their own bits.
fn normalized_float_bits(value: f64) -> u64 {
    if value == 0.0 {
        0.0_f64.to_bits()
    } else {
        value.to_bits()
    }
}
2328
2329fn interval_key(interval: &Interval) -> (u64, u64, IntervalClosed) {
2330 (
2331 normalized_float_bits(interval.left),
2332 normalized_float_bits(interval.right),
2333 interval.closed,
2334 )
2335}
2336
/// Hashable, `Eq`-comparable view of a non-null `Scalar`, built by
/// `set_member_key`.
///
/// Floats are carried as bit patterns and strings are borrowed from the
/// scalar they came from.
#[derive(Hash, PartialEq, Eq)]
enum SetMemberKey<'a> {
    Bool(bool),
    Int64(i64),
    /// Bit pattern of a float, with `-0.0` already folded onto `+0.0`.
    FloatBits(u64),
    Utf8(&'a str),
    Timedelta64(i64),
    Datetime64(i64),
    Period(i64),
    /// Left bits, right bits and closedness, as produced by `interval_key`.
    Interval(u64, u64, IntervalClosed),
}
2356
2357fn set_member_key(v: &Scalar) -> Option<SetMemberKey<'_>> {
2358 Some(match v {
2359 Scalar::Bool(b) => SetMemberKey::Bool(*b),
2360 Scalar::Int64(i) => SetMemberKey::Int64(*i),
2361 Scalar::Float64(f) => {
2362 let norm = if *f == 0.0 { 0.0 } else { *f };
2363 SetMemberKey::FloatBits(norm.to_bits())
2364 }
2365 Scalar::Utf8(s) => SetMemberKey::Utf8(s.as_str()),
2366 Scalar::Timedelta64(v) => SetMemberKey::Timedelta64(*v),
2367 Scalar::Datetime64(v) => SetMemberKey::Datetime64(*v),
2368 Scalar::Period(v) => SetMemberKey::Period(*v),
2369 Scalar::Interval(v) => {
2370 let (left, right, closed) = interval_key(v);
2371 SetMemberKey::Interval(left, right, closed)
2372 }
2373 Scalar::Null(_) => return None,
2374 })
2375}
2376
/// Errors reported by column operations.
#[derive(Debug, Error, Clone, PartialEq)]
pub enum ColumnError {
    /// Two columns were expected to have the same length but did not.
    #[error("column length mismatch: left={left}, right={right}")]
    LengthMismatch { left: usize, right: usize },
    /// `operation` needs exactly `expected` elements but received `actual`.
    #[error("{operation} requires exactly {expected} element(s), got {actual}")]
    InvalidLength {
        operation: &'static str,
        expected: usize,
        actual: usize,
    },
    /// A sorter permutation was rejected for a column of length `len`;
    /// `reason` explains why.
    #[error("invalid sorter permutation for column of length {len}: {reason}")]
    InvalidSorter { len: usize, reason: String },
    /// A mask had a dtype other than Bool.
    #[error("mask must be Bool dtype; found {dtype:?}")]
    InvalidMaskType { dtype: DType },
    /// Two columns were expected to share a dtype but did not.
    #[error("column dtype mismatch: left={left:?}, right={right:?}")]
    DTypeMismatch { left: DType, right: DType },
    /// An integer was raised to a negative integer power.
    #[error("Integers to negative integer powers are not allowed.")]
    NegativeIntegerPower,
    /// A wrapped `TypeError`; its message is used unchanged, and `?` converts
    /// into this variant automatically.
    #[error(transparent)]
    Type(#[from] TypeError),
}
2398
2399impl SparseColumn {
2400 pub fn from_dense(dtype: SparseDType, values: Vec<Scalar>) -> Result<Self, ColumnError> {
2401 let len = values.len();
2402 let value_dtype = dtype.value_dtype;
2403 let fill_value = dtype.fill_value.clone();
2404 let mut indices = Vec::new();
2405 let mut sparse_values = Vec::new();
2406
2407 for (idx, value) in values.into_iter().enumerate() {
2408 let value = if value.dtype() == value_dtype || value.dtype() == DType::Null {
2409 Column::normalize_missing_for_dtype(value, value_dtype)
2410 } else {
2411 cast_scalar_owned(value, value_dtype)?
2412 };
2413
2414 if !value.semantic_eq(&fill_value) {
2415 indices.push(idx);
2416 sparse_values.push(value);
2417 }
2418 }
2419
2420 Ok(Self {
2421 dtype,
2422 len,
2423 indices,
2424 values: sparse_values,
2425 })
2426 }
2427
2428 pub fn from_dense_column(dtype: SparseDType, column: &Column) -> Result<Self, ColumnError> {
2429 Self::from_dense(dtype, column.values().to_vec())
2430 }
2431
2432 #[must_use]
2433 pub fn sparse_dtype(&self) -> &SparseDType {
2434 &self.dtype
2435 }
2436
2437 #[must_use]
2438 pub fn value_dtype(&self) -> DType {
2439 self.dtype.value_dtype
2440 }
2441
2442 #[must_use]
2443 pub fn fill_value(&self) -> &Scalar {
2444 &self.dtype.fill_value
2445 }
2446
2447 #[must_use]
2448 pub fn len(&self) -> usize {
2449 self.len
2450 }
2451
2452 #[must_use]
2453 pub fn is_empty(&self) -> bool {
2454 self.len == 0
2455 }
2456
2457 #[must_use]
2458 pub fn indices(&self) -> &[usize] {
2459 &self.indices
2460 }
2461
2462 #[must_use]
2463 pub fn stored_values(&self) -> &[Scalar] {
2464 &self.values
2465 }
2466
2467 #[must_use]
2468 pub fn npoints(&self) -> usize {
2469 self.values.len()
2470 }
2471
2472 #[must_use]
2473 pub fn density(&self) -> f64 {
2474 if self.len == 0 {
2475 0.0
2476 } else {
2477 self.values.len() as f64 / self.len as f64
2478 }
2479 }
2480
2481 #[must_use]
2482 pub fn to_dense_values(&self) -> Vec<Scalar> {
2483 let mut values = vec![self.dtype.fill_value.clone(); self.len];
2484 for (&idx, value) in self.indices.iter().zip(self.values.iter()) {
2485 values[idx] = value.clone();
2486 }
2487 values
2488 }
2489
2490 pub fn to_dense_column(&self) -> Result<Column, ColumnError> {
2491 Column::new(self.dtype.value_dtype, self.to_dense_values())
2492 }
2493}
2494
/// Converts an `i64` to `usize`, clamping non-positive values to 0 and values
/// too large for `usize` to `usize::MAX`.
fn saturating_i64_to_usize(value: i64) -> usize {
    // After clamping at zero the only possible failure is "too large".
    usize::try_from(value.max(0)).unwrap_or(usize::MAX)
}
2502
/// Absolute value of an `i64` as `usize`, clamped to `usize::MAX` when it
/// does not fit. `i64::MIN` is handled without overflow via `unsigned_abs`.
fn saturating_i64_abs_to_usize(value: i64) -> usize {
    let magnitude = value.unsigned_abs();
    match usize::try_from(magnitude) {
        Ok(fits) => fits,
        Err(_) => usize::MAX,
    }
}
2506
2507fn normalize_head_take(n: i64, len: usize) -> usize {
2508 if n >= 0 {
2509 saturating_i64_to_usize(n).min(len)
2510 } else {
2511 len.saturating_sub(saturating_i64_abs_to_usize(n))
2512 }
2513}
2514
2515fn normalize_tail_window(n: i64, len: usize) -> (usize, usize) {
2516 if n >= 0 {
2517 let take = saturating_i64_to_usize(n).min(len);
2518 (len - take, take)
2519 } else {
2520 let skip = saturating_i64_abs_to_usize(n).min(len);
2521 (skip, len - skip)
2522 }
2523}
2524
2525fn round_i64_negative_decimals(value: i64, decimals: i32) -> i64 {
2526 debug_assert!(decimals < 0);
2527 let factor = match 10_i128.checked_pow(decimals.unsigned_abs()) {
2528 Some(factor) => factor,
2529 None => return 0,
2530 };
2531 let magnitude = i128::from(value).abs();
2532 let quotient = magnitude / factor;
2533 let remainder = magnitude % factor;
2534 let rounded_magnitude = match (remainder * 2).cmp(&factor) {
2535 std::cmp::Ordering::Less => quotient * factor,
2536 std::cmp::Ordering::Greater => (quotient + 1) * factor,
2537 std::cmp::Ordering::Equal if quotient % 2 == 0 => quotient * factor,
2538 std::cmp::Ordering::Equal => (quotient + 1) * factor,
2539 };
2540 let rounded = if value < 0 {
2541 -rounded_magnitude
2542 } else {
2543 rounded_magnitude
2544 };
2545 match i64::try_from(rounded) {
2546 Ok(value) => value,
2547 Err(_) if rounded < 0 => i64::MIN,
2548 Err(_) => i64::MAX,
2549 }
2550}
2551
2552impl Column {
    /// Rebuilds the column's values from the typed `data` cache, if possible.
    ///
    /// Returns `None` unless the validity mask has the same length as the
    /// values and reports every row as valid, and a cache of the matching
    /// kind and length is present for the column's dtype.
    fn clone_dense_values_from_cache(&self) -> Option<ScalarValues> {
        // Only fully valid columns are rebuilt from the cache.
        if self.validity.len() != self.values.len()
            || self.validity.count_valid() != self.values.len()
        {
            return None;
        }

        match (&self.data, self.dtype) {
            // Bool / Int64 / Float64 go through the lazy all-valid constructors.
            (Some(ColumnData::Bool(data)), DType::Bool)
                if data.len() == self.values.len() =>
            {
                Some(ScalarValues::lazy_all_valid_bool(data.clone()))
            }
            (Some(ColumnData::Int64(data)), DType::Int64)
                if data.len() == self.values.len() =>
            {
                Some(ScalarValues::lazy_all_valid_int64(data.clone()))
            }
            (Some(ColumnData::Float64(data)), DType::Float64)
                if data.len() == self.values.len() =>
            {
                Some(ScalarValues::lazy_all_valid_float64(data.clone()))
            }
            // The remaining kinds are turned into scalars right here and
            // passed to `from_vec`.
            (Some(ColumnData::Timedelta64(data)), DType::Timedelta64)
                if data.len() == self.values.len() =>
            {
                Some(ScalarValues::from_vec(
                    data.iter().copied().map(Scalar::Timedelta64).collect(),
                ))
            }
            (Some(ColumnData::Datetime64(data)), DType::Datetime64)
                if data.len() == self.values.len() =>
            {
                Some(ScalarValues::from_vec(
                    data.iter().copied().map(Scalar::Datetime64).collect(),
                ))
            }
            (Some(ColumnData::Period(data)), DType::Period) if data.len() == self.values.len() => {
                Some(ScalarValues::from_vec(
                    data.iter().copied().map(Scalar::Period).collect(),
                ))
            }
            // No cache, a kind/dtype mismatch, or a length mismatch.
            _ => None,
        }
    }
2615
2616 fn cached_data_for_values(dtype: DType, values: &[Scalar]) -> Option<ColumnData> {
2617 match dtype {
2618 DType::Bool
2619 | DType::BoolNullable
2620 | DType::Int64
2621 | DType::Int64Nullable
2622 | DType::Float64
2623 | DType::Timedelta64
2624 | DType::Datetime64
2625 | DType::Period => Some(ColumnData::from_scalars(values, dtype)),
2626 _ => None,
2627 }
2628 }
2629
2630 fn normalize_missing_for_dtype(value: Scalar, dtype: DType) -> Scalar {
2631 match value {
2632 Scalar::Null(NullKind::NaN) => Scalar::Null(NullKind::NaN),
2633 Scalar::Null(NullKind::NaT) => Scalar::Null(NullKind::NaT),
2634 Scalar::Null(_) => Scalar::missing_for_dtype(dtype),
2635 other => other,
2636 }
2637 }
2638
2639 pub fn new(dtype: DType, values: Vec<Scalar>) -> Result<Self, ColumnError> {
2643 let preserve_utf8_object_bucket = matches!(dtype, DType::Utf8)
2644 && values.iter().any(|value| matches!(value, Scalar::Utf8(_)))
2645 && values
2646 .iter()
2647 .any(|value| !matches!(value, Scalar::Utf8(_) | Scalar::Null(_)));
2648 let needs_coercion = values.iter().any(|v| {
2649 let d = v.dtype();
2650 d != dtype && d != DType::Null
2651 }) && !preserve_utf8_object_bucket;
2652
2653 let coerced = if preserve_utf8_object_bucket {
2654 values
2655 .into_iter()
2656 .map(|value| Self::normalize_missing_for_dtype(value, dtype))
2657 .collect()
2658 } else if needs_coercion {
2659 values
2660 .into_iter()
2661 .map(|value| {
2662 if matches!(dtype, DType::Int64 | DType::Int64Nullable)
2670 && let Scalar::Float64(v) = &value
2671 && v.is_finite()
2672 && v.fract() != 0.0
2673 {
2674 return Err(TypeError::LossyFloatToInt { value: *v });
2675 }
2676 cast_scalar_owned(value, dtype)
2677 })
2678 .collect::<Result<Vec<_>, _>>()?
2679 } else {
2680 values
2683 .into_iter()
2684 .map(|value| Self::normalize_missing_for_dtype(value, dtype))
2685 .collect()
2686 };
2687
2688 let validity = ValidityMask::from_values(&coerced);
2689
2690 Ok(Self {
2691 dtype,
2692 validity,
2693 data: Self::cached_data_for_values(dtype, &coerced),
2694 values: ScalarValues::from_vec(coerced),
2695 })
2696 }
2697
2698 pub fn from_values(values: Vec<Scalar>) -> Result<Self, ColumnError> {
2699 let dtype = infer_dtype(&values)?;
2700 Self::new(dtype, values)
2701 }
2702
2703 #[must_use]
2709 pub fn from_i64_values(data: Vec<i64>) -> Self {
2710 let len = data.len();
2711 Self {
2712 dtype: DType::Int64,
2713 values: ScalarValues::lazy_all_valid_int64(data),
2714 validity: ValidityMask::all_valid(len),
2715 data: None,
2716 }
2717 }
2718
2719 #[must_use]
2727 #[doc(hidden)]
2728 pub fn from_utf8_contiguous(bytes: Vec<u8>, offsets: Vec<usize>) -> Self {
2729 let len = offsets.len().saturating_sub(1);
2730 Self {
2731 dtype: DType::Utf8,
2732 values: ScalarValues::lazy_contiguous_utf8(bytes, offsets),
2733 validity: ValidityMask::all_valid(len),
2734 data: None,
2735 }
2736 }
2737
2738 #[must_use]
2744 #[doc(hidden)]
2745 pub fn from_i64_repeat_runs(runs: Vec<(i64, usize)>) -> Self {
2746 let total_len = runs.iter().map(|&(_, run_len)| run_len).sum();
2747 Self {
2748 dtype: DType::Int64,
2749 values: ScalarValues::lazy_repeat_runs_int64(runs, total_len),
2750 validity: ValidityMask::all_valid(total_len),
2751 data: None,
2752 }
2753 }
2754
2755 #[must_use]
2760 #[doc(hidden)]
2761 pub fn from_i64_repeat_values_run_lengths(
2762 run_values: Vec<i64>,
2763 run_lens: Arc<[usize]>,
2764 ) -> Self {
2765 let total_len = run_lens.iter().sum();
2766 Self {
2767 dtype: DType::Int64,
2768 values: ScalarValues::lazy_repeat_values_int64(run_values, run_lens, total_len),
2769 validity: ValidityMask::all_valid(total_len),
2770 data: None,
2771 }
2772 }
2773
2774 #[must_use]
2779 #[doc(hidden)]
2780 pub fn from_i64_repeated_slices(data: Vec<i64>, segments: Vec<(usize, usize)>) -> Self {
2781 let total_len = segments.iter().map(|&(_, len)| len).sum();
2782 Self {
2783 dtype: DType::Int64,
2784 values: ScalarValues::lazy_repeated_slices_int64(data, segments, total_len),
2785 validity: ValidityMask::all_valid(total_len),
2786 data: None,
2787 }
2788 }
2789
2790 #[must_use]
2793 #[doc(hidden)]
2794 pub fn from_i64_repeated_slices_shared(
2795 data: Vec<i64>,
2796 segments: Arc<[(usize, usize)]>,
2797 total_len: usize,
2798 ) -> Self {
2799 Self {
2800 dtype: DType::Int64,
2801 values: ScalarValues::lazy_repeated_slices_int64_shared(data, segments, total_len),
2802 validity: ValidityMask::all_valid(total_len),
2803 data: None,
2804 }
2805 }
2806
2807 #[must_use]
2813 pub fn from_f64_values(data: Vec<f64>) -> Self {
2814 let len = data.len();
2815 let validity = if data.iter().any(|v| v.is_nan()) {
2823 ValidityMask::from_f64(&data)
2824 } else {
2825 ValidityMask::all_valid(len)
2826 };
2827 Self {
2828 dtype: DType::Float64,
2829 values: ScalarValues::lazy_all_valid_float64(data),
2830 validity,
2831 data: None,
2832 }
2833 }
2834
2835 #[doc(hidden)]
2839 pub fn from_f64_values_with_validity(data: Vec<f64>, validity: ValidityMask) -> Self {
2840 debug_assert_eq!(data.len(), validity.len());
2841 if validity.all() {
2842 return Self::from_f64_values(data);
2843 }
2844 Self {
2845 dtype: DType::Float64,
2846 values: ScalarValues::lazy_nullable_float64(data, validity.clone()),
2847 validity,
2848 data: None,
2849 }
2850 }
2851
2852 #[doc(hidden)]
2858 pub fn from_i64_values_with_validity(data: Vec<i64>, validity: ValidityMask) -> Self {
2859 debug_assert_eq!(data.len(), validity.len());
2860 if validity.all() {
2861 return Self::from_i64_values(data);
2862 }
2863 Self {
2864 dtype: DType::Int64,
2865 values: ScalarValues::lazy_nullable_int64(data, validity.clone()),
2866 validity,
2867 data: None,
2868 }
2869 }
2870
2871 #[must_use]
2884 #[doc(hidden)]
2885 pub fn reindex_promote_float64_by_optional_positions(
2886 &self,
2887 positions: &[Option<usize>],
2888 ) -> Option<Self> {
2889 enum TypedSource<'a> {
2890 Int64(&'a [i64]),
2891 Float64(&'a [f64]),
2892 }
2893 let source = if let Some(slice) = self.as_i64_slice() {
2894 TypedSource::Int64(slice)
2895 } else {
2896 let slice = self.as_f64_slice()?;
2897 TypedSource::Float64(slice)
2898 };
2899
2900 let n = positions.len();
2901 let len = self.len();
2902 let mut data = Vec::with_capacity(n);
2903 let mut words = vec![0_u64; n.div_ceil(64)];
2904 for (out_idx, slot) in positions.iter().enumerate() {
2905 match slot {
2906 Some(idx) if *idx < len => {
2907 let value = match source {
2908 TypedSource::Int64(slice) => slice[*idx] as f64,
2909 TypedSource::Float64(slice) => slice[*idx],
2910 };
2911 data.push(value);
2912 words[out_idx / 64] |= 1_u64 << (out_idx % 64);
2915 }
2916 _ => data.push(0.0),
2917 }
2918 }
2919 Some(Self::from_f64_values_with_validity(
2920 data,
2921 ValidityMask { words, len: n },
2922 ))
2923 }
2924
2925 #[must_use]
2932 pub fn as_f64_slice(&self) -> Option<&[f64]> {
2933 if self.dtype == DType::Float64 && self.validity.all() {
2934 if let Some(ColumnData::Float64(data)) = &self.data {
2935 return Some(data.as_slice());
2936 }
2937 if let ScalarValues::LazyAllValidFloat64 { data, .. } = &self.values {
2938 return Some(data.as_ref());
2939 }
2940 }
2941 None
2942 }
2943
2944 #[must_use]
2947 pub fn as_i64_slice(&self) -> Option<&[i64]> {
2948 if self.dtype == DType::Int64 && self.validity.all() {
2949 if let Some(ColumnData::Int64(data)) = &self.data {
2950 return Some(data.as_slice());
2951 }
2952 if let ScalarValues::LazyAllValidInt64 { data, .. } = &self.values {
2953 return Some(data.as_ref());
2954 }
2955 if let Some(data) = self.values.repeat_runs_i64_data() {
2956 return Some(data);
2957 }
2958 if let Some(data) = self.values.repeated_slices_i64_data() {
2959 return Some(data);
2960 }
2961 }
2962 None
2963 }
2964
2965 #[must_use]
2971 pub fn from_bool_values(data: Vec<bool>) -> Self {
2972 let len = data.len();
2973 Self {
2974 dtype: DType::Bool,
2975 values: ScalarValues::lazy_all_valid_bool(data),
2976 validity: ValidityMask::all_valid(len),
2977 data: None,
2978 }
2979 }
2980
2981 #[must_use]
2989 #[doc(hidden)]
2990 pub fn as_utf8_contiguous(&self) -> Option<(&[u8], &[usize])> {
2991 if self.dtype == DType::Utf8
2992 && self.validity.all()
2993 && let ScalarValues::LazyContiguousUtf8 { bytes, offsets, .. } = &self.values
2994 {
2995 return Some((bytes.as_ref(), offsets.as_ref()));
2996 }
2997 None
2998 }
2999
3000 fn utf8_arc_view_source(&self) -> Option<Utf8ArcViewSource> {
3006 if self.dtype != DType::Utf8 || !self.validity.all() {
3007 return None;
3008 }
3009 match &self.values {
3010 ScalarValues::LazyContiguousUtf8 { bytes, offsets, .. } => {
3011 Some((Arc::clone(bytes), Arc::clone(offsets), 0))
3012 }
3013 ScalarValues::LazyUtf8Slice {
3014 bytes,
3015 offsets,
3016 start,
3017 ..
3018 } => Some((Arc::clone(bytes), Arc::clone(offsets), *start)),
3019 _ => None,
3020 }
3021 }
3022
3023 #[must_use]
3027 #[doc(hidden)]
3028 pub fn as_strictly_increasing_utf8_contiguous(&self) -> Option<(&[u8], &[usize])> {
3029 if self.dtype == DType::Utf8
3030 && self.validity.all()
3031 && let ScalarValues::LazyContiguousUtf8 {
3032 bytes,
3033 offsets,
3034 strictly_increasing,
3035 ..
3036 } = &self.values
3037 && *strictly_increasing
3038 .get_or_init(|| contiguous_utf8_offsets_are_strictly_increasing(bytes, offsets))
3039 {
3040 return Some((bytes.as_ref(), offsets.as_ref()));
3041 }
3042 None
3043 }
3044
3045 #[must_use]
3051 pub fn as_fixed_width_strictly_increasing_utf8_contiguous(
3052 &self,
3053 ) -> Option<(&[u8], &[usize], usize)> {
3054 if self.dtype == DType::Utf8
3055 && self.validity.all()
3056 && let ScalarValues::LazyContiguousUtf8 {
3057 bytes,
3058 offsets,
3059 strictly_increasing,
3060 fixed_width,
3061 ..
3062 } = &self.values
3063 && *strictly_increasing
3064 .get_or_init(|| contiguous_utf8_offsets_are_strictly_increasing(bytes, offsets))
3065 {
3066 let width = fixed_width
3067 .get_or_init(|| contiguous_utf8_fixed_width(offsets))
3068 .as_ref()
3069 .copied()?;
3070 return Some((bytes.as_ref(), offsets.as_ref(), width));
3071 }
3072 None
3073 }
3074
3075 #[must_use]
3078 pub fn as_bool_slice(&self) -> Option<&[bool]> {
3079 if self.dtype == DType::Bool && self.validity.all() {
3080 if let Some(ColumnData::Bool(data)) = &self.data {
3081 return Some(data.as_slice());
3082 }
3083 if let ScalarValues::LazyAllValidBool { data, .. } = &self.values {
3084 return Some(data.as_ref());
3085 }
3086 }
3087 None
3088 }
3089
3090 #[must_use]
3113 pub fn take_positions(&self, positions: &[usize]) -> Self {
3114 let n = positions.len();
3115 if self.validity.all() {
3116 if let Some(data) = self.take_cached_all_valid_float64_positions(positions) {
3117 return Self {
3118 dtype: self.dtype,
3119 values: ScalarValues::lazy_all_valid_float64(data),
3120 validity: ValidityMask::all_valid(n),
3121 data: None,
3122 };
3123 }
3124
3125 if let Some(data) = self.take_cached_all_valid_int64_positions(positions) {
3131 return Self {
3132 dtype: self.dtype,
3133 values: ScalarValues::lazy_all_valid_int64(data),
3134 validity: ValidityMask::all_valid(n),
3135 data: None,
3136 };
3137 }
3138
3139 if n >= 64
3154 && let Some((src_bytes, src_offsets, src_start)) = self.utf8_arc_view_source()
3155 && let Some(range_start) = contiguous_ascending_start(positions)
3156 {
3157 return Self {
3158 dtype: self.dtype,
3159 values: ScalarValues::lazy_utf8_slice(
3160 src_bytes,
3161 src_offsets,
3162 src_start + range_start,
3163 n,
3164 ),
3165 validity: ValidityMask::all_valid(n),
3166 data: None,
3167 };
3168 }
3169
3170 if let Some((bytes, offsets)) = self.as_utf8_contiguous() {
3178 let total: usize = positions
3179 .iter()
3180 .map(|&pos| offsets[pos + 1] - offsets[pos])
3181 .sum();
3182 let mut new_bytes = Vec::with_capacity(total);
3183 let mut new_offsets = Vec::with_capacity(n + 1);
3184 new_offsets.push(0);
3185 for &pos in positions {
3186 new_bytes.extend_from_slice(&bytes[offsets[pos]..offsets[pos + 1]]);
3187 new_offsets.push(new_bytes.len());
3188 }
3189 return Self {
3190 dtype: self.dtype,
3191 values: ScalarValues::lazy_contiguous_utf8(new_bytes, new_offsets),
3192 validity: ValidityMask::all_valid(n),
3193 data: None,
3194 };
3195 }
3196
3197 let values = self
3198 .take_all_valid_primitive_positions(positions)
3199 .unwrap_or_else(|| {
3200 positions
3201 .iter()
3202 .map(|&pos| self.values[pos].clone())
3203 .collect()
3204 });
3205 return Self {
3206 dtype: self.dtype,
3207 values: ScalarValues::from_vec(values),
3208 validity: ValidityMask::all_valid(n),
3209 data: None,
3210 };
3211 }
3212
3213 if let ScalarValues::LazyNullableFloat64 { data: src, .. } = &self.values {
3222 let mut data = Vec::with_capacity(n);
3223 let mut words = vec![0_u64; n.div_ceil(64)];
3224 for (out_idx, &pos) in positions.iter().enumerate() {
3225 let x = src[pos];
3226 data.push(x);
3227 if self.validity.get(pos) && !x.is_nan() {
3228 words[out_idx / 64] |= 1_u64 << (out_idx % 64);
3229 }
3230 }
3231 return Self::from_f64_values_with_validity(data, ValidityMask { words, len: n });
3232 }
3233
3234 let mut values = Vec::with_capacity(n);
3235 let mut words = vec![0_u64; n.div_ceil(64)];
3236 for (out_idx, &pos) in positions.iter().enumerate() {
3237 let value = Self::normalize_missing_for_dtype(self.values[pos].clone(), self.dtype);
3238 if !value.is_missing() {
3239 words[out_idx / 64] |= 1_u64 << (out_idx % 64);
3240 }
3241 values.push(value);
3242 }
3243 Self {
3244 dtype: self.dtype,
3245 values: ScalarValues::from_vec(values),
3246 validity: ValidityMask { words, len: n },
3247 data: None,
3248 }
3249 }
3250
3251 fn take_cached_all_valid_float64_positions(&self, positions: &[usize]) -> Option<Vec<f64>> {
3252 let data = self.as_f64_slice()?;
3253 let mut values = Vec::with_capacity(positions.len());
3254 for &pos in positions {
3255 values.push(data[pos]);
3256 }
3257 Some(values)
3258 }
3259
3260 fn take_cached_all_valid_int64_positions(&self, positions: &[usize]) -> Option<Vec<i64>> {
3261 let data = self.as_i64_slice()?;
3262 let mut values = Vec::with_capacity(positions.len());
3263 for &pos in positions {
3264 values.push(data[pos]);
3265 }
3266 Some(values)
3267 }
3268
3269 fn take_all_valid_primitive_positions(&self, positions: &[usize]) -> Option<Vec<Scalar>> {
3270 if let Some(values) = self.take_cached_all_valid_primitive_positions(positions) {
3271 return Some(values);
3272 }
3273
3274 let mut values = Vec::with_capacity(positions.len());
3275 match self.dtype {
3276 DType::Bool | DType::BoolNullable => {
3277 for &pos in positions {
3278 match &self.values[pos] {
3279 Scalar::Bool(value) => values.push(Scalar::Bool(*value)),
3280 _ => return None,
3281 }
3282 }
3283 }
3284 DType::Int64 | DType::Int64Nullable => {
3285 for &pos in positions {
3286 match &self.values[pos] {
3287 Scalar::Int64(value) => values.push(Scalar::Int64(*value)),
3288 _ => return None,
3289 }
3290 }
3291 }
3292 DType::Float64 => {
3293 for &pos in positions {
3294 match &self.values[pos] {
3295 Scalar::Float64(value) => values.push(Scalar::Float64(*value)),
3296 _ => return None,
3297 }
3298 }
3299 }
3300 DType::Timedelta64 => {
3301 for &pos in positions {
3302 match &self.values[pos] {
3303 Scalar::Timedelta64(value) => values.push(Scalar::Timedelta64(*value)),
3304 _ => return None,
3305 }
3306 }
3307 }
3308 DType::Datetime64 => {
3309 for &pos in positions {
3310 match &self.values[pos] {
3311 Scalar::Datetime64(value) => values.push(Scalar::Datetime64(*value)),
3312 _ => return None,
3313 }
3314 }
3315 }
3316 DType::Period => {
3317 for &pos in positions {
3318 match &self.values[pos] {
3319 Scalar::Period(value) => values.push(Scalar::Period(*value)),
3320 _ => return None,
3321 }
3322 }
3323 }
3324 _ => return None,
3325 }
3326 Some(values)
3327 }
3328
3329 fn take_cached_all_valid_primitive_positions(
3330 &self,
3331 positions: &[usize],
3332 ) -> Option<Vec<Scalar>> {
3333 match self.dtype {
3334 DType::Bool => {
3335 if let Some(data) = self.as_bool_slice() {
3336 let mut values = Vec::with_capacity(positions.len());
3337 for &pos in positions {
3338 values.push(Scalar::Bool(data[pos]));
3339 }
3340 return Some(values);
3341 }
3342 }
3343 DType::Int64 => {
3344 if let Some(data) = self.as_i64_slice() {
3345 let mut values = Vec::with_capacity(positions.len());
3346 for &pos in positions {
3347 values.push(Scalar::Int64(data[pos]));
3348 }
3349 return Some(values);
3350 }
3351 }
3352 DType::Float64 => {
3353 if let Some(data) = self.as_f64_slice() {
3354 let mut values = Vec::with_capacity(positions.len());
3355 for &pos in positions {
3356 values.push(Scalar::Float64(data[pos]));
3357 }
3358 return Some(values);
3359 }
3360 }
3361 _ => {}
3362 }
3363
3364 let data = self.data.as_ref()?;
3365 let mut values = Vec::with_capacity(positions.len());
3366 match (self.dtype, data) {
3367 (DType::Bool | DType::BoolNullable, ColumnData::Bool(data)) => {
3368 for &pos in positions {
3369 values.push(Scalar::Bool(data[pos]));
3370 }
3371 }
3372 (DType::Int64 | DType::Int64Nullable, ColumnData::Int64(data)) => {
3373 for &pos in positions {
3374 values.push(Scalar::Int64(data[pos]));
3375 }
3376 }
3377 (DType::Float64, ColumnData::Float64(data)) => {
3378 for &pos in positions {
3379 values.push(Scalar::Float64(data[pos]));
3380 }
3381 }
3382 (DType::Timedelta64, ColumnData::Timedelta64(data)) => {
3383 for &pos in positions {
3384 values.push(Scalar::Timedelta64(data[pos]));
3385 }
3386 }
3387 (DType::Datetime64, ColumnData::Datetime64(data)) => {
3388 for &pos in positions {
3389 values.push(Scalar::Datetime64(data[pos]));
3390 }
3391 }
3392 (DType::Period, ColumnData::Period(data)) => {
3393 for &pos in positions {
3394 values.push(Scalar::Period(data[pos]));
3395 }
3396 }
3397 _ => return None,
3398 }
3399 Some(values)
3400 }
3401
3402 pub fn zeros(n: usize, dtype: DType) -> Result<Self, ColumnError> {
3406 let zero = match dtype {
3407 DType::Int64 => Scalar::Int64(0),
3408 DType::Float64 => Scalar::Float64(0.0),
3409 DType::Bool => Scalar::Bool(false),
3410 _ => Scalar::Int64(0),
3411 };
3412 Self::new(dtype, vec![zero; n])
3413 }
3414
3415 pub fn ones(n: usize, dtype: DType) -> Result<Self, ColumnError> {
3419 let one = match dtype {
3420 DType::Int64 => Scalar::Int64(1),
3421 DType::Float64 => Scalar::Float64(1.0),
3422 DType::Bool => Scalar::Bool(true),
3423 _ => Scalar::Int64(1),
3424 };
3425 Self::new(dtype, vec![one; n])
3426 }
3427
3428 pub fn full(n: usize, fill_value: Scalar) -> Result<Self, ColumnError> {
3432 let dtype = fill_value.dtype();
3433 Self::new(dtype, vec![fill_value; n])
3434 }
3435
3436 pub fn zeros_like(&self) -> Result<Self, ColumnError> {
3438 Self::zeros(self.len(), self.dtype)
3439 }
3440
3441 pub fn ones_like(&self) -> Result<Self, ColumnError> {
3443 Self::ones(self.len(), self.dtype)
3444 }
3445
3446 pub fn full_like(&self, fill_value: Scalar) -> Result<Self, ColumnError> {
3448 Self::new(self.dtype, vec![fill_value; self.len()])
3449 }
3450
3451 pub fn empty_like(&self) -> Result<Self, ColumnError> {
3453 Self::new(self.dtype, Vec::new())
3454 }
3455
3456 pub fn arange(start: f64, stop: f64, step: f64) -> Result<Self, ColumnError> {
3460 if step == 0.0 {
3461 return Err(ColumnError::Type(TypeError::NonNumericValue {
3462 value: "step cannot be zero".to_string(),
3463 dtype: DType::Float64,
3464 }));
3465 }
3466 let mut values = Vec::new();
3467 let mut x = start;
3468 if step > 0.0 {
3469 while x < stop {
3470 values.push(Scalar::Float64(x));
3471 x += step;
3472 }
3473 } else {
3474 while x > stop {
3475 values.push(Scalar::Float64(x));
3476 x += step;
3477 }
3478 }
3479 Self::new(DType::Float64, values)
3480 }
3481
3482 pub fn linspace(start: f64, stop: f64, num: usize) -> Result<Self, ColumnError> {
3486 if num == 0 {
3487 return Self::new(DType::Float64, Vec::new());
3488 }
3489 if num == 1 {
3490 return Self::new(DType::Float64, vec![Scalar::Float64(start)]);
3491 }
3492 let step = (stop - start) / (num - 1) as f64;
3493 let values: Vec<Scalar> = (0..num)
3494 .map(|i| Scalar::Float64(start + step * i as f64))
3495 .collect();
3496 Self::new(DType::Float64, values)
3497 }
3498
3499 pub fn logspace(start: f64, stop: f64, num: usize) -> Result<Self, ColumnError> {
3503 let lin = Self::linspace(start, stop, num)?;
3504 let values: Vec<Scalar> = lin
3505 .values()
3506 .iter()
3507 .map(|v| match v {
3508 Scalar::Float64(x) => Scalar::Float64(10.0_f64.powf(*x)),
3509 _ => v.clone(),
3510 })
3511 .collect();
3512 Self::new(DType::Float64, values)
3513 }
3514
3515 pub fn geomspace(start: f64, stop: f64, num: usize) -> Result<Self, ColumnError> {
3520 if num == 0 {
3521 return Self::new(DType::Float64, vec![]);
3522 }
3523 if start == 0.0 || stop == 0.0 {
3524 return Err(ColumnError::Type(TypeError::NonNumericValue {
3525 value: "geomspace endpoints cannot be zero".to_owned(),
3526 dtype: DType::Float64,
3527 }));
3528 }
3529 if num == 1 {
3530 return Self::new(DType::Float64, vec![Scalar::Float64(start)]);
3531 }
3532
3533 let log_start = start.ln();
3534 let log_stop = stop.ln();
3535 let step = (log_stop - log_start) / (num - 1) as f64;
3536 let values: Vec<Scalar> = (0..num)
3537 .map(|i| Scalar::Float64((log_start + step * i as f64).exp()))
3538 .collect();
3539 Self::new(DType::Float64, values)
3540 }
3541
3542 pub fn hanning(m: usize) -> Result<Self, ColumnError> {
3546 if m == 0 {
3547 return Self::new(DType::Float64, vec![]);
3548 }
3549 if m == 1 {
3550 return Self::new(DType::Float64, vec![Scalar::Float64(1.0)]);
3551 }
3552 let values: Vec<Scalar> = (0..m)
3553 .map(|n| {
3554 let val =
3555 0.5 - 0.5 * (2.0 * std::f64::consts::PI * n as f64 / (m - 1) as f64).cos();
3556 Scalar::Float64(val)
3557 })
3558 .collect();
3559 Self::new(DType::Float64, values)
3560 }
3561
3562 pub fn hamming(m: usize) -> Result<Self, ColumnError> {
3566 if m == 0 {
3567 return Self::new(DType::Float64, vec![]);
3568 }
3569 if m == 1 {
3570 return Self::new(DType::Float64, vec![Scalar::Float64(1.0)]);
3571 }
3572 let values: Vec<Scalar> = (0..m)
3573 .map(|n| {
3574 let val =
3575 0.54 - 0.46 * (2.0 * std::f64::consts::PI * n as f64 / (m - 1) as f64).cos();
3576 Scalar::Float64(val)
3577 })
3578 .collect();
3579 Self::new(DType::Float64, values)
3580 }
3581
3582 pub fn blackman(m: usize) -> Result<Self, ColumnError> {
3586 if m == 0 {
3587 return Self::new(DType::Float64, vec![]);
3588 }
3589 if m == 1 {
3590 return Self::new(DType::Float64, vec![Scalar::Float64(1.0)]);
3591 }
3592 let values: Vec<Scalar> = (0..m)
3593 .map(|n| {
3594 let x = n as f64 / (m - 1) as f64;
3595 let val = 0.42 - 0.5 * (2.0 * std::f64::consts::PI * x).cos()
3596 + 0.08 * (4.0 * std::f64::consts::PI * x).cos();
3597 Scalar::Float64(val)
3598 })
3599 .collect();
3600 Self::new(DType::Float64, values)
3601 }
3602
3603 pub fn bartlett(m: usize) -> Result<Self, ColumnError> {
3607 if m == 0 {
3608 return Self::new(DType::Float64, vec![]);
3609 }
3610 if m == 1 {
3611 return Self::new(DType::Float64, vec![Scalar::Float64(1.0)]);
3612 }
3613 let half = (m - 1) as f64 / 2.0;
3614 let values: Vec<Scalar> = (0..m)
3615 .map(|n| {
3616 let val = 1.0 - ((n as f64 - half) / half).abs();
3617 Scalar::Float64(val)
3618 })
3619 .collect();
3620 Self::new(DType::Float64, values)
3621 }
3622
3623 #[must_use]
3624 pub fn dtype(&self) -> DType {
3625 self.dtype
3626 }
3627
3628 #[must_use]
3630 pub fn has_nulls(&self) -> bool {
3631 self.validity.count_invalid() > 0
3632 }
3633
3634 #[must_use]
3639 pub fn promote_to_nullable(&self) -> Self {
3640 if !self.has_nulls() {
3641 return self.clone();
3642 }
3643 let new_dtype = self.dtype.to_nullable();
3644 if new_dtype == self.dtype {
3645 return self.clone();
3646 }
3647 Self {
3648 dtype: new_dtype,
3649 values: self.values.clone(),
3650 validity: self.validity.clone(),
3651 data: self.data.clone(),
3652 }
3653 }
3654
3655 #[must_use]
3661 pub fn with_dtype(&self, dtype: DType) -> Self {
3662 Self {
3663 dtype,
3664 values: self.values.clone(),
3665 validity: self.validity.clone(),
3666 data: None,
3667 }
3668 }
3669
3670 #[must_use]
3671 pub fn len(&self) -> usize {
3672 self.values.len()
3673 }
3674
3675 #[must_use]
3677 pub fn size(&self) -> usize {
3678 self.len()
3679 }
3680
3681 #[must_use]
3683 pub fn shape(&self) -> (usize,) {
3684 (self.len(),)
3685 }
3686
3687 #[must_use]
3689 pub fn ndim(&self) -> usize {
3690 1
3691 }
3692
3693 #[must_use]
3694 pub fn is_empty(&self) -> bool {
3695 self.values.is_empty()
3696 }
3697
3698 #[must_use]
3700 pub fn empty(&self) -> bool {
3701 self.is_empty()
3702 }
3703
3704 #[must_use]
3708 pub fn copy(&self) -> Self {
3709 self.clone()
3710 }
3711
3712 #[must_use]
3716 pub fn view(&self) -> Self {
3717 self.clone()
3718 }
3719
3720 #[must_use]
3724 pub fn transpose(&self) -> Self {
3725 self.clone()
3726 }
3727
3728 #[must_use]
3730 pub fn t(&self) -> Self {
3731 self.transpose()
3732 }
3733
3734 #[allow(non_snake_case)]
3736 #[must_use]
3737 pub fn T(&self) -> Self {
3738 self.transpose()
3739 }
3740
3741 #[must_use]
3742 pub fn values(&self) -> &[Scalar] {
3743 &self.values
3744 }
3745
3746 #[must_use]
3747 pub fn value(&self, idx: usize) -> Option<&Scalar> {
3748 self.values.get(idx)
3749 }
3750
3751 pub fn item(&self) -> Result<Scalar, ColumnError> {
3756 match self.values.as_slice() {
3757 [value] => Ok(value.clone()),
3758 values => Err(ColumnError::InvalidLength {
3759 operation: "item()",
3760 expected: 1,
3761 actual: values.len(),
3762 }),
3763 }
3764 }
3765
3766 #[must_use]
3767 pub fn validity(&self) -> &ValidityMask {
3768 &self.validity
3769 }
3770
3771 pub fn iter_values(&self) -> std::slice::Iter<'_, Scalar> {
3777 self.values.iter()
3778 }
3779
3780 #[must_use]
3786 pub fn to_vec(&self) -> Vec<Scalar> {
3787 self.values.to_vec()
3788 }
3789
3790 #[must_use]
3792 pub fn to_list(&self) -> Vec<Scalar> {
3793 self.to_vec()
3794 }
3795
3796 #[must_use]
3798 pub fn tolist(&self) -> Vec<Scalar> {
3799 self.to_list()
3800 }
3801
3802 #[must_use]
3804 pub fn to_numpy(&self) -> Vec<Scalar> {
3805 self.to_vec()
3806 }
3807
3808 #[must_use]
3810 pub fn ravel(&self) -> Vec<Scalar> {
3811 self.to_numpy()
3812 }
3813
3814 #[must_use]
3819 pub fn flatten(&self) -> Vec<Scalar> {
3820 self.values.to_vec()
3821 }
3822
3823 #[must_use]
3827 pub fn asarray(&self) -> Self {
3828 self.clone()
3829 }
3830
3831 #[must_use]
3833 pub fn array(&self) -> Vec<Scalar> {
3834 self.to_vec()
3835 }
3836
3837 #[must_use]
3843 pub fn has_any_missing(&self) -> bool {
3844 self.values.iter().any(Scalar::is_missing)
3845 }
3846
3847 #[must_use]
3849 pub fn hasnans(&self) -> bool {
3850 self.has_any_missing()
3851 }
3852
3853 #[must_use]
3859 pub fn all_missing(&self) -> bool {
3860 self.values.iter().all(Scalar::is_missing)
3861 }
3862
3863 #[must_use]
3869 pub fn first(&self) -> Option<&Scalar> {
3870 self.values.first()
3871 }
3872
3873 #[must_use]
3875 pub fn last(&self) -> Option<&Scalar> {
3876 self.values.last()
3877 }
3878
3879 pub fn count_matching<F>(&self, mut predicate: F) -> usize
3886 where
3887 F: FnMut(&Scalar) -> bool,
3888 {
3889 self.values
3890 .iter()
3891 .filter(|v| !v.is_missing() && predicate(v))
3892 .count()
3893 }
3894
3895 pub fn zip_with<F>(&self, other: &Self, mut func: F) -> Result<Self, ColumnError>
3903 where
3904 F: FnMut(&Scalar, &Scalar) -> Scalar,
3905 {
3906 if self.values.len() != other.values.len() {
3907 return Err(ColumnError::LengthMismatch {
3908 left: self.values.len(),
3909 right: other.values.len(),
3910 });
3911 }
3912 let out: Vec<Scalar> = self
3913 .values
3914 .iter()
3915 .zip(other.values.iter())
3916 .map(|(a, b)| func(a, b))
3917 .collect();
3918 let inferred = infer_dtype(&out).unwrap_or(self.dtype);
3919 Self::new(inferred, out)
3920 }
3921
3922 pub fn iter_enumerate(&self) -> std::iter::Enumerate<std::slice::Iter<'_, Scalar>> {
3928 self.values.iter().enumerate()
3929 }
3930
3931 pub fn apply_bool<F>(&self, mut predicate: F) -> Result<Self, ColumnError>
3939 where
3940 F: FnMut(&Scalar) -> bool,
3941 {
3942 let out: Vec<Scalar> = self
3943 .values
3944 .iter()
3945 .map(|v| {
3946 if v.is_missing() {
3947 Scalar::Bool(false)
3948 } else {
3949 Scalar::Bool(predicate(v))
3950 }
3951 })
3952 .collect();
3953 Self::new(DType::Bool, out)
3954 }
3955
3956 pub fn reindex_by_positions(&self, positions: &[Option<usize>]) -> Result<Self, ColumnError> {
3957 let mut present_positions = Vec::with_capacity(positions.len());
3958 let mut all_present = true;
3959 for position in positions {
3960 match position {
3961 Some(idx) if *idx < self.len() => present_positions.push(*idx),
3962 Some(_) | None => {
3963 all_present = false;
3964 break;
3965 }
3966 }
3967 }
3968 if all_present {
3969 return Ok(self.take_positions(&present_positions));
3970 }
3971
3972 let n = positions.len();
3979 if let Some(slice) = self.as_i64_slice() {
3980 let mut data = Vec::with_capacity(n);
3981 let mut words = vec![0_u64; n.div_ceil(64)];
3982 for (out_idx, slot) in positions.iter().enumerate() {
3983 match slot {
3984 Some(idx) if *idx < slice.len() => {
3985 data.push(slice[*idx]);
3986 words[out_idx / 64] |= 1_u64 << (out_idx % 64);
3987 }
3988 _ => data.push(0),
3989 }
3990 }
3991 return Ok(Self::from_i64_values_with_validity(
3992 data,
3993 ValidityMask { words, len: n },
3994 ));
3995 }
3996 if let Some(slice) = self.as_f64_slice() {
3997 let mut data = Vec::with_capacity(n);
3998 let mut words = vec![0_u64; n.div_ceil(64)];
3999 for (out_idx, slot) in positions.iter().enumerate() {
4000 match slot {
4001 Some(idx) if *idx < slice.len() => {
4002 data.push(slice[*idx]);
4003 words[out_idx / 64] |= 1_u64 << (out_idx % 64);
4004 }
4005 _ => data.push(0.0),
4006 }
4007 }
4008 return Ok(Self::from_f64_values_with_validity(
4009 data,
4010 ValidityMask { words, len: n },
4011 ));
4012 }
4013
4014 let values = positions
4015 .iter()
4016 .map(|slot| match slot {
4017 Some(idx) => self
4018 .values
4019 .get(*idx)
4020 .cloned()
4021 .unwrap_or_else(|| Scalar::missing_for_dtype(self.dtype)),
4022 None => Scalar::missing_for_dtype(self.dtype),
4023 })
4024 .collect::<Vec<_>>();
4025
4026 Self::new(self.dtype, values)
4027 }
4028
4029 fn try_vectorized_binary(
4035 &self,
4036 right: &Self,
4037 op: ArithmeticOp,
4038 out_dtype: DType,
4039 ) -> Option<Result<Self, ColumnError>> {
4040 match out_dtype {
4043 DType::Float64 => {
4044 if let (Some(l), Some(r)) = (self.as_f64_slice(), right.as_f64_slice()) {
4053 let apply = binary_f64_apply(op);
4054 let result: Vec<f64> = l.iter().zip(r).map(|(&a, &b)| apply(a, b)).collect();
4055 return Some(Ok(Self::from_f64_values(result)));
4056 }
4057 let left_data = ColumnData::from_scalars(&self.values, DType::Float64);
4058 let right_data = ColumnData::from_scalars(&right.values, DType::Float64);
4059 let (ColumnData::Float64(l), ColumnData::Float64(r)) = (&left_data, &right_data)
4060 else {
4061 return None;
4062 };
4063
4064 let left_nan_aware = self.nan_aware_validity();
4067 let right_nan_aware = right.nan_aware_validity();
4068
4069 let (result_data, result_validity) =
4070 vectorized_binary_f64(l, r, &left_nan_aware, &right_nan_aware, op);
4071
4072 if result_validity.all() {
4077 return Some(Ok(Self::from_f64_values(result_data)));
4078 }
4079
4080 let values: Vec<Scalar> = result_data
4083 .iter()
4084 .enumerate()
4085 .map(|(i, v)| {
4086 if !result_validity.get(i) {
4087 if self.is_nan_at(i) || right.is_nan_at(i) {
4089 Scalar::Null(NullKind::NaN)
4090 } else {
4091 Scalar::missing_for_dtype(out_dtype)
4092 }
4093 } else {
4094 Scalar::Float64(*v)
4095 }
4096 })
4097 .collect();
4098
4099 Some(Self::new(out_dtype, values))
4100 }
4101 DType::Int64 if !matches!(op, ArithmeticOp::Div) => {
4102 if self.dtype != DType::Int64 || right.dtype != DType::Int64 {
4104 return None;
4105 }
4106 if let (Some(l), Some(r)) = (self.as_i64_slice(), right.as_i64_slice()) {
4112 let (result_data, _validity) =
4113 vectorized_binary_i64(l, r, &self.validity, &right.validity, op)?;
4114 return Some(Ok(Self::from_i64_values(result_data)));
4115 }
4116 let left_data = ColumnData::from_scalars(&self.values, DType::Int64);
4117 let right_data = ColumnData::from_scalars(&right.values, DType::Int64);
4118 let (ColumnData::Int64(l), ColumnData::Int64(r)) = (&left_data, &right_data) else {
4119 return None;
4120 };
4121
4122 let (result_data, result_validity) =
4123 vectorized_binary_i64(l, r, &self.validity, &right.validity, op)?;
4124
4125 if result_validity.all() {
4128 return Some(Ok(Self::from_i64_values(result_data)));
4129 }
4130
4131 let values: Vec<Scalar> = result_data
4132 .iter()
4133 .enumerate()
4134 .map(|(i, v)| {
4135 if !result_validity.get(i) {
4136 Scalar::missing_for_dtype(out_dtype)
4137 } else {
4138 Scalar::Int64(*v)
4139 }
4140 })
4141 .collect();
4142
4143 Some(Self::new(out_dtype, values))
4144 }
4145 _ => None, }
4147 }
4148
4149 pub fn aligned_binary_f64(
4164 &self,
4165 right: &Self,
4166 left_positions: &[Option<usize>],
4167 right_positions: &[Option<usize>],
4168 op: ArithmeticOp,
4169 ) -> Result<Self, ColumnError> {
4170 debug_assert_eq!(left_positions.len(), right_positions.len());
4171 let out_len = left_positions.len();
4172
4173 let lsrc = self.float64_binary_data();
4174 let rsrc = right.float64_binary_data();
4175 let lvalid = self.nan_aware_validity();
4176 let rvalid = right.nan_aware_validity();
4177
4178 let apply = binary_f64_apply(op);
4179
4180 let mut data = Vec::with_capacity(out_len);
4181 let mut words = vec![0_u64; out_len.div_ceil(64)];
4182 let mut all_valid = true;
4183 for (k, left_slot) in left_positions.iter().enumerate() {
4184 if let Some(i) = *left_slot
4185 && let Some(j) = right_positions.get(k).copied().flatten()
4186 && lvalid.get(i)
4187 && rvalid.get(j)
4188 {
4189 let value = apply(lsrc[i], rsrc[j]);
4190 data.push(value);
4191 if value.is_nan() {
4192 all_valid = false;
4193 } else {
4194 words[k / 64] |= 1_u64 << (k % 64);
4195 }
4196 } else {
4197 data.push(0.0);
4198 all_valid = false;
4199 }
4200 }
4201 if all_valid {
4202 return Ok(Self::from_f64_values(data));
4203 }
4204 Ok(Self::from_f64_values_with_validity(
4205 data,
4206 ValidityMask {
4207 words,
4208 len: out_len,
4209 },
4210 ))
4211 }
4212
4213 pub fn aligned_binary_f64_int64_unit_ranges(
4221 &self,
4222 right: &Self,
4223 left_range: (i64, i64),
4224 right_range: (i64, i64),
4225 union_range: (i64, i64),
4226 op: ArithmeticOp,
4227 ) -> Result<Self, ColumnError> {
4228 if !matches!(self.dtype, DType::Float64) || !matches!(right.dtype, DType::Float64) {
4229 return Err(ColumnError::DTypeMismatch {
4230 left: self.dtype,
4231 right: right.dtype,
4232 });
4233 }
4234
4235 let (left_start, left_end) = left_range;
4236 let (right_start, right_end) = right_range;
4237 let (union_start, union_end) = union_range;
4238
4239 let Some(left_len) = unit_range_len(left_start, left_end) else {
4240 return Err(ColumnError::LengthMismatch {
4241 left: self.len(),
4242 right: right.len(),
4243 });
4244 };
4245 let Some(right_len) = unit_range_len(right_start, right_end) else {
4246 return Err(ColumnError::LengthMismatch {
4247 left: self.len(),
4248 right: right.len(),
4249 });
4250 };
4251 let Some(out_len) = unit_range_len(union_start, union_end) else {
4252 return Err(ColumnError::LengthMismatch {
4253 left: self.len(),
4254 right: right.len(),
4255 });
4256 };
4257 if left_len != self.len() || right_len != right.len() {
4258 return Err(ColumnError::LengthMismatch {
4259 left: self.len(),
4260 right: right.len(),
4261 });
4262 }
4263
4264 let lsrc = self.float64_binary_data();
4265 let rsrc = right.float64_binary_data();
4266 let lvalid = self.nan_aware_validity();
4267 let rvalid = right.nan_aware_validity();
4268 let apply = binary_f64_apply(op);
4269
4270 let mut data = vec![0.0; out_len];
4271 let mut words = vec![0_u64; out_len.div_ceil(64)];
4272 let overlap_start = left_start.max(right_start);
4273 let overlap_end = left_end.min(right_end);
4274 let mut all_valid = overlap_start == union_start && overlap_end == union_end;
4275
4276 if overlap_start <= overlap_end {
4277 for value in overlap_start..=overlap_end {
4278 let out_idx = (value - union_start) as usize;
4279 let left_idx = (value - left_start) as usize;
4280 let right_idx = (value - right_start) as usize;
4281 if lvalid.get(left_idx) && rvalid.get(right_idx) {
4282 let result = apply(lsrc[left_idx], rsrc[right_idx]);
4283 data[out_idx] = result;
4284 if result.is_nan() {
4285 all_valid = false;
4286 } else {
4287 words[out_idx / 64] |= 1_u64 << (out_idx % 64);
4288 }
4289 } else {
4290 all_valid = false;
4291 }
4292 }
4293 }
4294
4295 if all_valid {
4296 return Ok(Self::from_f64_values(data));
4297 }
4298 Ok(Self::from_f64_values_with_validity(
4299 data,
4300 ValidityMask {
4301 words,
4302 len: out_len,
4303 },
4304 ))
4305 }
4306
4307 pub fn aligned_binary_f64_same_positions(
4313 &self,
4314 right: &Self,
4315 op: ArithmeticOp,
4316 ) -> Result<Self, ColumnError> {
4317 debug_assert_eq!(self.len(), right.len());
4318 let out_len = self.len();
4319
4320 let lsrc = self.float64_binary_data();
4321 let rsrc = right.float64_binary_data();
4322
4323 if self.validity.all()
4330 && right.validity.all()
4331 && !lsrc.iter().any(|x| x.is_nan())
4332 && !rsrc.iter().any(|x| x.is_nan())
4333 {
4334 return Ok(Self::from_f64_values(apply_f64_slices(op, &lsrc, &rsrc)));
4335 }
4336
4337 let lvalid = self.nan_aware_validity();
4338 let rvalid = right.nan_aware_validity();
4339 let apply = binary_f64_apply(op);
4340
4341 let mut data = Vec::with_capacity(out_len);
4342 let mut all_valid = true;
4343 for i in 0..out_len {
4344 if lvalid.get(i) && rvalid.get(i) {
4345 data.push(apply(lsrc[i], rsrc[i]));
4346 } else {
4347 all_valid = false;
4348 break;
4349 }
4350 }
4351 if all_valid {
4352 return Ok(Self::from_f64_values(data));
4353 }
4354
4355 let mut values = Vec::with_capacity(out_len);
4356 for i in 0..out_len {
4357 if lvalid.get(i) && rvalid.get(i) {
4358 values.push(Scalar::Float64(apply(lsrc[i], rsrc[i])));
4359 } else {
4360 values.push(Scalar::Null(NullKind::NaN));
4361 }
4362 }
4363 Self::new(DType::Float64, values)
4364 }
4365
4366 fn cached_float64_data(&self) -> Option<&[f64]> {
4367 match &self.data {
4368 Some(ColumnData::Float64(data)) if data.len() == self.values.len() => {
4369 return Some(data.as_slice());
4370 }
4371 _ => {}
4372 }
4373
4374 match &self.values {
4375 ScalarValues::LazyAllValidFloat64 { data, .. } if data.len() == self.validity.len() => {
4376 Some(data.as_ref())
4377 }
4378 ScalarValues::LazyNullableFloat64 { data, .. } if data.len() == self.validity.len() => {
4379 Some(data.as_slice())
4380 }
4381 _ => None,
4382 }
4383 }
4384
4385 fn float64_binary_data(&self) -> std::borrow::Cow<'_, [f64]> {
4386 if let Some(data) = self.cached_float64_data() {
4387 return std::borrow::Cow::Borrowed(data);
4388 }
4389
4390 match ColumnData::from_scalars(&self.values, DType::Float64) {
4391 ColumnData::Float64(data) => std::borrow::Cow::Owned(data),
4392 _ => unreachable!("Float64 materialization must produce Float64 data"),
4393 }
4394 }
4395
4396 #[must_use]
4398 fn nan_aware_validity(&self) -> ValidityMask {
4399 let mut mask = self.validity.clone();
4400
4401 if let Some(data) = self.cached_float64_data() {
4402 for (i, value) in data.iter().enumerate() {
4403 if value.is_nan() {
4404 mask.set(i, false);
4405 }
4406 }
4407 return mask;
4408 }
4409
4410 for (i, value) in self.values.iter().enumerate() {
4411 if matches!(value, Scalar::Float64(f) if f.is_nan()) {
4412 mask.set(i, false);
4413 }
4414 }
4415 mask
4416 }
4417
4418 fn is_nan_at(&self, i: usize) -> bool {
4420 self.values.get(i).is_some_and(|v| v.is_nan())
4421 }
4422
4423 pub fn binary_numeric(&self, right: &Self, op: ArithmeticOp) -> Result<Self, ColumnError> {
4424 if self.len() != right.len() {
4425 return Err(ColumnError::LengthMismatch {
4426 left: self.len(),
4427 right: right.len(),
4428 });
4429 }
4430
4431 let mut out_dtype = common_dtype(self.dtype, right.dtype)?;
4432 if matches!(out_dtype, DType::Bool) {
4433 out_dtype = DType::Int64;
4434 }
4435 let int_pow = matches!(op, ArithmeticOp::Pow)
4439 && self.dtype == DType::Int64
4440 && right.dtype == DType::Int64;
4441 if matches!(op, ArithmeticOp::Div | ArithmeticOp::Pow) && !int_pow {
4442 out_dtype = DType::Float64;
4443 }
4444
4445 if let Some(result) = self.try_vectorized_binary(right, op, out_dtype) {
4447 return result;
4448 }
4449
4450 if matches!(op, ArithmeticOp::Mod | ArithmeticOp::FloorDiv)
4452 && matches!(out_dtype, DType::Int64)
4453 {
4454 out_dtype = DType::Float64;
4455 }
4456
4457 let values = self
4459 .values
4460 .iter()
4461 .zip(&right.values)
4462 .map(|(left, right)| {
4463 if left.is_missing() || right.is_missing() {
4464 return Ok::<_, ColumnError>(if left.is_nan() || right.is_nan() {
4465 Scalar::Null(NullKind::NaN)
4466 } else {
4467 Scalar::missing_for_dtype(out_dtype)
4468 });
4469 }
4470
4471 if matches!(out_dtype, DType::Int64) {
4472 let lhs_i64 = match cast_scalar(left, DType::Int64)? {
4473 Scalar::Int64(v) => v,
4474 _ => unreachable!(),
4475 };
4476 let rhs_i64 = match cast_scalar(right, DType::Int64)? {
4477 Scalar::Int64(v) => v,
4478 _ => unreachable!(),
4479 };
4480 let result = match op {
4481 ArithmeticOp::Add => lhs_i64.wrapping_add(rhs_i64),
4482 ArithmeticOp::Sub => lhs_i64.wrapping_sub(rhs_i64),
4483 ArithmeticOp::Mul => lhs_i64.wrapping_mul(rhs_i64),
4484 ArithmeticOp::Pow => {
4488 if rhs_i64 < 0 {
4489 return Err(ColumnError::NegativeIntegerPower);
4490 }
4491 lhs_i64.wrapping_pow(u32::try_from(rhs_i64).unwrap_or(u32::MAX))
4492 }
4493 ArithmeticOp::Div | ArithmeticOp::Mod | ArithmeticOp::FloorDiv => {
4494 unreachable!()
4495 }
4496 };
4497 return Ok(Scalar::Int64(result));
4498 }
4499
4500 let lhs = left.to_f64()?;
4501 let rhs = right.to_f64()?;
4502 let result = match op {
4503 ArithmeticOp::Add => lhs + rhs,
4504 ArithmeticOp::Sub => lhs - rhs,
4505 ArithmeticOp::Mul => lhs * rhs,
4506 ArithmeticOp::Div => lhs / rhs,
4507 ArithmeticOp::Mod => python_mod_f64(lhs, rhs),
4508 ArithmeticOp::Pow => lhs.powf(rhs),
4509 ArithmeticOp::FloorDiv => python_floor_div_f64(lhs, rhs),
4510 };
4511
4512 Ok(Scalar::Float64(result))
4513 })
4514 .collect::<Result<Vec<_>, _>>()?;
4515
4516 Self::new(out_dtype, values)
4517 }
4518
4519 pub fn add(&self, right: &Self) -> Result<Self, ColumnError> {
4521 self.binary_numeric(right, ArithmeticOp::Add)
4522 }
4523
4524 pub fn radd(&self, left: &Self) -> Result<Self, ColumnError> {
4526 left.binary_numeric(self, ArithmeticOp::Add)
4527 }
4528
4529 pub fn sub(&self, right: &Self) -> Result<Self, ColumnError> {
4531 self.binary_numeric(right, ArithmeticOp::Sub)
4532 }
4533
4534 pub fn rsub(&self, left: &Self) -> Result<Self, ColumnError> {
4536 left.binary_numeric(self, ArithmeticOp::Sub)
4537 }
4538
4539 pub fn subtract(&self, right: &Self) -> Result<Self, ColumnError> {
4541 self.sub(right)
4542 }
4543
4544 pub fn mul(&self, right: &Self) -> Result<Self, ColumnError> {
4546 self.binary_numeric(right, ArithmeticOp::Mul)
4547 }
4548
4549 pub fn rmul(&self, left: &Self) -> Result<Self, ColumnError> {
4551 left.binary_numeric(self, ArithmeticOp::Mul)
4552 }
4553
4554 pub fn multiply(&self, right: &Self) -> Result<Self, ColumnError> {
4556 self.mul(right)
4557 }
4558
4559 pub fn div(&self, right: &Self) -> Result<Self, ColumnError> {
4561 self.binary_numeric(right, ArithmeticOp::Div)
4562 }
4563
4564 pub fn rdiv(&self, left: &Self) -> Result<Self, ColumnError> {
4566 left.binary_numeric(self, ArithmeticOp::Div)
4567 }
4568
4569 pub fn divide(&self, right: &Self) -> Result<Self, ColumnError> {
4571 self.div(right)
4572 }
4573
4574 pub fn truediv(&self, right: &Self) -> Result<Self, ColumnError> {
4576 self.div(right)
4577 }
4578
4579 pub fn rtruediv(&self, left: &Self) -> Result<Self, ColumnError> {
4581 self.rdiv(left)
4582 }
4583
4584 pub fn floordiv(&self, right: &Self) -> Result<Self, ColumnError> {
4586 self.binary_numeric(right, ArithmeticOp::FloorDiv)
4587 }
4588
4589 pub fn rfloordiv(&self, left: &Self) -> Result<Self, ColumnError> {
4591 left.binary_numeric(self, ArithmeticOp::FloorDiv)
4592 }
4593
4594 pub fn r#mod(&self, right: &Self) -> Result<Self, ColumnError> {
4596 self.binary_numeric(right, ArithmeticOp::Mod)
4597 }
4598
4599 pub fn rmod(&self, left: &Self) -> Result<Self, ColumnError> {
4601 left.binary_numeric(self, ArithmeticOp::Mod)
4602 }
4603
4604 pub fn pow(&self, right: &Self) -> Result<Self, ColumnError> {
4606 self.binary_numeric(right, ArithmeticOp::Pow)
4607 }
4608
4609 pub fn rpow(&self, left: &Self) -> Result<Self, ColumnError> {
4611 left.binary_numeric(self, ArithmeticOp::Pow)
4612 }
4613
4614 pub fn power(&self, right: &Self) -> Result<Self, ColumnError> {
4616 self.pow(right)
4617 }
4618
4619 pub fn float_power(&self, right: &Self) -> Result<Self, ColumnError> {
4625 if self.len() != right.len() {
4626 return Err(ColumnError::LengthMismatch {
4627 left: self.len(),
4628 right: right.len(),
4629 });
4630 }
4631 if let Some(out) = self.typed_float_binary(right, |b, e| b.powf(e)) {
4632 return Ok(out);
4633 }
4634 let mut out = Vec::with_capacity(self.values.len());
4635 for (base, exp) in self.values.iter().zip(&right.values) {
4636 if base.is_missing() || exp.is_missing() {
4637 out.push(Scalar::Float64(f64::NAN));
4638 continue;
4639 }
4640 let b = base.to_f64().map_err(ColumnError::Type)?;
4641 let e = exp.to_f64().map_err(ColumnError::Type)?;
4642 let result = b.powf(e);
4643 out.push(Scalar::Float64(result));
4644 }
4645 Self::new(DType::Float64, out)
4646 }
4647
4648 pub fn remainder(&self, right: &Self) -> Result<Self, ColumnError> {
4650 self.r#mod(right)
4651 }
4652
4653 pub fn floor_divide(&self, right: &Self) -> Result<Self, ColumnError> {
4655 self.floordiv(right)
4656 }
4657
4658 pub fn true_divide(&self, right: &Self) -> Result<Self, ColumnError> {
4660 self.div(right)
4661 }
4662
4663 pub fn atan2(&self, other: &Self) -> Result<Self, ColumnError> {
4665 if self.len() != other.len() {
4666 return Err(ColumnError::LengthMismatch {
4667 left: self.len(),
4668 right: other.len(),
4669 });
4670 }
4671 if let Some(out) = self.typed_float_binary(other, |y, x| y.atan2(x)) {
4672 return Ok(out);
4673 }
4674 let mut out = Vec::with_capacity(self.values.len());
4675 for (y, x) in self.values.iter().zip(&other.values) {
4676 if y.is_missing() || x.is_missing() {
4677 out.push(Scalar::Float64(f64::NAN));
4678 continue;
4679 }
4680 let yf = y.to_f64().map_err(ColumnError::Type)?;
4681 let xf = x.to_f64().map_err(ColumnError::Type)?;
4682 out.push(Scalar::Float64(yf.atan2(xf)));
4683 }
4684 Self::new(DType::Float64, out)
4685 }
4686
4687 pub fn hypot(&self, other: &Self) -> Result<Self, ColumnError> {
4689 if self.len() != other.len() {
4690 return Err(ColumnError::LengthMismatch {
4691 left: self.len(),
4692 right: other.len(),
4693 });
4694 }
4695 if let Some(out) = self.typed_float_binary(other, |a, b| a.hypot(b)) {
4696 return Ok(out);
4697 }
4698 let mut out = Vec::with_capacity(self.values.len());
4699 for (a, b) in self.values.iter().zip(&other.values) {
4700 if a.is_missing() || b.is_missing() {
4701 out.push(Scalar::Float64(f64::NAN));
4702 continue;
4703 }
4704 let af = a.to_f64().map_err(ColumnError::Type)?;
4705 let bf = b.to_f64().map_err(ColumnError::Type)?;
4706 out.push(Scalar::Float64(af.hypot(bf)));
4707 }
4708 Self::new(DType::Float64, out)
4709 }
4710
4711 pub fn fmod(&self, other: &Self) -> Result<Self, ColumnError> {
4713 if self.len() != other.len() {
4714 return Err(ColumnError::LengthMismatch {
4715 left: self.len(),
4716 right: other.len(),
4717 });
4718 }
4719 if let Some(out) = self.typed_float_binary(other, |a, b| a % b) {
4720 return Ok(out);
4721 }
4722 let mut out = Vec::with_capacity(self.values.len());
4723 for (a, b) in self.values.iter().zip(&other.values) {
4724 if a.is_missing() || b.is_missing() {
4725 out.push(Scalar::Float64(f64::NAN));
4726 continue;
4727 }
4728 let af = a.to_f64().map_err(ColumnError::Type)?;
4729 let bf = b.to_f64().map_err(ColumnError::Type)?;
4730 out.push(Scalar::Float64(af % bf));
4731 }
4732 Self::new(DType::Float64, out)
4733 }
4734
4735 pub fn copysign(&self, other: &Self) -> Result<Self, ColumnError> {
4737 if self.len() != other.len() {
4738 return Err(ColumnError::LengthMismatch {
4739 left: self.len(),
4740 right: other.len(),
4741 });
4742 }
4743 if let Some(out) = self.typed_float_binary(other, |m, s| m.copysign(s)) {
4744 return Ok(out);
4745 }
4746 let mut out = Vec::with_capacity(self.values.len());
4747 for (mag, sign) in self.values.iter().zip(&other.values) {
4748 if mag.is_missing() || sign.is_missing() {
4749 out.push(Scalar::Float64(f64::NAN));
4750 continue;
4751 }
4752 let mf = mag.to_f64().map_err(ColumnError::Type)?;
4753 let sf = sign.to_f64().map_err(ColumnError::Type)?;
4754 out.push(Scalar::Float64(mf.copysign(sf)));
4755 }
4756 Self::new(DType::Float64, out)
4757 }
4758
4759 pub fn sign(&self) -> Result<Self, ColumnError> {
4761 if let Some(data) = self.as_i64_slice() {
4766 return Ok(Self::from_i64_values(
4767 data.iter()
4768 .map(|&x| {
4769 if x > 0 {
4770 1
4771 } else if x < 0 {
4772 -1
4773 } else {
4774 0
4775 }
4776 })
4777 .collect(),
4778 ));
4779 }
4780 if let Some(data) = self.as_f64_slice() {
4781 return Ok(Self::from_f64_values(
4782 data.iter()
4783 .map(|&x| {
4784 if x > 0.0 {
4785 1.0
4786 } else if x < 0.0 {
4787 -1.0
4788 } else {
4789 0.0
4790 }
4791 })
4792 .collect(),
4793 ));
4794 }
4795 let mut out = Vec::with_capacity(self.values.len());
4796 for v in &self.values {
4797 if v.is_missing() {
4798 out.push(Scalar::Float64(f64::NAN));
4799 continue;
4800 }
4801 match v {
4802 Scalar::Int64(x) => {
4803 let s = if *x > 0 {
4804 1
4805 } else if *x < 0 {
4806 -1
4807 } else {
4808 0
4809 };
4810 out.push(Scalar::Int64(s));
4811 }
4812 Scalar::Float64(x) => {
4813 let s = if x.is_nan() {
4814 f64::NAN
4815 } else if *x > 0.0 {
4816 1.0
4817 } else if *x < 0.0 {
4818 -1.0
4819 } else {
4820 0.0
4821 };
4822 out.push(Scalar::Float64(s));
4823 }
4824 _ => {
4825 return Err(ColumnError::Type(TypeError::NonNumericValue {
4826 value: format!("{v:?}"),
4827 dtype: self.dtype,
4828 }));
4829 }
4830 }
4831 }
4832 let dtype = match self.dtype {
4833 DType::Int64 => DType::Int64,
4834 _ => DType::Float64,
4835 };
4836 Self::new(dtype, out)
4837 }
4838
4839 pub fn signbit(&self) -> Result<Self, ColumnError> {
4843 if let Some(data) = self.as_i64_slice() {
4847 return Ok(Self::from_bool_values(
4848 data.iter().map(|&x| x < 0).collect(),
4849 ));
4850 }
4851 if let Some(data) = self.as_f64_slice() {
4852 return Ok(Self::from_bool_values(
4853 data.iter().map(|&x| x.is_sign_negative()).collect(),
4854 ));
4855 }
4856 let mut out = Vec::with_capacity(self.values.len());
4857 for v in &self.values {
4858 if v.is_missing() {
4859 out.push(Scalar::Bool(false));
4860 continue;
4861 }
4862 match v {
4863 Scalar::Int64(x) => out.push(Scalar::Bool(*x < 0)),
4864 Scalar::Float64(x) => out.push(Scalar::Bool(x.is_sign_negative())),
4865 _ => {
4866 return Err(ColumnError::Type(TypeError::NonNumericValue {
4867 value: format!("{v:?}"),
4868 dtype: self.dtype,
4869 }));
4870 }
4871 }
4872 }
4873 Self::new(DType::Bool, out)
4874 }
4875
4876 pub fn heaviside(&self, h0: f64) -> Result<Self, ColumnError> {
4883 let mut out = Vec::with_capacity(self.values.len());
4884 for v in &self.values {
4885 if v.is_missing() {
4886 out.push(Scalar::Float64(f64::NAN));
4887 continue;
4888 }
4889 match v {
4890 Scalar::Int64(x) => {
4891 let val = if *x < 0 {
4892 0.0
4893 } else if *x > 0 {
4894 1.0
4895 } else {
4896 h0
4897 };
4898 out.push(Scalar::Float64(val));
4899 }
4900 Scalar::Float64(x) => {
4901 let val = if x.is_nan() {
4902 f64::NAN
4903 } else if *x < 0.0 {
4904 0.0
4905 } else if *x > 0.0 {
4906 1.0
4907 } else {
4908 h0
4909 };
4910 out.push(Scalar::Float64(val));
4911 }
4912 _ => {
4913 return Err(ColumnError::Type(TypeError::NonNumericValue {
4914 value: format!("{v:?}"),
4915 dtype: self.dtype,
4916 }));
4917 }
4918 }
4919 }
4920 Self::new(DType::Float64, out)
4921 }
4922
4923 pub fn gcd(&self, other: &Self) -> Result<Self, ColumnError> {
4927 if self.len() != other.len() {
4928 return Err(ColumnError::LengthMismatch {
4929 left: self.len(),
4930 right: other.len(),
4931 });
4932 }
4933 fn compute_gcd(mut a: i64, mut b: i64) -> i64 {
4934 a = a.abs();
4935 b = b.abs();
4936 while b != 0 {
4937 let t = b;
4938 b = a % b;
4939 a = t;
4940 }
4941 a
4942 }
4943 let mut out = Vec::with_capacity(self.values.len());
4944 for (a, b) in self.values.iter().zip(&other.values) {
4945 if a.is_missing() || b.is_missing() {
4946 out.push(Scalar::Null(NullKind::Null));
4947 continue;
4948 }
4949 match (a, b) {
4950 (Scalar::Int64(x), Scalar::Int64(y)) => {
4951 out.push(Scalar::Int64(compute_gcd(*x, *y)));
4952 }
4953 _ => {
4954 return Err(ColumnError::Type(TypeError::NonNumericValue {
4955 value: format!("{a:?}"),
4956 dtype: self.dtype,
4957 }));
4958 }
4959 }
4960 }
4961 Self::new(DType::Int64, out)
4962 }
4963
4964 pub fn lcm(&self, other: &Self) -> Result<Self, ColumnError> {
4968 if self.len() != other.len() {
4969 return Err(ColumnError::LengthMismatch {
4970 left: self.len(),
4971 right: other.len(),
4972 });
4973 }
4974 fn compute_gcd(mut a: i64, mut b: i64) -> i64 {
4975 a = a.abs();
4976 b = b.abs();
4977 while b != 0 {
4978 let t = b;
4979 b = a % b;
4980 a = t;
4981 }
4982 a
4983 }
4984 let mut out = Vec::with_capacity(self.values.len());
4985 for (a, b) in self.values.iter().zip(&other.values) {
4986 if a.is_missing() || b.is_missing() {
4987 out.push(Scalar::Null(NullKind::Null));
4988 continue;
4989 }
4990 match (a, b) {
4991 (Scalar::Int64(x), Scalar::Int64(y)) => {
4992 let g = compute_gcd(*x, *y);
4993 let result = if g == 0 { 0 } else { (x.abs() / g) * y.abs() };
4994 out.push(Scalar::Int64(result));
4995 }
4996 _ => {
4997 return Err(ColumnError::Type(TypeError::NonNumericValue {
4998 value: format!("{a:?}"),
4999 dtype: self.dtype,
5000 }));
5001 }
5002 }
5003 }
5004 Self::new(DType::Int64, out)
5005 }
5006
5007 pub fn bitwise_and(&self, other: &Self) -> Result<Self, ColumnError> {
5009 if self.len() != other.len() {
5010 return Err(ColumnError::LengthMismatch {
5011 left: self.len(),
5012 right: other.len(),
5013 });
5014 }
5015 let mut out = Vec::with_capacity(self.values.len());
5016 for (a, b) in self.values.iter().zip(&other.values) {
5017 if a.is_missing() || b.is_missing() {
5018 out.push(Scalar::Null(NullKind::Null));
5019 continue;
5020 }
5021 match (a, b) {
5022 (Scalar::Int64(x), Scalar::Int64(y)) => out.push(Scalar::Int64(x & y)),
5023 (Scalar::Bool(x), Scalar::Bool(y)) => out.push(Scalar::Bool(*x && *y)),
5024 _ => {
5025 return Err(ColumnError::Type(TypeError::NonNumericValue {
5026 value: format!("{a:?}"),
5027 dtype: self.dtype,
5028 }));
5029 }
5030 }
5031 }
5032 Self::new(self.dtype, out)
5033 }
5034
5035 pub fn bitwise_or(&self, other: &Self) -> Result<Self, ColumnError> {
5037 if self.len() != other.len() {
5038 return Err(ColumnError::LengthMismatch {
5039 left: self.len(),
5040 right: other.len(),
5041 });
5042 }
5043 let mut out = Vec::with_capacity(self.values.len());
5044 for (a, b) in self.values.iter().zip(&other.values) {
5045 if a.is_missing() || b.is_missing() {
5046 out.push(Scalar::Null(NullKind::Null));
5047 continue;
5048 }
5049 match (a, b) {
5050 (Scalar::Int64(x), Scalar::Int64(y)) => out.push(Scalar::Int64(x | y)),
5051 (Scalar::Bool(x), Scalar::Bool(y)) => out.push(Scalar::Bool(*x || *y)),
5052 _ => {
5053 return Err(ColumnError::Type(TypeError::NonNumericValue {
5054 value: format!("{a:?}"),
5055 dtype: self.dtype,
5056 }));
5057 }
5058 }
5059 }
5060 Self::new(self.dtype, out)
5061 }
5062
5063 pub fn bitwise_xor(&self, other: &Self) -> Result<Self, ColumnError> {
5065 if self.len() != other.len() {
5066 return Err(ColumnError::LengthMismatch {
5067 left: self.len(),
5068 right: other.len(),
5069 });
5070 }
5071 let mut out = Vec::with_capacity(self.values.len());
5072 for (a, b) in self.values.iter().zip(&other.values) {
5073 if a.is_missing() || b.is_missing() {
5074 out.push(Scalar::Null(NullKind::Null));
5075 continue;
5076 }
5077 match (a, b) {
5078 (Scalar::Int64(x), Scalar::Int64(y)) => out.push(Scalar::Int64(x ^ y)),
5079 (Scalar::Bool(x), Scalar::Bool(y)) => out.push(Scalar::Bool(*x ^ *y)),
5080 _ => {
5081 return Err(ColumnError::Type(TypeError::NonNumericValue {
5082 value: format!("{a:?}"),
5083 dtype: self.dtype,
5084 }));
5085 }
5086 }
5087 }
5088 Self::new(self.dtype, out)
5089 }
5090
5091 pub fn left_shift(&self, other: &Self) -> Result<Self, ColumnError> {
5095 if self.len() != other.len() {
5096 return Err(ColumnError::LengthMismatch {
5097 left: self.len(),
5098 right: other.len(),
5099 });
5100 }
5101 let mut out = Vec::with_capacity(self.values.len());
5102 for (a, b) in self.values.iter().zip(&other.values) {
5103 if a.is_missing() || b.is_missing() {
5104 out.push(Scalar::Null(NullKind::Null));
5105 continue;
5106 }
5107 match (a, b) {
5108 (Scalar::Int64(x), Scalar::Int64(y)) => {
5109 let shift = (*y).clamp(0, 63) as u32;
5110 out.push(Scalar::Int64(x.wrapping_shl(shift)));
5111 }
5112 _ => {
5113 return Err(ColumnError::Type(TypeError::NonNumericValue {
5114 value: format!("{a:?}"),
5115 dtype: self.dtype,
5116 }));
5117 }
5118 }
5119 }
5120 Self::new(DType::Int64, out)
5121 }
5122
5123 pub fn right_shift(&self, other: &Self) -> Result<Self, ColumnError> {
5127 if self.len() != other.len() {
5128 return Err(ColumnError::LengthMismatch {
5129 left: self.len(),
5130 right: other.len(),
5131 });
5132 }
5133 let mut out = Vec::with_capacity(self.values.len());
5134 for (a, b) in self.values.iter().zip(&other.values) {
5135 if a.is_missing() || b.is_missing() {
5136 out.push(Scalar::Null(NullKind::Null));
5137 continue;
5138 }
5139 match (a, b) {
5140 (Scalar::Int64(x), Scalar::Int64(y)) => {
5141 let shift = (*y).clamp(0, 63) as u32;
5142 out.push(Scalar::Int64(x.wrapping_shr(shift)));
5143 }
5144 _ => {
5145 return Err(ColumnError::Type(TypeError::NonNumericValue {
5146 value: format!("{a:?}"),
5147 dtype: self.dtype,
5148 }));
5149 }
5150 }
5151 }
5152 Self::new(DType::Int64, out)
5153 }
5154
5155 pub fn bitwise_not(&self) -> Result<Self, ColumnError> {
5157 let mut out = Vec::with_capacity(self.values.len());
5158 for v in &self.values {
5159 if v.is_missing() {
5160 out.push(Scalar::Null(NullKind::Null));
5161 continue;
5162 }
5163 match v {
5164 Scalar::Int64(x) => out.push(Scalar::Int64(!x)),
5165 Scalar::Bool(x) => out.push(Scalar::Bool(!x)),
5166 _ => {
5167 return Err(ColumnError::Type(TypeError::NonNumericValue {
5168 value: format!("{v:?}"),
5169 dtype: self.dtype,
5170 }));
5171 }
5172 }
5173 }
5174 Self::new(self.dtype, out)
5175 }
5176
5177 pub fn invert(&self) -> Result<Self, ColumnError> {
5179 self.bitwise_not()
5180 }
5181
5182 pub fn maximum(&self, other: &Self) -> Result<Self, ColumnError> {
5184 if self.len() != other.len() {
5185 return Err(ColumnError::LengthMismatch {
5186 left: self.len(),
5187 right: other.len(),
5188 });
5189 }
5190 if let Some(out) = self.typed_float_binary(other, f64::max) {
5193 return Ok(out);
5194 }
5195 let mut out = Vec::with_capacity(self.values.len());
5196 for (a, b) in self.values.iter().zip(&other.values) {
5197 if a.is_missing() || b.is_missing() {
5198 out.push(Scalar::Float64(f64::NAN));
5199 continue;
5200 }
5201 let af = a.to_f64().map_err(ColumnError::Type)?;
5202 let bf = b.to_f64().map_err(ColumnError::Type)?;
5203 if af.is_nan() || bf.is_nan() {
5204 out.push(Scalar::Float64(f64::NAN));
5205 } else {
5206 out.push(Scalar::Float64(af.max(bf)));
5207 }
5208 }
5209 Self::new(DType::Float64, out)
5210 }
5211
5212 pub fn minimum(&self, other: &Self) -> Result<Self, ColumnError> {
5214 if self.len() != other.len() {
5215 return Err(ColumnError::LengthMismatch {
5216 left: self.len(),
5217 right: other.len(),
5218 });
5219 }
5220 if let Some(out) = self.typed_float_binary(other, f64::min) {
5221 return Ok(out);
5222 }
5223 let mut out = Vec::with_capacity(self.values.len());
5224 for (a, b) in self.values.iter().zip(&other.values) {
5225 if a.is_missing() || b.is_missing() {
5226 out.push(Scalar::Float64(f64::NAN));
5227 continue;
5228 }
5229 let af = a.to_f64().map_err(ColumnError::Type)?;
5230 let bf = b.to_f64().map_err(ColumnError::Type)?;
5231 if af.is_nan() || bf.is_nan() {
5232 out.push(Scalar::Float64(f64::NAN));
5233 } else {
5234 out.push(Scalar::Float64(af.min(bf)));
5235 }
5236 }
5237 Self::new(DType::Float64, out)
5238 }
5239
5240 pub fn fmax(&self, other: &Self) -> Result<Self, ColumnError> {
5242 if self.len() != other.len() {
5243 return Err(ColumnError::LengthMismatch {
5244 left: self.len(),
5245 right: other.len(),
5246 });
5247 }
5248 if let Some(out) = self.typed_float_binary(other, f64::max) {
5251 return Ok(out);
5252 }
5253 let mut out = Vec::with_capacity(self.values.len());
5254 for (a, b) in self.values.iter().zip(&other.values) {
5255 let af = a.to_f64().ok();
5256 let bf = b.to_f64().ok();
5257 let result = match (af, bf) {
5258 (Some(x), Some(y)) if x.is_nan() => y,
5259 (Some(x), Some(y)) if y.is_nan() => x,
5260 (Some(x), Some(y)) => x.max(y),
5261 (Some(x), None) => x,
5262 (None, Some(y)) => y,
5263 (None, None) => f64::NAN,
5264 };
5265 out.push(Scalar::Float64(result));
5266 }
5267 Self::new(DType::Float64, out)
5268 }
5269
5270 pub fn fmin(&self, other: &Self) -> Result<Self, ColumnError> {
5272 if self.len() != other.len() {
5273 return Err(ColumnError::LengthMismatch {
5274 left: self.len(),
5275 right: other.len(),
5276 });
5277 }
5278 if let Some(out) = self.typed_float_binary(other, f64::min) {
5279 return Ok(out);
5280 }
5281 let mut out = Vec::with_capacity(self.values.len());
5282 for (a, b) in self.values.iter().zip(&other.values) {
5283 let af = a.to_f64().ok();
5284 let bf = b.to_f64().ok();
5285 let result = match (af, bf) {
5286 (Some(x), Some(y)) if x.is_nan() => y,
5287 (Some(x), Some(y)) if y.is_nan() => x,
5288 (Some(x), Some(y)) => x.min(y),
5289 (Some(x), None) => x,
5290 (None, Some(y)) => y,
5291 (None, None) => f64::NAN,
5292 };
5293 out.push(Scalar::Float64(result));
5294 }
5295 Self::new(DType::Float64, out)
5296 }
5297
5298 pub fn logical_and(&self, other: &Self) -> Result<Self, ColumnError> {
5300 if self.len() != other.len() {
5301 return Err(ColumnError::LengthMismatch {
5302 left: self.len(),
5303 right: other.len(),
5304 });
5305 }
5306 let mut out = Vec::with_capacity(self.values.len());
5307 for (a, b) in self.values.iter().zip(&other.values) {
5308 if a.is_missing() || b.is_missing() {
5309 out.push(Scalar::Null(NullKind::Null));
5310 continue;
5311 }
5312 let av = match a {
5313 Scalar::Bool(x) => *x,
5314 _ => a.to_f64().map(|v| v != 0.0).unwrap_or(false),
5315 };
5316 let bv = match b {
5317 Scalar::Bool(x) => *x,
5318 _ => b.to_f64().map(|v| v != 0.0).unwrap_or(false),
5319 };
5320 out.push(Scalar::Bool(av && bv));
5321 }
5322 Self::new(DType::Bool, out)
5323 }
5324
5325 pub fn logical_or(&self, other: &Self) -> Result<Self, ColumnError> {
5327 if self.len() != other.len() {
5328 return Err(ColumnError::LengthMismatch {
5329 left: self.len(),
5330 right: other.len(),
5331 });
5332 }
5333 let mut out = Vec::with_capacity(self.values.len());
5334 for (a, b) in self.values.iter().zip(&other.values) {
5335 if a.is_missing() || b.is_missing() {
5336 out.push(Scalar::Null(NullKind::Null));
5337 continue;
5338 }
5339 let av = match a {
5340 Scalar::Bool(x) => *x,
5341 _ => a.to_f64().map(|v| v != 0.0).unwrap_or(false),
5342 };
5343 let bv = match b {
5344 Scalar::Bool(x) => *x,
5345 _ => b.to_f64().map(|v| v != 0.0).unwrap_or(false),
5346 };
5347 out.push(Scalar::Bool(av || bv));
5348 }
5349 Self::new(DType::Bool, out)
5350 }
5351
5352 pub fn logical_xor(&self, other: &Self) -> Result<Self, ColumnError> {
5354 if self.len() != other.len() {
5355 return Err(ColumnError::LengthMismatch {
5356 left: self.len(),
5357 right: other.len(),
5358 });
5359 }
5360 let mut out = Vec::with_capacity(self.values.len());
5361 for (a, b) in self.values.iter().zip(&other.values) {
5362 if a.is_missing() || b.is_missing() {
5363 out.push(Scalar::Null(NullKind::Null));
5364 continue;
5365 }
5366 let av = match a {
5367 Scalar::Bool(x) => *x,
5368 _ => a.to_f64().map(|v| v != 0.0).unwrap_or(false),
5369 };
5370 let bv = match b {
5371 Scalar::Bool(x) => *x,
5372 _ => b.to_f64().map(|v| v != 0.0).unwrap_or(false),
5373 };
5374 out.push(Scalar::Bool(av ^ bv));
5375 }
5376 Self::new(DType::Bool, out)
5377 }
5378
5379 pub fn logical_not(&self) -> Result<Self, ColumnError> {
5381 let mut out = Vec::with_capacity(self.values.len());
5382 for v in &self.values {
5383 if v.is_missing() {
5384 out.push(Scalar::Null(NullKind::Null));
5385 continue;
5386 }
5387 let bv = match v {
5388 Scalar::Bool(x) => *x,
5389 _ => v.to_f64().map(|x| x != 0.0).unwrap_or(false),
5390 };
5391 out.push(Scalar::Bool(!bv));
5392 }
5393 Self::new(DType::Bool, out)
5394 }
5395
5396 pub fn binary_comparison(&self, right: &Self, op: ComparisonOp) -> Result<Self, ColumnError> {
5401 if self.len() != right.len() {
5402 return Err(ColumnError::LengthMismatch {
5403 left: self.len(),
5404 right: right.len(),
5405 });
5406 }
5407
5408 if let (Some(l), Some(r)) = (self.as_f64_slice(), right.as_f64_slice()) {
5415 let bools: Vec<bool> = l
5416 .iter()
5417 .zip(r)
5418 .map(|(&a, &b)| match op {
5419 ComparisonOp::Gt => a > b,
5420 ComparisonOp::Lt => a < b,
5421 ComparisonOp::Eq => a == b,
5422 ComparisonOp::Ne => a != b,
5423 ComparisonOp::Ge => a >= b,
5424 ComparisonOp::Le => a <= b,
5425 })
5426 .collect();
5427 return Ok(Self::from_bool_values(bools));
5428 }
5429 if let (Some(l), Some(r)) = (self.as_i64_slice(), right.as_i64_slice()) {
5430 let bools: Vec<bool> = l
5431 .iter()
5432 .zip(r)
5433 .map(|(&a, &b)| match op {
5434 ComparisonOp::Gt => a > b,
5435 ComparisonOp::Lt => a < b,
5436 ComparisonOp::Eq => a == b,
5437 ComparisonOp::Ne => a != b,
5438 ComparisonOp::Ge => a >= b,
5439 ComparisonOp::Le => a <= b,
5440 })
5441 .collect();
5442 return Ok(Self::from_bool_values(bools));
5443 }
5444
5445 let values = self
5446 .values
5447 .iter()
5448 .zip(&right.values)
5449 .map(|(l, r)| -> Result<Scalar, ColumnError> {
5450 if l.is_missing() || r.is_missing() {
5451 return Ok(Scalar::Null(NullKind::Null));
5452 }
5453 let result = scalar_compare(l, r, op)?;
5454 Ok(Scalar::Bool(result))
5455 })
5456 .collect::<Result<Vec<_>, _>>()?;
5457
5458 Self::new(DType::Bool, values)
5459 }
5460
5461 pub fn eq(&self, right: &Self) -> Result<Self, ColumnError> {
5463 self.binary_comparison(right, ComparisonOp::Eq)
5464 }
5465
5466 pub fn ne(&self, right: &Self) -> Result<Self, ColumnError> {
5468 self.binary_comparison(right, ComparisonOp::Ne)
5469 }
5470
5471 pub fn lt(&self, right: &Self) -> Result<Self, ColumnError> {
5473 self.binary_comparison(right, ComparisonOp::Lt)
5474 }
5475
5476 pub fn le(&self, right: &Self) -> Result<Self, ColumnError> {
5478 self.binary_comparison(right, ComparisonOp::Le)
5479 }
5480
5481 pub fn gt(&self, right: &Self) -> Result<Self, ColumnError> {
5483 self.binary_comparison(right, ComparisonOp::Gt)
5484 }
5485
5486 pub fn ge(&self, right: &Self) -> Result<Self, ColumnError> {
5488 self.binary_comparison(right, ComparisonOp::Ge)
5489 }
5490
5491 pub fn compare_scalar(&self, scalar: &Scalar, op: ComparisonOp) -> Result<Self, ColumnError> {
5495 if scalar.is_missing() {
5496 let values = vec![Scalar::Null(NullKind::Null); self.len()];
5498 return Self::new(DType::Bool, values);
5499 }
5500
5501 if let Some(data) = self.as_f64_slice()
5512 && let Ok(s) = scalar.to_f64()
5513 {
5514 let bools: Vec<bool> = data
5515 .iter()
5516 .map(|&v| match op {
5517 ComparisonOp::Gt => v > s,
5518 ComparisonOp::Lt => v < s,
5519 ComparisonOp::Eq => v == s,
5520 ComparisonOp::Ne => v != s,
5521 ComparisonOp::Ge => v >= s,
5522 ComparisonOp::Le => v <= s,
5523 })
5524 .collect();
5525 return Ok(Self::from_bool_values(bools));
5526 }
5527 if let Some(data) = self.as_i64_slice()
5528 && let Scalar::Int64(s) = scalar
5529 {
5530 let s = *s;
5531 let bools: Vec<bool> = data
5532 .iter()
5533 .map(|&v| match op {
5534 ComparisonOp::Gt => v > s,
5535 ComparisonOp::Lt => v < s,
5536 ComparisonOp::Eq => v == s,
5537 ComparisonOp::Ne => v != s,
5538 ComparisonOp::Ge => v >= s,
5539 ComparisonOp::Le => v <= s,
5540 })
5541 .collect();
5542 return Ok(Self::from_bool_values(bools));
5543 }
5544
5545 let values = self
5546 .values
5547 .iter()
5548 .map(|v| -> Result<Scalar, ColumnError> {
5549 if v.is_missing() {
5550 return Ok(Scalar::Null(NullKind::Null));
5551 }
5552 let result = scalar_compare(v, scalar, op)?;
5553 Ok(Scalar::Bool(result))
5554 })
5555 .collect::<Result<Vec<_>, _>>()?;
5556
5557 Self::new(DType::Bool, values)
5558 }
5559
5560 pub fn filter_by_mask(&self, mask: &Self) -> Result<Self, ColumnError> {
5565 if mask.dtype != DType::Bool {
5566 return Err(ColumnError::InvalidMaskType { dtype: mask.dtype });
5567 }
5568 if self.len() != mask.len() {
5569 return Err(ColumnError::LengthMismatch {
5570 left: self.len(),
5571 right: mask.len(),
5572 });
5573 }
5574
5575 if let Some(mask_bits) = mask.as_bool_slice() {
5588 if let Some(data) = self.as_f64_slice() {
5589 let gathered: Vec<f64> = data
5590 .iter()
5591 .zip(mask_bits)
5592 .filter_map(|(&v, &m)| m.then_some(v))
5593 .collect();
5594 return Ok(Self::from_f64_values(gathered));
5595 }
5596 if let Some(data) = self.as_i64_slice() {
5597 let gathered: Vec<i64> = data
5598 .iter()
5599 .zip(mask_bits)
5600 .filter_map(|(&v, &m)| m.then_some(v))
5601 .collect();
5602 return Ok(Self::from_i64_values(gathered));
5603 }
5604 let values = self
5605 .values
5606 .iter()
5607 .zip(mask_bits)
5608 .filter_map(|(val, &m)| m.then_some(val.clone()))
5609 .collect::<Vec<_>>();
5610 return Self::new(self.dtype, values);
5611 }
5612
5613 if let Some(data) = self.as_f64_slice() {
5614 let gathered: Vec<f64> = data
5615 .iter()
5616 .zip(mask.values.iter())
5617 .filter_map(|(&v, m)| matches!(m, Scalar::Bool(true)).then_some(v))
5618 .collect();
5619 return Ok(Self::from_f64_values(gathered));
5620 }
5621 if let Some(data) = self.as_i64_slice() {
5622 let gathered: Vec<i64> = data
5623 .iter()
5624 .zip(mask.values.iter())
5625 .filter_map(|(&v, m)| matches!(m, Scalar::Bool(true)).then_some(v))
5626 .collect();
5627 return Ok(Self::from_i64_values(gathered));
5628 }
5629
5630 let values = self
5631 .values
5632 .iter()
5633 .zip(mask.values.iter())
5634 .filter_map(|(val, mask_val)| match mask_val {
5635 Scalar::Bool(true) => Some(val.clone()),
5636 _ => None,
5637 })
5638 .collect::<Vec<_>>();
5639
5640 Self::new(self.dtype, values)
5641 }
5642
5643 pub fn fillna(&self, fill_value: &Scalar) -> Result<Self, ColumnError> {
5648 if self.dtype == DType::Null {
5649 let replacement_dtype = if fill_value.is_missing() {
5650 DType::Null
5651 } else {
5652 fill_value.dtype()
5653 };
5654 let values = self
5655 .values
5656 .iter()
5657 .map(|value| {
5658 if value.is_missing() {
5659 fill_value.clone()
5660 } else {
5661 value.clone()
5662 }
5663 })
5664 .collect();
5665 return Self::new(replacement_dtype, values);
5666 }
5667
5668 let cast_fill = cast_scalar(fill_value, self.dtype)?;
5669 let values = self
5670 .values
5671 .iter()
5672 .map(|v| {
5673 if v.is_missing() {
5674 cast_fill.clone()
5675 } else {
5676 v.clone()
5677 }
5678 })
5679 .collect();
5680
5681 Self::new(self.dtype, values)
5682 }
5683
5684 pub fn dropna(&self) -> Result<Self, ColumnError> {
5686 let values = self
5687 .values
5688 .iter()
5689 .filter(|v| !v.is_missing())
5690 .cloned()
5691 .collect();
5692
5693 Self::new(self.dtype, values)
5694 }
5695
5696 pub fn take(&self, indices: &[usize]) -> Result<Self, ColumnError> {
5703 let mut out = Vec::with_capacity(indices.len());
5704 for &i in indices {
5705 match self.values.get(i) {
5706 Some(v) => out.push(v.clone()),
5707 None => {
5708 return Err(ColumnError::LengthMismatch {
5709 left: self.values.len(),
5710 right: i,
5711 });
5712 }
5713 }
5714 }
5715 Self::new(self.dtype, out)
5716 }
5717
5718 pub fn put(&self, indices: &[usize], values: &[Scalar]) -> Result<Self, ColumnError> {
5722 if indices.len() != values.len() {
5723 return Err(ColumnError::LengthMismatch {
5724 left: indices.len(),
5725 right: values.len(),
5726 });
5727 }
5728 let mut out = self.values.to_vec();
5729 for (&i, v) in indices.iter().zip(values) {
5730 if i >= out.len() {
5731 return Err(ColumnError::LengthMismatch {
5732 left: out.len(),
5733 right: i,
5734 });
5735 }
5736 out[i] = v.clone();
5737 }
5738 Self::new(self.dtype, out)
5739 }
5740
5741 pub fn slice(&self, start: usize, len: usize) -> Result<Self, ColumnError> {
5747 if start >= self.values.len() {
5748 return Self::new(self.dtype, Vec::new());
5749 }
5750 let end = start.saturating_add(len).min(self.values.len());
5751 let values = self.values[start..end].to_vec();
5752 Self::new(self.dtype, values)
5753 }
5754
5755 pub fn head(&self, n: i64) -> Result<Self, ColumnError> {
5760 let take = normalize_head_take(n, self.len());
5761 self.slice(0, take)
5762 }
5763
5764 pub fn tail(&self, n: i64) -> Result<Self, ColumnError> {
5769 let (start, len) = normalize_tail_window(n, self.len());
5770 self.slice(start, len)
5771 }
5772
5773 pub fn array_split(&self, n: usize) -> Result<Vec<Self>, ColumnError> {
5777 if n == 0 {
5778 return Ok(Vec::new());
5779 }
5780 let len = self.values.len();
5781 let base_size = len / n;
5782 let remainder = len % n;
5783 let mut result = Vec::with_capacity(n);
5784 let mut start = 0;
5785 for i in 0..n {
5786 let size = base_size + if i < remainder { 1 } else { 0 };
5787 let part = self.slice(start, size)?;
5788 result.push(part);
5789 start += size;
5790 }
5791 Ok(result)
5792 }
5793
5794 pub fn split(&self, n: usize) -> Result<Vec<Self>, ColumnError> {
5796 self.array_split(n)
5797 }
5798
5799 pub fn concat(&self, other: &Self) -> Result<Self, ColumnError> {
5804 if self.dtype != other.dtype {
5805 return Err(ColumnError::DTypeMismatch {
5806 left: self.dtype,
5807 right: other.dtype,
5808 });
5809 }
5810 let mut values = Vec::with_capacity(self.values.len() + other.values.len());
5811 values.extend_from_slice(&self.values);
5812 values.extend_from_slice(&other.values);
5813 Self::new(self.dtype, values)
5814 }
5815
5816 pub fn append(&self, other: &Self) -> Result<Self, ColumnError> {
5818 self.concat(other)
5819 }
5820
5821 pub fn insert(&self, index: usize, values: &[Scalar]) -> Result<Self, ColumnError> {
5825 let idx = index.min(self.values.len());
5826 let mut out = Vec::with_capacity(self.values.len() + values.len());
5827 out.extend_from_slice(&self.values[..idx]);
5828 out.extend_from_slice(values);
5829 out.extend_from_slice(&self.values[idx..]);
5830 Self::new(self.dtype, out)
5831 }
5832
5833 pub fn delete(&self, indices: &[usize]) -> Result<Self, ColumnError> {
5837 let mut to_delete: FxHashSet<usize> = FxHashSet::default();
5838 for &i in indices {
5839 to_delete.insert(i);
5840 }
5841 let out: Vec<Scalar> = self
5842 .values
5843 .iter()
5844 .enumerate()
5845 .filter(|(i, _)| !to_delete.contains(i))
5846 .map(|(_, v)| v.clone())
5847 .collect();
5848 Self::new(self.dtype, out)
5849 }
5850
5851 pub fn resize(&self, new_size: usize) -> Result<Self, ColumnError> {
5855 if new_size == 0 || self.values.is_empty() {
5856 return Self::new(self.dtype, Vec::new());
5857 }
5858 let mut out = Vec::with_capacity(new_size);
5859 let mut i = 0;
5860 while out.len() < new_size {
5861 out.push(self.values[i % self.values.len()].clone());
5862 i += 1;
5863 }
5864 Self::new(self.dtype, out)
5865 }
5866
5867 pub fn repeat(&self, repeats: usize) -> Result<Self, ColumnError> {
5872 if repeats == 0 {
5873 return Self::new(self.dtype, Vec::new());
5874 }
5875 if repeats == 1 {
5876 return Ok(self.clone());
5877 }
5878 let mut out = Vec::with_capacity(self.values.len() * repeats);
5879 for v in &self.values {
5880 for _ in 0..repeats {
5881 out.push(v.clone());
5882 }
5883 }
5884 Self::new(self.dtype, out)
5885 }
5886
5887 pub fn tile(&self, reps: usize) -> Result<Self, ColumnError> {
5892 if reps == 0 {
5893 return Self::new(self.dtype, Vec::new());
5894 }
5895 if reps == 1 {
5896 return Ok(self.clone());
5897 }
5898 let mut out = Vec::with_capacity(self.values.len() * reps);
5899 for _ in 0..reps {
5900 out.extend_from_slice(&self.values);
5901 }
5902 Self::new(self.dtype, out)
5903 }
5904
5905 pub fn reverse(&self) -> Result<Self, ColumnError> {
5909 let mut values = self.values.to_vec();
5910 values.reverse();
5911 Self::new(self.dtype, values)
5912 }
5913
5914 pub fn flip(&self) -> Result<Self, ColumnError> {
5916 self.reverse()
5917 }
5918
5919 pub fn roll(&self, shift: i64) -> Result<Self, ColumnError> {
5925 let len = self.len();
5926 if len == 0 {
5927 return Ok(self.clone());
5928 }
5929 let shift = ((shift % len as i64) + len as i64) as usize % len;
5930 if shift == 0 {
5931 return Ok(self.clone());
5932 }
5933 let mut out = Vec::with_capacity(len);
5934 let split = len - shift;
5935 out.extend_from_slice(&self.values[split..]);
5936 out.extend_from_slice(&self.values[..split]);
5937 Self::new(self.dtype, out)
5938 }
5939
5940 pub fn compress(&self, condition: &Self) -> Result<Self, ColumnError> {
5944 if self.len() != condition.len() {
5945 return Err(ColumnError::LengthMismatch {
5946 left: self.len(),
5947 right: condition.len(),
5948 });
5949 }
5950 let mut out = Vec::new();
5951 for (v, c) in self.values.iter().zip(&condition.values) {
5952 match c {
5953 Scalar::Bool(true) => out.push(v.clone()),
5954 Scalar::Bool(false) => {}
5955 _ => {
5956 return Err(ColumnError::Type(TypeError::NonNumericValue {
5957 value: format!("{c:?}"),
5958 dtype: condition.dtype,
5959 }));
5960 }
5961 }
5962 }
5963 Self::new(self.dtype, out)
5964 }
5965
5966 pub fn cumsum(&self) -> Result<Self, ColumnError> {
5972 if let Some(data) = self.as_f64_slice() {
5978 let mut running = 0.0_f64;
5979 let out: Vec<f64> = data
5980 .iter()
5981 .map(|&x| {
5982 running += x;
5983 running
5984 })
5985 .collect();
5986 return Ok(Self::from_f64_values(out));
5987 }
5988 let out = nancumsum(&self.values);
5989 Self::new(DType::Float64, out)
5990 }
5991
5992 pub fn cumprod(&self) -> Result<Self, ColumnError> {
5994 if let Some(data) = self.as_f64_slice() {
5996 let mut running = 1.0_f64;
5997 let out: Vec<f64> = data
5998 .iter()
5999 .map(|&x| {
6000 running *= x;
6001 running
6002 })
6003 .collect();
6004 return Ok(Self::from_f64_values(out));
6005 }
6006 let out = nancumprod(&self.values);
6007 Self::new(DType::Float64, out)
6008 }
6009
6010 pub fn cummax(&self) -> Result<Self, ColumnError> {
6012 if let Some(data) = self.as_f64_slice() {
6016 if let Some((&first, rest)) = data.split_first() {
6017 let mut running = first;
6018 let mut out = Vec::with_capacity(data.len());
6019 out.push(running);
6020 for &x in rest {
6021 running = running.max(x);
6022 out.push(running);
6023 }
6024 return Ok(Self::from_f64_values(out));
6025 }
6026 return Ok(Self::from_f64_values(Vec::new()));
6027 }
6028 let out = nancummax(&self.values);
6029 Self::new(DType::Float64, out)
6030 }
6031
6032 pub fn cummin(&self) -> Result<Self, ColumnError> {
6034 if let Some(data) = self.as_f64_slice() {
6036 if let Some((&first, rest)) = data.split_first() {
6037 let mut running = first;
6038 let mut out = Vec::with_capacity(data.len());
6039 out.push(running);
6040 for &x in rest {
6041 running = running.min(x);
6042 out.push(running);
6043 }
6044 return Ok(Self::from_f64_values(out));
6045 }
6046 return Ok(Self::from_f64_values(Vec::new()));
6047 }
6048 let out = nancummin(&self.values);
6049 Self::new(DType::Float64, out)
6050 }
6051
6052 #[must_use]
6057 pub fn sum(&self) -> Scalar {
6058 if let Some(data) = self.as_f64_slice() {
6064 let mut s = 0.0_f64;
6065 for &x in data {
6066 s += x;
6067 }
6068 return Scalar::Float64(s);
6069 }
6070 nansum(&self.values)
6071 }
6072
6073 #[must_use]
6078 pub fn mean(&self) -> Scalar {
6079 if let Some(data) = self.as_f64_slice() {
6082 if data.is_empty() {
6083 return Scalar::Null(NullKind::NaN);
6084 }
6085 let mut s = 0.0_f64;
6086 for &x in data {
6087 s += x;
6088 }
6089 return Scalar::Float64(s / data.len() as f64);
6090 }
6091 nanmean(&self.values)
6092 }
6093
6094 #[must_use]
6098 pub fn weighted_mean(&self, weights: &Self) -> Scalar {
6099 if self.len() != weights.len() {
6100 return Scalar::Null(NullKind::NaN);
6101 }
6102 let mut sum = 0.0;
6103 let mut weight_sum = 0.0;
6104 for (v, w) in self.values.iter().zip(weights.values()) {
6105 if v.is_missing() || w.is_missing() {
6106 continue;
6107 }
6108 let vf = match v.to_f64() {
6109 Ok(x) => x,
6110 Err(_) => continue,
6111 };
6112 let wf = match w.to_f64() {
6113 Ok(x) => x,
6114 Err(_) => continue,
6115 };
6116 sum += vf * wf;
6117 weight_sum += wf;
6118 }
6119 if weight_sum == 0.0 {
6120 return Scalar::Null(NullKind::NaN);
6121 }
6122 Scalar::Float64(sum / weight_sum)
6123 }
6124
6125 #[must_use]
6127 pub fn average(&self, weights: &Self) -> Scalar {
6128 self.weighted_mean(weights)
6129 }
6130
6131 #[must_use]
6136 pub fn min(&self) -> Scalar {
6137 if let Some(data) = self.as_f64_slice()
6143 && let Some((&first, rest)) = data.split_first()
6144 {
6145 let mut m = first;
6146 for &x in rest {
6147 if x < m {
6148 m = x;
6149 }
6150 }
6151 return Scalar::Float64(m);
6152 }
6153 if let Some(data) = self.as_i64_slice()
6154 && let Some((&first, rest)) = data.split_first()
6155 {
6156 let mut m = first;
6157 for &x in rest {
6158 if x < m {
6159 m = x;
6160 }
6161 }
6162 return Scalar::Int64(m);
6163 }
6164 nanmin(&self.values)
6165 }
6166
6167 #[must_use]
6171 pub fn max(&self) -> Scalar {
6172 if let Some(data) = self.as_f64_slice()
6175 && let Some((&first, rest)) = data.split_first()
6176 {
6177 let mut m = first;
6178 for &x in rest {
6179 if x > m {
6180 m = x;
6181 }
6182 }
6183 return Scalar::Float64(m);
6184 }
6185 if let Some(data) = self.as_i64_slice()
6186 && let Some((&first, rest)) = data.split_first()
6187 {
6188 let mut m = first;
6189 for &x in rest {
6190 if x > m {
6191 m = x;
6192 }
6193 }
6194 return Scalar::Int64(m);
6195 }
6196 nanmax(&self.values)
6197 }
6198
6199 #[must_use]
6203 pub fn median(&self) -> Scalar {
6204 nanmedian(&self.values)
6205 }
6206
6207 #[must_use]
6212 pub fn prod(&self) -> Scalar {
6213 if let Some(data) = self.as_f64_slice() {
6220 let mut p = 1.0_f64;
6221 for &x in data {
6222 p *= x;
6223 }
6224 return Scalar::Float64(p);
6225 }
6226 nanprod(&self.values)
6227 }
6228
6229 #[must_use]
6231 pub fn product(&self) -> Scalar {
6232 self.prod()
6233 }
6234
6235 #[must_use]
6237 pub fn nansum(&self) -> Scalar {
6238 self.sum()
6239 }
6240
6241 #[must_use]
6243 pub fn nanmean(&self) -> Scalar {
6244 self.mean()
6245 }
6246
6247 #[must_use]
6249 pub fn nanmin(&self) -> Scalar {
6250 self.min()
6251 }
6252
6253 #[must_use]
6255 pub fn nanmax(&self) -> Scalar {
6256 self.max()
6257 }
6258
6259 #[must_use]
6261 pub fn nanprod(&self) -> Scalar {
6262 self.prod()
6263 }
6264
6265 #[must_use]
6267 pub fn nanstd(&self, ddof: usize) -> Scalar {
6268 self.std(ddof)
6269 }
6270
6271 #[must_use]
6273 pub fn nanvar(&self, ddof: usize) -> Scalar {
6274 self.var(ddof)
6275 }
6276
6277 #[must_use]
6279 pub fn nanmedian(&self) -> Scalar {
6280 self.median()
6281 }
6282
6283 fn skipna_false_missing_result(&self, skipna: bool) -> Option<Scalar> {
6284 if skipna || !self.values.iter().any(Scalar::is_missing) {
6285 return None;
6286 }
6287
6288 Some(if matches!(self.dtype, DType::Timedelta64) {
6289 Scalar::Timedelta64(Timedelta::NAT)
6290 } else {
6291 Scalar::Float64(f64::NAN)
6292 })
6293 }
6294
6295 #[must_use]
6299 pub fn sum_skipna(&self, skipna: bool) -> Scalar {
6300 self.skipna_false_missing_result(skipna)
6301 .unwrap_or_else(|| self.sum())
6302 }
6303
6304 #[must_use]
6306 pub fn mean_skipna(&self, skipna: bool) -> Scalar {
6307 self.skipna_false_missing_result(skipna)
6308 .unwrap_or_else(|| self.mean())
6309 }
6310
6311 #[must_use]
6313 pub fn min_skipna(&self, skipna: bool) -> Scalar {
6314 self.skipna_false_missing_result(skipna)
6315 .unwrap_or_else(|| self.min())
6316 }
6317
6318 #[must_use]
6320 pub fn max_skipna(&self, skipna: bool) -> Scalar {
6321 self.skipna_false_missing_result(skipna)
6322 .unwrap_or_else(|| self.max())
6323 }
6324
6325 #[must_use]
6327 pub fn median_skipna(&self, skipna: bool) -> Scalar {
6328 self.skipna_false_missing_result(skipna)
6329 .unwrap_or_else(|| self.median())
6330 }
6331
6332 #[must_use]
6334 pub fn prod_skipna(&self, skipna: bool) -> Scalar {
6335 self.skipna_false_missing_result(skipna)
6336 .unwrap_or_else(|| self.prod())
6337 }
6338
6339 #[must_use]
6341 pub fn var_skipna(&self, ddof: usize, skipna: bool) -> Scalar {
6342 self.skipna_false_missing_result(skipna)
6343 .unwrap_or_else(|| self.var(ddof))
6344 }
6345
6346 #[must_use]
6348 pub fn std_skipna(&self, ddof: usize, skipna: bool) -> Scalar {
6349 self.skipna_false_missing_result(skipna)
6350 .unwrap_or_else(|| self.std(ddof))
6351 }
6352
6353 #[must_use]
6355 pub fn sem_skipna(&self, ddof: usize, skipna: bool) -> Scalar {
6356 self.skipna_false_missing_result(skipna)
6357 .unwrap_or_else(|| self.sem(ddof))
6358 }
6359
6360 #[must_use]
6364 pub fn count(&self) -> usize {
6365 self.values.iter().filter(|v| !v.is_missing()).count()
6366 }
6367
6368 pub fn ffill(&self, limit: Option<usize>) -> Result<Self, ColumnError> {
6375 let mut out = Vec::with_capacity(self.values.len());
6376 let mut last: Option<Scalar> = None;
6377 let mut run = 0usize;
6378 for v in &self.values {
6379 if !v.is_missing() {
6380 out.push(v.clone());
6381 last = Some(v.clone());
6382 run = 0;
6383 continue;
6384 }
6385 match (&last, limit) {
6386 (Some(prev), None) => out.push(prev.clone()),
6387 (Some(prev), Some(cap)) if run < cap => {
6388 out.push(prev.clone());
6389 run += 1;
6390 }
6391 _ => out.push(v.clone()),
6392 }
6393 }
6394 Self::new(self.dtype, out)
6395 }
6396
6397 pub fn pad(&self, limit: Option<usize>) -> Result<Self, ColumnError> {
6399 self.ffill(limit)
6400 }
6401
6402 pub fn bfill(&self, limit: Option<usize>) -> Result<Self, ColumnError> {
6408 let mut out = vec![Scalar::Null(NullKind::NaN); self.values.len()];
6409 let mut next: Option<Scalar> = None;
6410 let mut run = 0usize;
6411 for (i, v) in self.values.iter().enumerate().rev() {
6412 if !v.is_missing() {
6413 out[i] = v.clone();
6414 next = Some(v.clone());
6415 run = 0;
6416 continue;
6417 }
6418 match (&next, limit) {
6419 (Some(nxt), None) => out[i] = nxt.clone(),
6420 (Some(nxt), Some(cap)) if run < cap => {
6421 out[i] = nxt.clone();
6422 run += 1;
6423 }
6424 _ => out[i] = v.clone(),
6425 }
6426 }
6427 Self::new(self.dtype, out)
6428 }
6429
6430 pub fn backfill(&self, limit: Option<usize>) -> Result<Self, ColumnError> {
6432 self.bfill(limit)
6433 }
6434
6435 #[must_use]
6439 pub fn nunique(&self) -> Scalar {
6440 self.nunique_with_dropna(true)
6441 }
6442
6443 #[must_use]
6448 pub fn nunique_with_dropna(&self, dropna: bool) -> Scalar {
6449 if let Some(data) = self.as_i64_slice()
6455 && let Some((min, range)) = i64_direct_address_range(data)
6456 {
6457 let mut seen = vec![false; range];
6458 let mut distinct = 0i64;
6459 for &v in data {
6460 let slot = (v as i128 - min as i128) as usize;
6461 if !seen[slot] {
6462 seen[slot] = true;
6463 distinct += 1;
6464 }
6465 }
6466 return Scalar::Int64(distinct);
6467 }
6468
6469 let mut distinct = match nannunique(&self.values) {
6470 Scalar::Int64(count) => count,
6471 _ => 0,
6472 };
6473
6474 if !dropna && self.values.iter().any(Scalar::is_missing) {
6475 distinct += 1;
6476 }
6477
6478 Scalar::Int64(distinct)
6479 }
6480
6481 #[must_use]
6486 pub fn any(&self) -> Scalar {
6487 nanany(&self.values)
6488 }
6489
6490 #[must_use]
6495 pub fn all(&self) -> Scalar {
6496 nanall(&self.values)
6497 }
6498
6499 pub fn diff_valid(&self) -> Result<Self, ColumnError> {
6508 let mut prev: Option<f64> = None;
6509 let mut out = Vec::with_capacity(self.values.len());
6510 for v in &self.values {
6511 if v.is_missing() {
6512 out.push(Scalar::Null(NullKind::NaN));
6513 continue;
6514 }
6515 match v.to_f64() {
6516 Ok(x) if !x.is_nan() => match prev {
6517 Some(p) => {
6518 out.push(Scalar::Float64(x - p));
6519 prev = Some(x);
6520 }
6521 None => {
6522 out.push(Scalar::Null(NullKind::NaN));
6523 prev = Some(x);
6524 }
6525 },
6526 Ok(_) => out.push(Scalar::Null(NullKind::NaN)),
6527 Err(err) => return Err(ColumnError::Type(err)),
6528 }
6529 }
6530 Self::new(DType::Float64, out)
6531 }
6532
6533 pub fn sample(&self, n: usize, seed: u64) -> Result<Self, ColumnError> {
6542 let len = self.values.len();
6543 if n >= len {
6544 return Ok(self.clone());
6545 }
6546 let mut indices: Vec<usize> = (0..len).collect();
6547 let mut state = seed.wrapping_add(0x9E3779B97F4A7C15);
6548 for i in 0..n {
6549 state = state
6551 .wrapping_mul(6364136223846793005)
6552 .wrapping_add(1442695040888963407);
6553 let bound = (len - i) as u64;
6554 let pick = i + (state.wrapping_shr(33) % bound) as usize;
6555 indices.swap(i, pick);
6556 }
6557 let values: Vec<Scalar> = indices[..n]
6558 .iter()
6559 .map(|&idx| self.values[idx].clone())
6560 .collect();
6561 Self::new(self.dtype, values)
6562 }
6563
6564 #[must_use]
6571 pub fn first_valid(&self) -> Option<usize> {
6572 self.values.iter().position(|v| !v.is_missing())
6573 }
6574
6575 #[must_use]
6578 pub fn first_valid_index(&self) -> Option<usize> {
6579 self.first_valid()
6580 }
6581
6582 #[must_use]
6587 pub fn last_valid(&self) -> Option<usize> {
6588 self.values.iter().rposition(|v| !v.is_missing())
6589 }
6590
6591 #[must_use]
6594 pub fn last_valid_index(&self) -> Option<usize> {
6595 self.last_valid()
6596 }
6597
6598 pub fn rolling_window_sum(
6607 &self,
6608 window: usize,
6609 min_periods: usize,
6610 ) -> Result<Self, ColumnError> {
6611 let len = self.values.len();
6612 if window == 0 {
6613 return Self::new(DType::Float64, vec![Scalar::Null(NullKind::NaN); len]);
6614 }
6615 let mut out = Vec::with_capacity(len);
6616 for i in 0..len {
6617 let start = (i + 1).saturating_sub(window);
6618 let end = i + 1;
6619 let mut sum = 0.0_f64;
6620 let mut observed = 0usize;
6621 for v in &self.values[start..end] {
6622 if v.is_missing() {
6623 continue;
6624 }
6625 match v.to_f64() {
6626 Ok(x) if !x.is_nan() => {
6627 sum += x;
6628 observed += 1;
6629 }
6630 Ok(_) => {}
6631 Err(err) => return Err(ColumnError::Type(err)),
6632 }
6633 }
6634 if observed >= min_periods.max(1) || (min_periods == 0 && end - start > 0) {
6635 out.push(Scalar::Float64(sum));
6636 } else {
6637 out.push(Scalar::Null(NullKind::NaN));
6638 }
6639 }
6640 Self::new(DType::Float64, out)
6641 }
6642
6643 pub fn isnull(&self) -> Result<Self, ColumnError> {
6647 let out: Vec<Scalar> = self
6648 .values
6649 .iter()
6650 .map(|v| Scalar::Bool(v.is_missing()))
6651 .collect();
6652 Self::new(DType::Bool, out)
6653 }
6654
6655 pub fn isna(&self) -> Result<Self, ColumnError> {
6657 self.isnull()
6658 }
6659
6660 pub fn notnull(&self) -> Result<Self, ColumnError> {
6664 let out: Vec<Scalar> = self
6665 .values
6666 .iter()
6667 .map(|v| Scalar::Bool(!v.is_missing()))
6668 .collect();
6669 Self::new(DType::Bool, out)
6670 }
6671
6672 pub fn notna(&self) -> Result<Self, ColumnError> {
6674 self.notnull()
6675 }
6676
6677 pub fn isfinite(&self) -> Result<Self, ColumnError> {
6679 let out: Vec<Scalar> = self
6680 .values
6681 .iter()
6682 .map(|v| match v {
6683 Scalar::Float64(f) => Scalar::Bool(f.is_finite()),
6684 Scalar::Int64(_) => Scalar::Bool(true),
6685 _ if v.is_missing() => Scalar::Bool(false),
6686 _ => Scalar::Bool(true),
6687 })
6688 .collect();
6689 Self::new(DType::Bool, out)
6690 }
6691
6692 pub fn isinf(&self) -> Result<Self, ColumnError> {
6694 let out: Vec<Scalar> = self
6695 .values
6696 .iter()
6697 .map(|v| match v {
6698 Scalar::Float64(f) => Scalar::Bool(f.is_infinite()),
6699 _ => Scalar::Bool(false),
6700 })
6701 .collect();
6702 Self::new(DType::Bool, out)
6703 }
6704
6705 pub fn isnan(&self) -> Result<Self, ColumnError> {
6707 let out: Vec<Scalar> = self
6708 .values
6709 .iter()
6710 .map(|v| match v {
6711 Scalar::Float64(f) => Scalar::Bool(f.is_nan()),
6712 Scalar::Null(NullKind::NaN) => Scalar::Bool(true),
6713 _ => Scalar::Bool(false),
6714 })
6715 .collect();
6716 Self::new(DType::Bool, out)
6717 }
6718
6719 #[must_use]
6723 pub fn var(&self, ddof: usize) -> Scalar {
6724 if let Some(data) = self.as_f64_slice() {
6731 let n = data.len();
6732 if n <= ddof {
6733 return Scalar::Null(NullKind::NaN);
6734 }
6735 let mean: f64 = data.iter().sum::<f64>() / n as f64;
6736 let sum_sq: f64 = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>();
6737 return Scalar::Float64(sum_sq / (n - ddof) as f64);
6738 }
6739 nanvar(&self.values, ddof)
6740 }
6741
6742 #[must_use]
6746 pub fn std(&self, ddof: usize) -> Scalar {
6747 if self.as_f64_slice().is_some() {
6751 return match self.var(ddof) {
6752 Scalar::Float64(v) => Scalar::Float64(v.sqrt()),
6753 other => other,
6754 };
6755 }
6756 nanstd(&self.values, ddof)
6757 }
6758
6759 #[must_use]
6763 pub fn sem(&self, ddof: usize) -> Scalar {
6764 nansem(&self.values, ddof)
6765 }
6766
6767 #[must_use]
6772 pub fn cov(&self, other: &Self) -> Scalar {
6773 self.cov_ddof(other, 1)
6774 }
6775
6776 #[must_use]
6778 pub fn cov_ddof(&self, other: &Self, ddof: usize) -> Scalar {
6779 let n = self.values.len().min(other.values.len());
6780 if n == 0 {
6781 return Scalar::Null(NullKind::NaN);
6782 }
6783 let mut sum_x = 0.0;
6784 let mut sum_y = 0.0;
6785 let mut count = 0usize;
6786 for i in 0..n {
6787 let x = match self.values[i].to_f64() {
6788 Ok(v) if v.is_finite() => v,
6789 _ => continue,
6790 };
6791 let y = match other.values[i].to_f64() {
6792 Ok(v) if v.is_finite() => v,
6793 _ => continue,
6794 };
6795 sum_x += x;
6796 sum_y += y;
6797 count += 1;
6798 }
6799 if count <= ddof {
6800 return Scalar::Null(NullKind::NaN);
6801 }
6802 let mean_x = sum_x / count as f64;
6803 let mean_y = sum_y / count as f64;
6804 let mut cov_sum = 0.0;
6805 for i in 0..n {
6806 let x = match self.values[i].to_f64() {
6807 Ok(v) if v.is_finite() => v,
6808 _ => continue,
6809 };
6810 let y = match other.values[i].to_f64() {
6811 Ok(v) if v.is_finite() => v,
6812 _ => continue,
6813 };
6814 cov_sum += (x - mean_x) * (y - mean_y);
6815 }
6816 Scalar::Float64(cov_sum / (count - ddof) as f64)
6817 }
6818
6819 #[must_use]
6823 pub fn corr(&self, other: &Self) -> Scalar {
6824 let n = self.values.len().min(other.values.len());
6825 if n == 0 {
6826 return Scalar::Null(NullKind::NaN);
6827 }
6828 let mut sum_x = 0.0;
6829 let mut sum_y = 0.0;
6830 let mut sum_xx = 0.0;
6831 let mut sum_yy = 0.0;
6832 let mut sum_xy = 0.0;
6833 let mut count = 0usize;
6834 for i in 0..n {
6835 let x = match self.values[i].to_f64() {
6836 Ok(v) if v.is_finite() => v,
6837 _ => continue,
6838 };
6839 let y = match other.values[i].to_f64() {
6840 Ok(v) if v.is_finite() => v,
6841 _ => continue,
6842 };
6843 sum_x += x;
6844 sum_y += y;
6845 sum_xx += x * x;
6846 sum_yy += y * y;
6847 sum_xy += x * y;
6848 count += 1;
6849 }
6850 if count < 2 {
6851 return Scalar::Null(NullKind::NaN);
6852 }
6853 let n_f = count as f64;
6854 let numerator = n_f * sum_xy - sum_x * sum_y;
6855 let denom_x = (n_f * sum_xx - sum_x * sum_x).sqrt();
6856 let denom_y = (n_f * sum_yy - sum_y * sum_y).sqrt();
6857 if denom_x == 0.0 || denom_y == 0.0 {
6858 return Scalar::Null(NullKind::NaN);
6859 }
6860 Scalar::Float64(numerator / (denom_x * denom_y))
6861 }
6862
6863 #[must_use]
6867 pub fn autocorr(&self, lag: usize) -> Scalar {
6868 if lag >= self.values.len() {
6869 return Scalar::Null(NullKind::NaN);
6870 }
6871 let shifted = match self.shift(lag as i64, Scalar::Null(NullKind::NaN)) {
6872 Ok(s) => s,
6873 Err(_) => return Scalar::Null(NullKind::NaN),
6874 };
6875 self.corr(&shifted)
6876 }
6877
6878 #[must_use]
6883 pub fn skew(&self) -> Scalar {
6884 nanskew(&self.values)
6885 }
6886
6887 #[must_use]
6892 pub fn kurt(&self) -> Scalar {
6893 nankurt(&self.values)
6894 }
6895
6896 #[must_use]
6898 pub fn kurtosis(&self) -> Scalar {
6899 self.kurt()
6900 }
6901
6902 #[must_use]
6907 pub fn ptp(&self) -> Scalar {
6908 nanptp(&self.values)
6909 }
6910
6911 #[must_use]
6915 pub fn is_unique(&self) -> bool {
6916 !self.has_duplicates()
6917 }
6918
6919 #[must_use]
6923 pub fn has_duplicates(&self) -> bool {
6924 #[derive(Hash, PartialEq, Eq)]
6925 enum Key<'a> {
6926 Bool(bool),
6927 Int64(i64),
6928 FloatBits(u64),
6929 Utf8(&'a str),
6930 Timedelta64(i64),
6931 Datetime64(i64),
6932 Period(i64),
6933 Interval(u64, u64, IntervalClosed),
6934 }
6935 let mut seen: FxHashSet<Key<'_>> = FxHashSet::default();
6936 for v in &self.values {
6937 if v.is_missing() {
6938 continue;
6939 }
6940 let key = match v {
6941 Scalar::Bool(b) => Key::Bool(*b),
6942 Scalar::Int64(i) => Key::Int64(*i),
6943 Scalar::Float64(f) => {
6944 let norm = if *f == 0.0 { 0.0 } else { *f };
6945 Key::FloatBits(norm.to_bits())
6946 }
6947 Scalar::Utf8(s) => Key::Utf8(s.as_str()),
6948 Scalar::Timedelta64(v) => Key::Timedelta64(*v),
6949 Scalar::Datetime64(v) => Key::Datetime64(*v),
6950 Scalar::Period(v) => Key::Period(*v),
6951 Scalar::Interval(v) => {
6952 let (left, right, closed) = interval_key(v);
6953 Key::Interval(left, right, closed)
6954 }
6955 Scalar::Null(_) => continue,
6956 };
6957 if !seen.insert(key) {
6958 return true;
6959 }
6960 }
6961 false
6962 }
6963
6964 pub fn pct_change(&self, periods: i64) -> Result<Self, ColumnError> {
6971 let len = self.values.len();
6972 if len == 0 || periods == 0 {
6973 return Self::new(DType::Float64, vec![Scalar::Null(NullKind::NaN); len]);
6974 }
6975 let abs = periods.unsigned_abs() as usize;
6976 let mut out: Vec<Scalar> = Vec::with_capacity(len);
6977 for i in 0..len {
6978 let prev_idx = if periods > 0 {
6979 i.checked_sub(abs)
6980 } else if i + abs < len {
6981 Some(i + abs)
6982 } else {
6983 None
6984 };
6985 let Some(pi) = prev_idx else {
6986 out.push(Scalar::Null(NullKind::NaN));
6987 continue;
6988 };
6989 let cur = &self.values[i];
6990 let prev = &self.values[pi];
6991 if cur.is_missing() || prev.is_missing() {
6992 out.push(Scalar::Null(NullKind::NaN));
6993 continue;
6994 }
6995 if let (Scalar::Timedelta64(cur_ns), Scalar::Timedelta64(prev_ns)) = (cur, prev) {
6999 if *cur_ns == Timedelta::NAT || *prev_ns == Timedelta::NAT {
7000 out.push(Scalar::Null(NullKind::NaN));
7001 continue;
7002 }
7003 let prev_f = *prev_ns as f64;
7004 if prev_f.abs() < f64::EPSILON {
7005 out.push(Scalar::Null(NullKind::NaN));
7006 } else {
7007 out.push(Scalar::Float64((*cur_ns as f64 - prev_f) / prev_f));
7008 }
7009 continue;
7010 }
7011 match (cur.to_f64(), prev.to_f64()) {
7012 (Ok(c), Ok(p)) => {
7013 if p == 0.0 || p.is_nan() || c.is_nan() {
7014 out.push(Scalar::Null(NullKind::NaN));
7015 } else {
7016 out.push(Scalar::Float64((c - p) / p));
7017 }
7018 }
7019 _ => out.push(Scalar::Null(NullKind::NaN)),
7020 }
7021 }
7022 Self::new(DType::Float64, out)
7023 }
7024
7025 pub fn pct_change_with_fill(
7033 &self,
7034 periods: i64,
7035 fill_method: Option<&str>,
7036 limit: Option<usize>,
7037 ) -> Result<Self, ColumnError> {
7038 let filled = match fill_method {
7039 None => self.clone(),
7040 Some(method) => match method {
7041 "ffill" | "pad" => self.ffill(limit)?,
7042 "bfill" | "backfill" => self.bfill(limit)?,
7043 other => {
7044 return Err(ColumnError::Type(TypeError::NonNumericValue {
7045 value: other.to_string(),
7046 dtype: self.dtype,
7047 }));
7048 }
7049 },
7050 };
7051 filled.pct_change(periods)
7052 }
7053
7054 pub fn describe(&self) -> Result<Vec<(&'static str, Scalar)>, ColumnError> {
7063 if !matches!(
7064 self.dtype,
7065 DType::Int64 | DType::Float64 | DType::Timedelta64
7066 ) {
7067 return Err(ColumnError::Type(TypeError::NonNumericValue {
7068 value: format!("{:?}", self.dtype),
7069 dtype: self.dtype,
7070 }));
7071 }
7072 let count = Scalar::Int64(self.count() as i64);
7073 let mean = self.mean();
7074 let std = {
7075 let nums: Vec<f64> = self
7076 .values
7077 .iter()
7078 .filter(|v| !v.is_missing())
7079 .filter_map(|v| v.to_f64().ok())
7080 .collect();
7081 if nums.len() < 2 {
7082 Scalar::Null(NullKind::NaN)
7083 } else {
7084 let mu = nums.iter().sum::<f64>() / nums.len() as f64;
7085 let ss: f64 = nums.iter().map(|x| (x - mu).powi(2)).sum();
7086 Scalar::Float64((ss / (nums.len() as f64 - 1.0)).sqrt())
7087 }
7088 };
7089 let q25 = self.quantile(0.25);
7090 let q50 = self.quantile(0.5);
7091 let q75 = self.quantile(0.75);
7092 let min = self.min();
7093 let max = self.max();
7094 Ok(vec![
7095 ("count", count),
7096 ("mean", mean),
7097 ("std", std),
7098 ("min", min),
7099 ("25%", q25),
7100 ("50%", q50),
7101 ("75%", q75),
7102 ("max", max),
7103 ])
7104 }
7105
7106 pub fn combine<F>(
7114 &self,
7115 other: &Self,
7116 mut func: F,
7117 fill: Option<Scalar>,
7118 ) -> Result<Self, ColumnError>
7119 where
7120 F: FnMut(&Scalar, &Scalar) -> Scalar,
7121 {
7122 if self.values.len() != other.values.len() {
7123 return Err(ColumnError::LengthMismatch {
7124 left: self.values.len(),
7125 right: other.values.len(),
7126 });
7127 }
7128 let out: Vec<Scalar> = self
7129 .values
7130 .iter()
7131 .zip(other.values.iter())
7132 .map(|(a, b)| {
7133 let a_miss = a.is_missing();
7134 let b_miss = b.is_missing();
7135 match (a_miss || b_miss, fill.as_ref()) {
7136 (true, None) => Scalar::Null(NullKind::NaN),
7138 (_, fill_opt) => {
7139 let default = fill_opt.unwrap_or(a);
7140 let left = if a_miss { default } else { a };
7141 let right = if b_miss { fill_opt.unwrap_or(b) } else { b };
7142 func(left, right)
7143 }
7144 }
7145 })
7146 .collect();
7147 let inferred = infer_dtype(&out).unwrap_or(self.dtype);
7148 Self::new(inferred, out)
7149 }
7150
7151 pub fn apply_float<F>(&self, mut func: F) -> Result<Self, ColumnError>
7160 where
7161 F: FnMut(f64) -> f64,
7162 {
7163 let mut out = Vec::with_capacity(self.values.len());
7164 for v in &self.values {
7165 if v.is_missing() {
7166 out.push(Scalar::Null(NullKind::NaN));
7167 continue;
7168 }
7169 match v.to_f64() {
7170 Ok(x) => {
7171 let y = func(x);
7172 if y.is_nan() {
7173 out.push(Scalar::Null(NullKind::NaN));
7174 } else {
7175 out.push(Scalar::Float64(y));
7176 }
7177 }
7178 Err(err) => return Err(ColumnError::Type(err)),
7179 }
7180 }
7181 Self::new(DType::Float64, out)
7182 }
7183
7184 #[must_use]
7192 pub fn hist_counts(&self, bins: usize) -> Vec<usize> {
7193 if bins == 0 {
7194 return Vec::new();
7195 }
7196 let nums: Vec<f64> = self
7197 .values
7198 .iter()
7199 .filter(|v| !v.is_missing())
7200 .filter_map(|v| v.to_f64().ok())
7201 .filter(|f| !f.is_nan())
7202 .collect();
7203 if nums.is_empty() {
7204 return vec![0; bins];
7205 }
7206 let (min, max) = nums
7207 .iter()
7208 .fold((f64::INFINITY, f64::NEG_INFINITY), |(lo, hi), &x| {
7209 (lo.min(x), hi.max(x))
7210 });
7211 if (max - min).abs() < f64::EPSILON {
7212 let mut counts = vec![0; bins];
7214 counts[0] = nums.len();
7215 return counts;
7216 }
7217 let width = (max - min) / bins as f64;
7218 let mut counts = vec![0usize; bins];
7219 for x in &nums {
7220 let mut idx = ((x - min) / width) as usize;
7221 if idx >= bins {
7222 idx = bins - 1;
7223 }
7224 counts[idx] += 1;
7225 }
7226 counts
7227 }
7228
    /// Returns whatever `nanargmin` reports for this column's values.
    ///
    /// NOTE(review): presumably the position of the smallest non-missing value,
    /// with `None` when there is none — confirm against `fp_types::nanargmin`.
    #[must_use]
    pub fn argmin(&self) -> Option<usize> {
        nanargmin(&self.values)
    }
7238
    /// Alias for `argmin`; returns exactly what `argmin` returns.
    #[must_use]
    pub fn idxmin(&self) -> Option<usize> {
        self.argmin()
    }
7245
    /// Returns whatever `nanargmax` reports for this column's values.
    ///
    /// NOTE(review): presumably the position of the largest non-missing value,
    /// with `None` when there is none — confirm against `fp_types::nanargmax`.
    #[must_use]
    pub fn argmax(&self) -> Option<usize> {
        nanargmax(&self.values)
    }
7254
    /// Alias for `argmax`; returns exactly what `argmax` returns.
    #[must_use]
    pub fn idxmax(&self) -> Option<usize> {
        self.argmax()
    }
7261
    /// Alias for `argmin`, provided under the `nan`-prefixed name.
    #[must_use]
    pub fn nanargmin(&self) -> Option<usize> {
        self.argmin()
    }
7267
    /// Alias for `argmax`, provided under the `nan`-prefixed name.
    #[must_use]
    pub fn nanargmax(&self) -> Option<usize> {
        self.argmax()
    }
7273
    /// Reports the result of `is_monotonic_in_direction` for the increasing
    /// direction (flag `true`).
    ///
    /// NOTE(review): whether equal neighbours and missing values are accepted is
    /// decided by `is_monotonic_in_direction` — confirm there.
    #[must_use]
    pub fn is_monotonic_increasing(&self) -> bool {
        is_monotonic_in_direction(&self.values, true)
    }
7283
    /// Reports the result of `is_monotonic_in_direction` for the decreasing
    /// direction (flag `false`).
    ///
    /// NOTE(review): whether equal neighbours and missing values are accepted is
    /// decided by `is_monotonic_in_direction` — confirm there.
    #[must_use]
    pub fn is_monotonic_decreasing(&self) -> bool {
        is_monotonic_in_direction(&self.values, false)
    }
7291
7292 pub fn combine_first(&self, other: &Self) -> Result<Self, ColumnError> {
7300 if self.values.len() != other.values.len() {
7301 return Err(ColumnError::LengthMismatch {
7302 left: self.values.len(),
7303 right: other.values.len(),
7304 });
7305 }
7306 let out: Vec<Scalar> = self
7307 .values
7308 .iter()
7309 .zip(other.values.iter())
7310 .map(|(a, b)| if a.is_missing() { b.clone() } else { a.clone() })
7311 .collect();
7312 Self::new(self.dtype, out)
7313 }
7314
    /// Calls `clip` with only a lower bound (`Some(lower)`, no upper bound).
    ///
    /// # Errors
    /// Propagates whatever `clip` returns.
    pub fn clip_lower(&self, lower: f64) -> Result<Self, ColumnError> {
        self.clip(Some(lower), None)
    }
7323
    /// Calls `clip` with only an upper bound (no lower bound, `Some(upper)`).
    ///
    /// # Errors
    /// Propagates whatever `clip` returns.
    pub fn clip_upper(&self, upper: f64) -> Result<Self, ColumnError> {
        self.clip(None, Some(upper))
    }
7330
    /// Shorthand for `drop_duplicates_keep("first")`.
    ///
    /// # Errors
    /// Propagates whatever `drop_duplicates_keep` returns.
    pub fn drop_duplicates(&self) -> Result<Self, ColumnError> {
        self.drop_duplicates_keep("first")
    }
7337
7338 pub fn drop_duplicates_keep(&self, keep: &str) -> Result<Self, ColumnError> {
7343 let dup = self.duplicated_keep(keep)?;
7344 let mut out = Vec::with_capacity(self.values.len());
7345 for (v, keep_flag) in self.values.iter().zip(dup.values.iter()) {
7346 if matches!(keep_flag, Scalar::Bool(false)) {
7347 out.push(v.clone());
7348 }
7349 }
7350 Self::new(self.dtype, out)
7351 }
7352
7353 pub fn compare(&self, other: &Self) -> Result<(Self, Self), ColumnError> {
7361 if self.values.len() != other.values.len() {
7362 return Err(ColumnError::LengthMismatch {
7363 left: self.values.len(),
7364 right: other.values.len(),
7365 });
7366 }
7367 let mut left = Vec::new();
7368 let mut right = Vec::new();
7369 for (a, b) in self.values.iter().zip(other.values.iter()) {
7370 let equal = match (a.is_missing(), b.is_missing()) {
7371 (true, true) => true,
7372 (true, false) | (false, true) => false,
7373 (false, false) => a.semantic_eq(b),
7374 };
7375 if !equal {
7376 left.push(a.clone());
7377 right.push(b.clone());
7378 }
7379 }
7380 Ok((Self::new(self.dtype, left)?, Self::new(other.dtype, right)?))
7381 }
7382
7383 pub fn map<F>(&self, mut func: F) -> Result<Self, ColumnError>
7391 where
7392 F: FnMut(&Scalar) -> Scalar,
7393 {
7394 let out: Vec<Scalar> = self.values.iter().map(&mut func).collect();
7395 let target = infer_dtype(&out).unwrap_or(self.dtype);
7396 Self::new(target, out)
7397 }
7398
7399 pub fn interpolate_linear(&self) -> Result<Self, ColumnError> {
7408 let len = self.values.len();
7409 let mut floats: Vec<Option<f64>> = Vec::with_capacity(len);
7411 for v in &self.values {
7412 if v.is_missing() {
7413 floats.push(None);
7414 continue;
7415 }
7416 match v.to_f64() {
7417 Ok(x) if !x.is_nan() => floats.push(Some(x)),
7418 Ok(_) => floats.push(None),
7419 Err(err) => return Err(ColumnError::Type(err)),
7420 }
7421 }
7422
7423 let first = floats.iter().position(Option::is_some);
7425 let last = floats.iter().rposition(Option::is_some);
7426 if let (Some(start), Some(end)) = (first, last) {
7427 let mut i = start;
7428 while i < end {
7429 if floats[i].is_some() {
7430 i += 1;
7431 continue;
7432 }
7433 let gap_start = i;
7434 while i < end && floats[i].is_none() {
7435 i += 1;
7436 }
7437 let before = floats[gap_start - 1].expect("anchor");
7438 let after = floats[i].expect("anchor");
7439 let span = (i - gap_start + 1) as f64;
7440 for (k, j) in (gap_start..i).enumerate() {
7441 let step = (k + 1) as f64;
7442 floats[j] = Some(before + (after - before) * (step / span));
7443 }
7444 }
7445 let last_valid = floats[end].expect("last valid anchor");
7450 for slot in floats.iter_mut().skip(end + 1) {
7451 *slot = Some(last_valid);
7452 }
7453 }
7454
7455 let out: Vec<Scalar> = floats
7456 .into_iter()
7457 .map(|opt| match opt {
7458 Some(x) => Scalar::Float64(x),
7459 None => Scalar::Null(NullKind::NaN),
7460 })
7461 .collect();
7462 Self::new(DType::Float64, out)
7463 }
7464
    /// Alias for `interpolate_linear`.
    ///
    /// # Errors
    /// Propagates whatever `interpolate_linear` returns.
    pub fn interpolate(&self) -> Result<Self, ColumnError> {
        self.interpolate_linear()
    }
7470
    /// Returns the result of `nanquantile` for this column's values at `q`.
    ///
    /// NOTE(review): `q` is presumably a fraction in `[0, 1]` (`percentile`
    /// divides by 100 before calling this); the interpolation rule and the
    /// handling of out-of-range `q` live in `fp_types::nanquantile` — confirm.
    #[must_use]
    pub fn quantile(&self, q: f64) -> Scalar {
        nanquantile(&self.values, q)
    }
7480
    /// Returns `quantile(p / 100.0)`, i.e. `p` is expressed in percent.
    #[must_use]
    pub fn percentile(&self, p: f64) -> Scalar {
        self.quantile(p / 100.0)
    }
7488
    /// Alias for `quantile`, provided under the `nan`-prefixed name.
    #[must_use]
    pub fn nanquantile(&self, q: f64) -> Scalar {
        self.quantile(q)
    }
7494
    /// Alias for `percentile`, provided under the `nan`-prefixed name.
    #[must_use]
    pub fn nanpercentile(&self, p: f64) -> Scalar {
        self.percentile(p)
    }
7500
    /// Returns the most frequent non-missing value(s) of the column.
    ///
    /// All values tied for the highest occurrence count are returned, in
    /// ascending order, in a column of the same dtype. A column with no
    /// non-missing values yields an empty column.
    ///
    /// # Errors
    /// Propagates errors from `Self::new`.
    pub fn mode(&self) -> Result<Self, ColumnError> {
        // Counting-table path for typed i64 data.
        // NOTE(review): `i64_direct_address_range` presumably returns `(min, range)`
        // only when the value span is small enough to index a table — confirm.
        if let Some(data) = self.as_i64_slice()
            && let Some((min, range)) = i64_direct_address_range(data)
        {
            let mut count = vec![0i64; range];
            for &v in data {
                // Offset from `min` computed in i128 so the subtraction cannot overflow.
                count[(v as i128 - min as i128) as usize] += 1;
            }
            let max_count = count.iter().copied().max().unwrap_or(0);
            let mut winners = Vec::new();
            // Slots are visited in ascending value order, so winners come out sorted.
            for (s, &c) in count.iter().enumerate() {
                if c == max_count {
                    winners.push(Scalar::Int64(min + s as i64));
                }
            }
            return Self::new(self.dtype, winners);
        }

        // Hashable stand-in for a non-missing `Scalar`, borrowing string data.
        #[derive(Hash, PartialEq, Eq)]
        enum Key<'a> {
            Bool(bool),
            Int64(i64),
            FloatBits(u64),
            Utf8(&'a str),
            Timedelta64(i64),
            Datetime64(i64),
            Period(i64),
            Interval(u64, u64, IntervalClosed),
        }
        // Maps a scalar to its hash key; missing values have no key.
        fn key_of(v: &Scalar) -> Option<Key<'_>> {
            if v.is_missing() {
                return None;
            }
            Some(match v {
                Scalar::Bool(b) => Key::Bool(*b),
                Scalar::Int64(i) => Key::Int64(*i),
                Scalar::Float64(f) => {
                    // `-0.0 == 0.0`, so both zeros collapse to the bits of `+0.0`.
                    let norm = if *f == 0.0 { 0.0 } else { *f };
                    Key::FloatBits(norm.to_bits())
                }
                Scalar::Utf8(s) => Key::Utf8(s.as_str()),
                Scalar::Timedelta64(v) => Key::Timedelta64(*v),
                Scalar::Datetime64(v) => Key::Datetime64(*v),
                Scalar::Period(v) => Key::Period(*v),
                Scalar::Interval(v) => {
                    let (left, right, closed) = interval_key(v);
                    Key::Interval(left, right, closed)
                }
                Scalar::Null(_) => return None,
            })
        }

        // For each key: occurrence count plus the first scalar seen with that key.
        let mut counts: FxHashMap<Key<'_>, (usize, &Scalar)> = FxHashMap::default();
        for v in &self.values {
            if let Some(k) = key_of(v) {
                counts
                    .entry(k)
                    .and_modify(|entry| entry.0 += 1)
                    .or_insert((1, v));
            }
        }
        if counts.is_empty() {
            return Self::new(self.dtype, Vec::new());
        }
        let max_count = counts.values().map(|(c, _)| *c).max().unwrap_or(0);
        let mut winners: Vec<Scalar> = counts
            .values()
            .filter_map(|(c, v)| {
                if *c == max_count {
                    Some((*v).clone())
                } else {
                    None
                }
            })
            .collect();
        // Hash-map iteration order is arbitrary; sort for a deterministic result.
        winners.sort_by(|a, b| compare_scalars_na_last(a, b, true));
        Self::new(self.dtype, winners)
    }
7595
7596 #[must_use]
7604 pub fn memory_usage(&self, deep: bool) -> usize {
7605 let element_bytes = match self.dtype {
7606 DType::Bool => 1,
7607 DType::Int64 | DType::Float64 | DType::Timedelta64 => 8,
7608 DType::Utf8 => std::mem::size_of::<usize>(),
7609 _ => 0,
7610 };
7611 let base = element_bytes * self.values.len();
7612 let deep_extra = if deep && self.dtype == DType::Utf8 {
7613 self.values
7614 .iter()
7615 .map(|v| match v {
7616 Scalar::Utf8(s) => s.len(),
7617 _ => 0,
7618 })
7619 .sum::<usize>()
7620 } else {
7621 0
7622 };
7623 let validity_bytes = self.values.len().div_ceil(8);
7625 base + deep_extra + validity_bytes
7626 }
7627
    /// Returns `memory_usage(false)`, i.e. the shallow byte estimate.
    #[must_use]
    pub fn nbytes(&self) -> usize {
        self.memory_usage(false)
    }
7633
7634 #[must_use]
7639 pub fn itemsize(&self) -> usize {
7640 match self.dtype() {
7641 DType::Bool | DType::BoolNullable => 1,
7642 DType::Int64
7643 | DType::Int64Nullable
7644 | DType::Float64
7645 | DType::Datetime64
7646 | DType::Timedelta64
7647 | DType::Period => 8,
7648 DType::Utf8 => {
7649 if self.values.is_empty() {
7650 0
7651 } else {
7652 self.memory_usage(true) / self.values.len()
7653 }
7654 }
7655 DType::Null | DType::Categorical | DType::Interval | DType::Sparse => 8,
7656 }
7657 }
7658
7659 pub fn equals(&self, other: &Self) -> Result<Self, ColumnError> {
7665 if self.values.len() != other.values.len() {
7666 return Err(ColumnError::LengthMismatch {
7667 left: self.values.len(),
7668 right: other.values.len(),
7669 });
7670 }
7671 let out: Vec<Scalar> = self
7672 .values
7673 .iter()
7674 .zip(other.values.iter())
7675 .map(|(a, b)| {
7676 if a.is_missing() || b.is_missing() {
7677 Scalar::Bool(false)
7678 } else {
7679 Scalar::Bool(a.semantic_eq(b))
7680 }
7681 })
7682 .collect();
7683 Self::new(DType::Bool, out)
7684 }
7685
7686 pub fn dot(&self, other: &Self) -> Result<f64, ColumnError> {
7694 if self.values.len() != other.values.len() {
7695 return Err(ColumnError::LengthMismatch {
7696 left: self.values.len(),
7697 right: other.values.len(),
7698 });
7699 }
7700 let mut sum = 0.0_f64;
7701 for (a, b) in self.values.iter().zip(other.values.iter()) {
7702 if a.is_missing() || b.is_missing() {
7703 continue;
7704 }
7705 let av = a.to_f64().map_err(ColumnError::Type)?;
7706 let bv = b.to_f64().map_err(ColumnError::Type)?;
7707 if av.is_nan() || bv.is_nan() {
7708 continue;
7709 }
7710 sum += av * bv;
7711 }
7712 Ok(sum)
7713 }
7714
7715 pub fn convolve(&self, kernel: &Self, mode: &str) -> Result<Self, ColumnError> {
7722 let a: Vec<f64> = self
7723 .values
7724 .iter()
7725 .map(|v| v.to_f64().unwrap_or(0.0))
7726 .collect();
7727 let v: Vec<f64> = kernel
7728 .values
7729 .iter()
7730 .map(|v| v.to_f64().unwrap_or(0.0))
7731 .collect();
7732
7733 if a.is_empty() || v.is_empty() {
7734 return Self::new(DType::Float64, vec![]);
7735 }
7736
7737 let full_len = a.len() + v.len() - 1;
7738 let mut full: Vec<f64> = vec![0.0; full_len];
7739
7740 for (i, &ai) in a.iter().enumerate() {
7741 for (j, &vj) in v.iter().enumerate() {
7742 full[i + j] += ai * vj;
7743 }
7744 }
7745
7746 let out: Vec<f64> = match mode {
7747 "full" => full,
7748 "same" => {
7749 let target_len = a.len().max(v.len());
7750 let start = (full_len - target_len) / 2;
7751 full[start..start + target_len].to_vec()
7752 }
7753 "valid" => {
7754 let min_len = a.len().min(v.len());
7755 let valid_len = a.len().max(v.len()) - min_len + 1;
7756 let start = min_len - 1;
7757 full[start..start + valid_len].to_vec()
7758 }
7759 _ => {
7760 return Err(ColumnError::Type(TypeError::NonNumericValue {
7761 value: format!("invalid mode '{mode}', expected 'full', 'same', or 'valid'"),
7762 dtype: self.dtype,
7763 }));
7764 }
7765 };
7766
7767 let scalars: Vec<Scalar> = out.into_iter().map(Scalar::Float64).collect();
7768 Self::new(DType::Float64, scalars)
7769 }
7770
    /// Cross-correlates this column with `other` by convolving with the
    /// reversed `other`.
    ///
    /// `mode` is forwarded unchanged to `convolve`.
    ///
    /// # Errors
    /// Propagates errors from `reverse` and `convolve`.
    pub fn correlate(&self, other: &Self, mode: &str) -> Result<Self, ColumnError> {
        let reversed = other.reverse()?;
        self.convolve(&reversed, mode)
    }
7778
7779 pub fn fillna_with_column(&self, other: &Self) -> Result<Self, ColumnError> {
7786 if self.values.len() != other.values.len() {
7787 return Err(ColumnError::LengthMismatch {
7788 left: self.values.len(),
7789 right: other.values.len(),
7790 });
7791 }
7792 let out: Vec<Scalar> = self
7793 .values
7794 .iter()
7795 .zip(other.values.iter())
7796 .map(|(v, o)| {
7797 if v.is_missing() {
7798 cast_scalar(o, self.dtype)
7799 } else {
7800 Ok(v.clone())
7801 }
7802 })
7803 .collect::<Result<Vec<_>, _>>()
7804 .map_err(ColumnError::Type)?;
7805 Self::new(self.dtype, out)
7806 }
7807
7808 pub fn divmod(&self, divisor: &Self) -> Result<(Self, Self), ColumnError> {
7816 if self.values.len() != divisor.values.len() {
7817 return Err(ColumnError::LengthMismatch {
7818 left: self.values.len(),
7819 right: divisor.values.len(),
7820 });
7821 }
7822 let mut quotient = Vec::with_capacity(self.values.len());
7823 let mut remainder = Vec::with_capacity(self.values.len());
7824 for (a, b) in self.values.iter().zip(divisor.values.iter()) {
7825 if a.is_missing() || b.is_missing() {
7826 quotient.push(Scalar::Null(NullKind::NaN));
7827 remainder.push(Scalar::Null(NullKind::NaN));
7828 continue;
7829 }
7830 let num = match a.to_f64() {
7831 Ok(x) if !x.is_nan() => x,
7832 _ => {
7833 quotient.push(Scalar::Null(NullKind::NaN));
7834 remainder.push(Scalar::Null(NullKind::NaN));
7835 continue;
7836 }
7837 };
7838 let den = match b.to_f64() {
7839 Ok(x) if !x.is_nan() => x,
7840 _ => {
7841 quotient.push(Scalar::Null(NullKind::NaN));
7842 remainder.push(Scalar::Null(NullKind::NaN));
7843 continue;
7844 }
7845 };
7846 if den == 0.0 {
7847 quotient.push(Scalar::Null(NullKind::NaN));
7848 remainder.push(Scalar::Null(NullKind::NaN));
7849 continue;
7850 }
7851 let q = python_floor_div_f64(num, den);
7854 let r = python_mod_f64(num, den);
7855 quotient.push(Scalar::Float64(q));
7856 remainder.push(Scalar::Float64(r));
7857 }
7858 Ok((
7859 Self::new(DType::Float64, quotient)?,
7860 Self::new(DType::Float64, remainder)?,
7861 ))
7862 }
7863
7864 pub fn where_cond_series(&self, cond: &Self, other: &Self) -> Result<Self, ColumnError> {
7873 if cond.dtype != DType::Bool {
7874 return Err(ColumnError::InvalidMaskType { dtype: cond.dtype });
7875 }
7876 if self.values.len() != cond.values.len() || self.values.len() != other.values.len() {
7877 return Err(ColumnError::LengthMismatch {
7878 left: self.values.len(),
7879 right: cond.values.len().max(other.values.len()),
7880 });
7881 }
7882 if let Some(cb) = cond.as_bool_slice() {
7889 if let (Some(s), Some(o)) = (self.as_f64_slice(), other.as_f64_slice()) {
7890 let out: Vec<f64> = (0..s.len())
7891 .map(|i| if cb[i] { s[i] } else { o[i] })
7892 .collect();
7893 return Ok(Self::from_f64_values(out));
7894 }
7895 if let (Some(s), Some(o)) = (self.as_i64_slice(), other.as_i64_slice()) {
7896 let out: Vec<i64> = (0..s.len())
7897 .map(|i| if cb[i] { s[i] } else { o[i] })
7898 .collect();
7899 return Ok(Self::from_i64_values(out));
7900 }
7901 }
7902 let out: Vec<Scalar> = self
7903 .values
7904 .iter()
7905 .zip(cond.values.iter().zip(other.values.iter()))
7906 .map(|(v, (c, o))| match c {
7907 Scalar::Bool(true) => Ok(v.clone()),
7908 Scalar::Bool(false) => cast_scalar(o, self.dtype),
7909 _ => Ok(Scalar::Null(NullKind::NaN)),
7910 })
7911 .collect::<Result<Vec<_>, _>>()
7912 .map_err(ColumnError::Type)?;
7913 Self::new(self.dtype, out)
7914 }
7915
7916 pub fn mask_series(&self, cond: &Self, other: &Self) -> Result<Self, ColumnError> {
7921 if cond.dtype != DType::Bool {
7922 return Err(ColumnError::InvalidMaskType { dtype: cond.dtype });
7923 }
7924 if self.values.len() != cond.values.len() || self.values.len() != other.values.len() {
7925 return Err(ColumnError::LengthMismatch {
7926 left: self.values.len(),
7927 right: cond.values.len().max(other.values.len()),
7928 });
7929 }
7930 if let Some(cb) = cond.as_bool_slice() {
7933 if let (Some(s), Some(o)) = (self.as_f64_slice(), other.as_f64_slice()) {
7934 let out: Vec<f64> = (0..s.len())
7935 .map(|i| if cb[i] { o[i] } else { s[i] })
7936 .collect();
7937 return Ok(Self::from_f64_values(out));
7938 }
7939 if let (Some(s), Some(o)) = (self.as_i64_slice(), other.as_i64_slice()) {
7940 let out: Vec<i64> = (0..s.len())
7941 .map(|i| if cb[i] { o[i] } else { s[i] })
7942 .collect();
7943 return Ok(Self::from_i64_values(out));
7944 }
7945 }
7946 let out: Vec<Scalar> = self
7947 .values
7948 .iter()
7949 .zip(cond.values.iter().zip(other.values.iter()))
7950 .map(|(v, (c, o))| match c {
7951 Scalar::Bool(true) => cast_scalar(o, self.dtype),
7952 Scalar::Bool(false) => Ok(v.clone()),
7953 _ => Ok(Scalar::Null(NullKind::NaN)),
7954 })
7955 .collect::<Result<Vec<_>, _>>()
7956 .map_err(ColumnError::Type)?;
7957 Self::new(self.dtype, out)
7958 }
7959
7960 pub fn replace_values(
7970 &self,
7971 to_replace: &[Scalar],
7972 replacement: &[Scalar],
7973 ) -> Result<Self, ColumnError> {
7974 if to_replace.len() != replacement.len() {
7975 return Err(ColumnError::LengthMismatch {
7976 left: to_replace.len(),
7977 right: replacement.len(),
7978 });
7979 }
7980 let out: Vec<Scalar> = self
7981 .values
7982 .iter()
7983 .map(|v| {
7984 for (target, replacement_val) in to_replace.iter().zip(replacement.iter()) {
7985 let matches = if target.is_missing() && v.is_missing() {
7987 true
7988 } else if target.is_missing() || v.is_missing() {
7989 false
7990 } else {
7991 v.semantic_eq(target)
7992 };
7993 if matches {
7994 return replacement_val.clone();
7995 }
7996 }
7997 v.clone()
7998 })
7999 .collect();
8000 let inferred = infer_dtype(&out).unwrap_or(self.dtype);
8001 Self::new(inferred, out)
8002 }
8003
    /// Alias for `replace_values`; both arguments are forwarded unchanged.
    ///
    /// # Errors
    /// Propagates whatever `replace_values` returns.
    pub fn replace(
        &self,
        to_replace: &[Scalar],
        replacement: &[Scalar],
    ) -> Result<Self, ColumnError> {
        self.replace_values(to_replace, replacement)
    }
8014
8015 #[must_use]
8022 pub fn nonzero(&self) -> Vec<usize> {
8023 let mut out = Vec::new();
8024 for (i, v) in self.values.iter().enumerate() {
8025 if v.is_missing() {
8026 continue;
8027 }
8028 let truthy = match v {
8029 Scalar::Bool(b) => *b,
8030 Scalar::Int64(x) => *x != 0,
8031 Scalar::Float64(x) => *x != 0.0 && !x.is_nan(),
8032 Scalar::Utf8(s) => !s.is_empty(),
8033 Scalar::Timedelta64(x) => *x != 0,
8034 Scalar::Datetime64(x) => *x != Timestamp::NAT,
8035 Scalar::Period(x) => *x != i64::MIN,
8036 Scalar::Interval(_) => true,
8037 Scalar::Null(_) => false,
8038 };
8039 if truthy {
8040 out.push(i);
8041 }
8042 }
8043 out
8044 }
8045
    /// Returns how many positions `nonzero` reports.
    #[must_use]
    pub fn count_nonzero(&self) -> usize {
        self.nonzero().len()
    }
8053
8054 pub fn flatnonzero(&self) -> Result<Self, ColumnError> {
8058 let indices: Vec<Scalar> = self
8059 .nonzero()
8060 .into_iter()
8061 .map(|i| Scalar::Int64(i as i64))
8062 .collect();
8063 Self::new(DType::Int64, indices)
8064 }
8065
8066 pub fn where_cond(&self, cond: &Self, other: &Scalar) -> Result<Self, ColumnError> {
8073 if cond.dtype != DType::Bool {
8074 return Err(ColumnError::InvalidMaskType { dtype: cond.dtype });
8075 }
8076 if self.values.len() != cond.values.len() {
8077 return Err(ColumnError::LengthMismatch {
8078 left: self.values.len(),
8079 right: cond.values.len(),
8080 });
8081 }
8082 if !other.is_missing()
8088 && let Some(cb) = cond.as_bool_slice()
8089 {
8090 if let Some(s) = self.as_f64_slice()
8091 && let Ok(o) = other.to_f64()
8092 {
8093 let out: Vec<f64> = (0..s.len()).map(|i| if cb[i] { s[i] } else { o }).collect();
8094 return Ok(Self::from_f64_values(out));
8095 }
8096 if let Some(s) = self.as_i64_slice()
8097 && let Scalar::Int64(o) = other
8098 {
8099 let o = *o;
8100 let out: Vec<i64> = (0..s.len()).map(|i| if cb[i] { s[i] } else { o }).collect();
8101 return Ok(Self::from_i64_values(out));
8102 }
8103 }
8104 let out: Vec<Scalar> = self
8105 .values
8106 .iter()
8107 .zip(cond.values.iter())
8108 .map(|(v, c)| match c {
8109 Scalar::Bool(true) => v.clone(),
8110 Scalar::Bool(false) => other.clone(),
8111 _ => Scalar::Null(NullKind::NaN),
8112 })
8113 .collect();
8114 Self::new(self.dtype, out)
8115 }
8116
    /// Alias for `where_cond`, spelled with a raw identifier because `where` is
    /// a Rust keyword.
    ///
    /// # Errors
    /// Propagates whatever `where_cond` returns.
    pub fn r#where(&self, cond: &Self, other: &Scalar) -> Result<Self, ColumnError> {
        self.where_cond(cond, other)
    }
8122
    /// Assigns 1-based ranks to the column's values as a `Float64` column.
    ///
    /// `method` decides how ties are ranked:
    /// - `"average"`: mean of the first and last rank of the tie group;
    /// - `"min"` / `"max"`: first / last rank of the tie group;
    /// - `"first"`: consecutive ranks within the group;
    /// - `"dense"`: like `"min"`, but ranks increase by one between groups.
    ///
    /// `ascending` selects the ranking direction. In the generic path missing
    /// values keep a NaN-null rank.
    ///
    /// # Errors
    /// Returns `TypeError::NonNumericValue` carrying the method name when
    /// `method` is not one of the five names above; also propagates errors
    /// from `Self::new`.
    pub fn rank(&self, method: &str, ascending: bool) -> Result<Self, ColumnError> {
        let valid_method = matches!(method, "average" | "min" | "max" | "first" | "dense");
        if !valid_method {
            return Err(ColumnError::Type(TypeError::NonNumericValue {
                value: method.to_string(),
                dtype: self.dtype,
            }));
        }

        let len = self.values.len();

        // Path 1: counting table over typed i64 data.
        // NOTE(review): `i64_direct_address_range` presumably yields `(min, range)`
        // only when the value span is small enough to index a table — confirm.
        if let Some(data) = self.as_i64_slice()
            && let Some((min, range)) = i64_direct_address_range(data)
        {
            let total = data.len() as i64;
            let mut count = vec![0i64; range];
            for &v in data {
                // i128 keeps `v - min` from overflowing.
                count[(v as i128 - min as i128) as usize] += 1;
            }
            // c_less[s]: number of values strictly smaller than the value at slot s.
            let mut c_less = vec![0i64; range];
            // dense_asc[s]: 1-based ordinal of slot s among the occupied slots.
            let mut dense_asc = vec![0i64; range];
            let mut acc = 0i64;
            let mut ord = 0i64;
            for s in 0..range {
                c_less[s] = acc;
                if count[s] > 0 {
                    ord += 1;
                    dense_asc[s] = ord;
                }
                acc += count[s];
            }
            let n_distinct = ord;
            // occ[s]: how many occurrences of slot s were already ranked ("first").
            let mut occ = vec![0i64; range];
            let mut ranks = vec![Scalar::Null(NullKind::NaN); len];
            for (i, &v) in data.iter().enumerate() {
                let s = (v as i128 - min as i128) as usize;
                let c = count[s];
                // Number of values ranked ahead of this tie group: the strictly
                // smaller ones when ascending, the strictly larger ones otherwise.
                let before = if ascending {
                    c_less[s]
                } else {
                    total - c_less[s] - c
                };
                let start_rank = before as f64 + 1.0;
                let end_rank = (before + c) as f64;
                let value = match method {
                    "average" => (start_rank + end_rank) / 2.0,
                    "min" => start_rank,
                    "max" => end_rank,
                    "first" => {
                        let k = occ[s];
                        occ[s] += 1;
                        (before + k) as f64 + 1.0
                    }
                    "dense" => {
                        let d = if ascending {
                            dense_asc[s]
                        } else {
                            n_distinct - dense_asc[s] + 1
                        };
                        d as f64
                    }
                    // `method` was validated at the top of the function.
                    _ => unreachable!(),
                };
                ranks[i] = Scalar::Float64(value);
            }
            return Self::new(DType::Float64, ranks);
        }

        // Path 2: typed f64 data without NaNs, ranked via a sorting permutation.
        // NOTE(review): `typed_radix_perm(ascending)` is expected to return the
        // indices in rank order for the requested direction — confirm, including
        // whether it is stable (which determines the order used by "first").
        if let Some(data) = self.as_f64_slice()
            && !data.iter().any(|x| x.is_nan())
        {
            let perm = self
                .typed_radix_perm(ascending)
                .expect("f64 slice yields radix perm");
            let n = perm.len();
            let mut ranks = vec![0.0_f64; len];
            let mut cursor = 0usize;
            let mut dense_rank = 0f64;
            while cursor < n {
                // Extend `end` over the run of values equal to the one at `cursor`.
                let mut end = cursor + 1;
                while end < n && data[perm[end]] == data[perm[cursor]] {
                    end += 1;
                }
                let start_rank = cursor as f64 + 1.0;
                let end_rank = end as f64;
                dense_rank += 1.0;
                #[allow(clippy::needless_range_loop)] for group_idx in cursor..end {
                    let original = perm[group_idx];
                    ranks[original] = match method {
                        "average" => (start_rank + end_rank) / 2.0,
                        "min" => start_rank,
                        "max" => end_rank,
                        "first" => group_idx as f64 + 1.0,
                        "dense" => dense_rank,
                        // `method` was validated at the top of the function.
                        _ => unreachable!(),
                    };
                }
                cursor = end;
            }
            return Ok(Self::from_f64_values(ranks));
        }

        // Path 3: generic fallback — sort the non-missing values by comparison.
        let mut non_missing: Vec<(usize, &Scalar)> = Vec::with_capacity(len);
        for (i, v) in self.values.iter().enumerate() {
            if !v.is_missing() {
                non_missing.push((i, v));
            }
        }
        non_missing.sort_by(|a, b| compare_scalars_na_last(a.1, b.1, ascending));

        // Positions never assigned below (the missing ones) keep a NaN-null rank.
        let mut ranks = vec![Scalar::Null(NullKind::NaN); len];
        let n = non_missing.len();
        let mut cursor = 0usize;
        let mut dense_rank = 0f64;
        while cursor < n {
            // Extend `end` over the run of values comparing equal to `cursor`'s.
            let mut end = cursor + 1;
            while end < n {
                let same =
                    compare_scalars_na_last(non_missing[cursor].1, non_missing[end].1, ascending)
                        .is_eq();
                if !same {
                    break;
                }
                end += 1;
            }
            let start_rank = cursor as f64 + 1.0;
            let end_rank = end as f64;
            dense_rank += 1.0;
            for (group_idx, entry) in non_missing.iter().enumerate().take(end).skip(cursor) {
                let original = entry.0;
                let value = match method {
                    "average" => (start_rank + end_rank) / 2.0,
                    "min" => start_rank,
                    "max" => end_rank,
                    "first" => group_idx as f64 + 1.0,
                    "dense" => dense_rank,
                    // `method` was validated at the top of the function.
                    _ => unreachable!(),
                };
                ranks[original] = Scalar::Float64(value);
            }
            cursor = end;
        }
        Self::new(DType::Float64, ranks)
    }
8307
    /// Finds the insertion position for `needle` in this column, searching the
    /// values in their stored order (no sorter).
    ///
    /// `side` must be `"left"` or `"right"`; see `searchsorted_position` for
    /// the search itself.
    ///
    /// # Errors
    /// Propagates whatever `searchsorted_position` returns.
    pub fn searchsorted(&self, needle: &Scalar, side: &str) -> Result<usize, ColumnError> {
        self.searchsorted_position(needle, side, None)
    }
8318
    /// Finds the insertion position for `needle`, visiting the values through
    /// the index permutation `sorter`.
    ///
    /// `side` must be `"left"` or `"right"`; `sorter` is validated by
    /// `searchsorted_position`.
    ///
    /// # Errors
    /// Propagates whatever `searchsorted_position` returns.
    pub fn searchsorted_with_sorter(
        &self,
        needle: &Scalar,
        side: &str,
        sorter: &[usize],
    ) -> Result<usize, ColumnError> {
        self.searchsorted_position(needle, side, Some(sorter))
    }
8331
8332 pub fn searchsorted_values(&self, needles: &[Scalar], side: &str) -> Result<Self, ColumnError> {
8338 let positions: Vec<Scalar> = needles
8339 .iter()
8340 .map(|needle| self.searchsorted_position(needle, side, None))
8341 .map(|result| result.map(|position| Scalar::Int64(position as i64)))
8342 .collect::<Result<Vec<_>, _>>()?;
8343 Self::new(DType::Int64, positions)
8344 }
8345
8346 pub fn searchsorted_values_with_sorter(
8351 &self,
8352 needles: &[Scalar],
8353 side: &str,
8354 sorter: &[usize],
8355 ) -> Result<Self, ColumnError> {
8356 let positions: Vec<Scalar> = needles
8357 .iter()
8358 .map(|needle| self.searchsorted_position(needle, side, Some(sorter)))
8359 .map(|result| result.map(|position| Scalar::Int64(position as i64)))
8360 .collect::<Result<Vec<_>, _>>()?;
8361 Self::new(DType::Int64, positions)
8362 }
8363
8364 fn searchsorted_position(
8365 &self,
8366 needle: &Scalar,
8367 side: &str,
8368 sorter: Option<&[usize]>,
8369 ) -> Result<usize, ColumnError> {
8370 if side != "left" && side != "right" {
8371 return Err(ColumnError::Type(TypeError::NonNumericValue {
8372 value: side.to_string(),
8373 dtype: self.dtype,
8374 }));
8375 }
8376 if needle.is_missing() {
8377 return Err(ColumnError::Type(TypeError::ValueIsMissing {
8378 kind: NullKind::NaN,
8379 }));
8380 }
8381
8382 let sorter = self.validate_searchsorted_sorter(sorter)?;
8383 let len = sorter.map_or(self.values.len(), <[usize]>::len);
8384 let mut lo = 0usize;
8385 let mut hi = len;
8386 while lo < hi {
8387 let mid = lo + (hi - lo) / 2;
8388 let mid_idx = sorter.map_or(mid, |indices| indices[mid]);
8389 let mid_val = &self.values[mid_idx];
8390 let ord = if mid_val.is_missing() {
8393 std::cmp::Ordering::Greater
8394 } else {
8395 compare_scalars_na_last(mid_val, needle, true)
8396 };
8397 use std::cmp::Ordering;
8398 let go_right = match (ord, side) {
8399 (Ordering::Less, _) => true,
8400 (Ordering::Equal, "left") => false,
8401 (Ordering::Equal, "right") => true,
8402 (Ordering::Greater, _) => false,
8403 _ => unreachable!(),
8404 };
8405 if go_right {
8406 lo = mid + 1;
8407 } else {
8408 hi = mid;
8409 }
8410 }
8411 Ok(lo)
8412 }
8413
8414 fn validate_searchsorted_sorter<'a>(
8415 &self,
8416 sorter: Option<&'a [usize]>,
8417 ) -> Result<Option<&'a [usize]>, ColumnError> {
8418 let Some(sorter) = sorter else {
8419 return Ok(None);
8420 };
8421 let len = self.values.len();
8422 if sorter.len() != len {
8423 return Err(ColumnError::LengthMismatch {
8424 left: len,
8425 right: sorter.len(),
8426 });
8427 }
8428 let mut seen = vec![false; len];
8429 for &idx in sorter {
8430 if idx >= len {
8431 return Err(ColumnError::InvalidSorter {
8432 len,
8433 reason: format!("index {idx} out of bounds"),
8434 });
8435 }
8436 if std::mem::replace(&mut seen[idx], true) {
8437 return Err(ColumnError::InvalidSorter {
8438 len,
8439 reason: format!("index {idx} appears more than once"),
8440 });
8441 }
8442 }
8443 Ok(Some(sorter))
8444 }
8445
8446 pub fn digitize(&self, bins: &Self, right: bool) -> Result<Self, ColumnError> {
8450 let mut out = Vec::with_capacity(self.values.len());
8451 for v in &self.values {
8452 if v.is_missing() {
8453 out.push(Scalar::Int64(0));
8454 continue;
8455 }
8456 let vf = v.to_f64().map_err(ColumnError::Type)?;
8457 let side = if right { "right" } else { "left" };
8458 let pos = bins.searchsorted(&Scalar::Float64(vf), side)?;
8459 out.push(Scalar::Int64(pos as i64));
8460 }
8461 Self::new(DType::Int64, out)
8462 }
8463
8464 pub fn bincount(&self, minlength: usize) -> Result<Self, ColumnError> {
8469 let mut max_val = 0i64;
8470 for v in &self.values {
8471 if v.is_missing() {
8472 continue;
8473 }
8474 match v {
8475 Scalar::Int64(x) if *x >= 0 => {
8476 if *x > max_val {
8477 max_val = *x;
8478 }
8479 }
8480 Scalar::Int64(x) => {
8481 return Err(ColumnError::Type(TypeError::NonNumericValue {
8482 value: format!("negative value {x}"),
8483 dtype: self.dtype,
8484 }));
8485 }
8486 _ => {
8487 return Err(ColumnError::Type(TypeError::NonNumericValue {
8488 value: format!("{v:?}"),
8489 dtype: self.dtype,
8490 }));
8491 }
8492 }
8493 }
8494 let len = (max_val as usize + 1).max(minlength);
8495 let mut counts = vec![0i64; len];
8496 for v in &self.values {
8497 if v.is_missing() {
8498 continue;
8499 }
8500 if let Scalar::Int64(x) = v {
8501 counts[*x as usize] += 1;
8502 }
8503 }
8504 let out: Vec<Scalar> = counts.into_iter().map(Scalar::Int64).collect();
8505 Self::new(DType::Int64, out)
8506 }
8507
8508 pub fn histogram(&self, bin_edges: &[f64]) -> Result<Self, ColumnError> {
8513 if bin_edges.len() < 2 {
8514 return Err(ColumnError::Type(TypeError::NonNumericValue {
8515 value: "histogram requires at least 2 bin edges".to_owned(),
8516 dtype: self.dtype,
8517 }));
8518 }
8519 let n_bins = bin_edges.len() - 1;
8520 let mut counts = vec![0i64; n_bins];
8521
8522 let strict = bin_edges.windows(2).all(|w| w[0] < w[1]);
8532
8533 for v in &self.values {
8534 if v.is_missing() {
8535 continue;
8536 }
8537 let x = match v.to_f64() {
8538 Ok(f) if f.is_finite() => f,
8539 _ => continue,
8540 };
8541 if strict {
8542 if x < bin_edges[0] || x > bin_edges[n_bins] {
8543 continue;
8544 }
8545 let bin = (bin_edges.partition_point(|&e| e <= x) - 1).min(n_bins - 1);
8546 counts[bin] += 1;
8547 continue;
8548 }
8549 for i in 0..n_bins {
8551 let in_bin = if i == n_bins - 1 {
8552 x >= bin_edges[i] && x <= bin_edges[i + 1]
8554 } else {
8555 x >= bin_edges[i] && x < bin_edges[i + 1]
8556 };
8557 if in_bin {
8558 counts[i] += 1;
8559 break;
8560 }
8561 }
8562 }
8564
8565 let out: Vec<Scalar> = counts.into_iter().map(Scalar::Int64).collect();
8566 Self::new(DType::Int64, out)
8567 }
8568
8569 pub fn histogram_auto(&self, n_bins: usize) -> Result<(Self, Vec<f64>), ColumnError> {
8574 if n_bins == 0 {
8575 return Err(ColumnError::Type(TypeError::NonNumericValue {
8576 value: "histogram requires at least 1 bin".to_owned(),
8577 dtype: self.dtype,
8578 }));
8579 }
8580
8581 let mut min_val = f64::INFINITY;
8583 let mut max_val = f64::NEG_INFINITY;
8584 for v in &self.values {
8585 if v.is_missing() {
8586 continue;
8587 }
8588 if let Ok(x) = v.to_f64()
8589 && x.is_finite()
8590 {
8591 min_val = min_val.min(x);
8592 max_val = max_val.max(x);
8593 }
8594 }
8595
8596 if !min_val.is_finite() || !max_val.is_finite() || min_val > max_val {
8597 let counts: Vec<Scalar> = vec![Scalar::Int64(0); n_bins];
8599 let edges = vec![0.0; n_bins + 1];
8600 return Ok((Self::new(DType::Int64, counts)?, edges));
8601 }
8602
8603 let range = max_val - min_val;
8605 let (adj_min, adj_max) = if range == 0.0 {
8606 (min_val - 0.5, max_val + 0.5)
8608 } else {
8609 (min_val, max_val)
8610 };
8611 let adj_range = adj_max - adj_min;
8612 let step = adj_range / n_bins as f64;
8613 let bin_edges: Vec<f64> = (0..=n_bins).map(|i| adj_min + step * i as f64).collect();
8614
8615 let counts = self.histogram(&bin_edges)?;
8616 Ok((counts, bin_edges))
8617 }
8618
8619 pub fn astype(&self, target: DType) -> Result<Self, ColumnError> {
8629 if self.dtype == target {
8630 return Ok(self.clone());
8631 }
8632 if target == DType::Float64
8640 && let Some(data) = self.as_i64_slice()
8641 {
8642 let out: Vec<f64> = data.iter().map(|&x| x as f64).collect();
8643 return Ok(Self::from_f64_values(out));
8644 }
8645 if target == DType::Int64
8646 && let Some(data) = self.as_f64_slice()
8647 && data
8648 .iter()
8649 .all(|&v| v >= i64::MIN as f64 && v < 9_223_372_036_854_775_808.0)
8650 {
8651 let out: Vec<i64> = data.iter().map(|&v| v as i64).collect();
8652 return Ok(Self::from_i64_values(out));
8653 }
8654 let out: Vec<Scalar> = self
8655 .values
8656 .iter()
8657 .map(|v| cast_scalar(v, target))
8658 .collect::<Result<Vec<_>, _>>()
8659 .map_err(ColumnError::Type)?;
8660 Self::new(target, out)
8661 }
8662
8663 pub fn nsmallest_keep(&self, n: usize, keep: &str) -> Result<Self, ColumnError> {
8673 nkeep_impl(self, n, keep, true)
8674 }
8675
8676 pub fn nlargest_keep(&self, n: usize, keep: &str) -> Result<Self, ColumnError> {
8682 nkeep_impl(self, n, keep, false)
8683 }
8684
8685 pub fn nlargest(&self, n: usize) -> Result<Self, ColumnError> {
8693 let sorted = self.sort_values(false)?;
8694 let take = n.min(sorted.values.len());
8695 let values: Vec<Scalar> = sorted.values[..take].to_vec();
8696 Self::new(self.dtype, values)
8697 }
8698
8699 pub fn nsmallest(&self, n: usize) -> Result<Self, ColumnError> {
8703 let sorted = self.sort_values(true)?;
8704 let take = n.min(sorted.values.len());
8705 let values: Vec<Scalar> = sorted.values[..take].to_vec();
8706 Self::new(self.dtype, values)
8707 }
8708
8709 pub fn mask(&self, cond: &Self, other: &Scalar) -> Result<Self, ColumnError> {
8714 if cond.dtype != DType::Bool {
8715 return Err(ColumnError::InvalidMaskType { dtype: cond.dtype });
8716 }
8717 if self.values.len() != cond.values.len() {
8718 return Err(ColumnError::LengthMismatch {
8719 left: self.values.len(),
8720 right: cond.values.len(),
8721 });
8722 }
8723 if !other.is_missing()
8726 && let Some(cb) = cond.as_bool_slice()
8727 {
8728 if let Some(s) = self.as_f64_slice()
8729 && let Ok(o) = other.to_f64()
8730 {
8731 let out: Vec<f64> = (0..s.len()).map(|i| if cb[i] { o } else { s[i] }).collect();
8732 return Ok(Self::from_f64_values(out));
8733 }
8734 if let Some(s) = self.as_i64_slice()
8735 && let Scalar::Int64(o) = other
8736 {
8737 let o = *o;
8738 let out: Vec<i64> = (0..s.len()).map(|i| if cb[i] { o } else { s[i] }).collect();
8739 return Ok(Self::from_i64_values(out));
8740 }
8741 }
8742 let out: Vec<Scalar> = self
8743 .values
8744 .iter()
8745 .zip(cond.values.iter())
8746 .map(|(v, c)| match c {
8747 Scalar::Bool(true) => other.clone(),
8748 Scalar::Bool(false) => v.clone(),
8749 _ => Scalar::Null(NullKind::NaN),
8750 })
8751 .collect();
8752 Self::new(self.dtype, out)
8753 }
8754
8755 fn as_all_valid_str_vec(&self) -> Option<Vec<&str>> {
8770 if self.dtype != DType::Utf8 || !self.validity.all() {
8771 return None;
8772 }
8773 if let Some((bytes, offsets)) = self.as_utf8_contiguous() {
8780 let mut strs = Vec::with_capacity(offsets.len() - 1);
8781 for w in offsets.windows(2) {
8782 strs.push(
8783 std::str::from_utf8(&bytes[w[0]..w[1]])
8784 .expect("contiguous utf8 buffer is valid by construction"),
8785 );
8786 }
8787 return Some(strs);
8788 }
8789 let mut strs = Vec::with_capacity(self.len());
8790 for v in self.values.iter() {
8791 match v {
8792 Scalar::Utf8(s) => strs.push(s.as_str()),
8793 _ => return None,
8794 }
8795 }
8796 Some(strs)
8797 }
8798
8799 fn typed_radix_perm(&self, ascending: bool) -> Option<Vec<usize>> {
8800 if let Some(data) = self.as_i64_slice() {
8801 let keys: Vec<u64> = if ascending {
8802 data.iter().map(|&v| i64_radix_key(v)).collect()
8803 } else {
8804 data.iter().map(|&v| !i64_radix_key(v)).collect()
8805 };
8806 return Some(radix_argsort_u64(&keys));
8807 }
8808 if let Some(data) = self.as_f64_slice() {
8809 let keys: Vec<u64> = if ascending {
8810 data.iter().map(|&v| f64_radix_key(v)).collect()
8811 } else {
8812 data.iter().map(|&v| !f64_radix_key(v)).collect()
8813 };
8814 return Some(radix_argsort_u64(&keys));
8815 }
8816 None
8817 }
8818
8819 #[must_use]
8829 pub fn typed_radix_keys(&self, ascending: bool) -> Option<Vec<u64>> {
8830 if let Some(data) = self.as_i64_slice() {
8831 return Some(if ascending {
8832 data.iter().map(|&v| i64_radix_key(v)).collect()
8833 } else {
8834 data.iter().map(|&v| !i64_radix_key(v)).collect()
8835 });
8836 }
8837 if let Some(data) = self.as_f64_slice() {
8838 if data.iter().any(|x| x.is_nan()) {
8839 return None;
8840 }
8841 return Some(if ascending {
8842 data.iter().map(|&v| f64_radix_key(v)).collect()
8843 } else {
8844 data.iter().map(|&v| !f64_radix_key(v)).collect()
8845 });
8846 }
8847 None
8848 }
8849
8850 pub fn sort_values(&self, ascending: bool) -> Result<Self, ColumnError> {
8851 if let Some(data) = self.as_i64_slice() {
8855 let perm = self
8856 .typed_radix_perm(ascending)
8857 .expect("i64 slice yields perm");
8858 let sorted: Vec<i64> = perm.iter().map(|&i| data[i]).collect();
8859 return Ok(Self::from_i64_values(sorted));
8860 }
8861 if let Some(data) = self.as_f64_slice() {
8862 let perm = self
8863 .typed_radix_perm(ascending)
8864 .expect("f64 slice yields perm");
8865 let sorted: Vec<f64> = perm.iter().map(|&i| data[i]).collect();
8866 return Ok(Self::from_f64_values(sorted));
8867 }
8868 if let Some(strs) = self.as_all_valid_str_vec() {
8873 let perm = utf8_msd_argsort(&strs, ascending);
8874 let sorted: Vec<Scalar> = perm.iter().map(|&i| self.values[i].clone()).collect();
8875 return Self::new(self.dtype, sorted);
8876 }
8877 let mut indexed: Vec<(usize, &Scalar)> = self.values.iter().enumerate().collect();
8878 indexed.sort_by(|a, b| compare_scalars_na_last(a.1, b.1, ascending));
8879 let sorted: Vec<Scalar> = indexed.into_iter().map(|(_, v)| v.clone()).collect();
8880 Self::new(self.dtype, sorted)
8881 }
8882
8883 #[must_use]
8889 pub fn argsort(&self) -> Vec<usize> {
8890 self.argsort_with(true)
8891 }
8892
8893 #[must_use]
8898 pub fn argsort_with(&self, ascending: bool) -> Vec<usize> {
8899 if let Some(perm) = self.typed_radix_perm(ascending) {
8900 return perm;
8901 }
8902 if let Some(strs) = self.as_all_valid_str_vec() {
8907 return utf8_msd_argsort(&strs, ascending);
8908 }
8909 let mut indexed: Vec<(usize, &Scalar)> = self.values.iter().enumerate().collect();
8910 indexed.sort_by(|a, b| compare_scalars_na_last(a.1, b.1, ascending));
8911 indexed.into_iter().map(|(i, _)| i).collect()
8912 }
8913
8914 pub fn argpartition(&self, kth: usize) -> Result<Vec<usize>, ColumnError> {
8920 if kth >= self.len() {
8921 return Err(ColumnError::InvalidLength {
8922 operation: "argpartition",
8923 expected: kth + 1,
8924 actual: self.len(),
8925 });
8926 }
8927 let mut indexed: Vec<(usize, &Scalar)> = self.values.iter().enumerate().collect();
8928 indexed.select_nth_unstable_by(kth, |a, b| compare_scalars_na_last(a.1, b.1, true));
8929 Ok(indexed.into_iter().map(|(i, _)| i).collect())
8930 }
8931
8932 pub fn partition(&self, kth: usize) -> Result<Self, ColumnError> {
8937 let indices = self.argpartition(kth)?;
8938 let out: Vec<Scalar> = indices.iter().map(|&i| self.values[i].clone()).collect();
8939 Self::new(self.dtype, out)
8940 }
8941
8942 pub fn diff(&self, periods: i64) -> Result<Self, ColumnError> {
8949 let len = self.values.len();
8950 let out_dtype = match self.dtype {
8958 DType::Timedelta64 => DType::Timedelta64,
8959 DType::Bool => DType::Bool,
8960 _ => DType::Float64,
8961 };
8962 if len == 0 || periods == 0 {
8963 let null = if out_dtype == DType::Timedelta64 {
8964 Scalar::Null(NullKind::NaT)
8965 } else {
8966 Scalar::Null(NullKind::NaN)
8967 };
8968 return Self::new(out_dtype, vec![null; len]);
8969 }
8970 let abs = periods.unsigned_abs() as usize;
8971 let mut out: Vec<Scalar> = Vec::with_capacity(len);
8972 let null_scalar = if out_dtype == DType::Timedelta64 {
8973 Scalar::Null(NullKind::NaT)
8974 } else {
8975 Scalar::Null(NullKind::NaN)
8976 };
8977 for i in 0..len {
8978 if (periods > 0 && i < abs) || (periods < 0 && i + abs >= len) {
8979 out.push(null_scalar.clone());
8980 continue;
8981 }
8982 let (cur, prev) = if periods > 0 {
8983 (&self.values[i], &self.values[i - abs])
8984 } else {
8985 (&self.values[i], &self.values[i + abs])
8986 };
8987 if cur.is_missing() || prev.is_missing() {
8988 out.push(null_scalar.clone());
8989 continue;
8990 }
8991 if let (Scalar::Timedelta64(cur_ns), Scalar::Timedelta64(prev_ns)) = (cur, prev) {
8992 if *cur_ns == Timedelta::NAT || *prev_ns == Timedelta::NAT {
8993 out.push(Scalar::Null(NullKind::NaT));
8994 } else {
8995 out.push(Scalar::Timedelta64(cur_ns.saturating_sub(*prev_ns)));
8996 }
8997 continue;
8998 }
8999 if let (Scalar::Bool(cur_b), Scalar::Bool(prev_b)) = (cur, prev) {
9000 out.push(Scalar::Bool(cur_b != prev_b));
9002 continue;
9003 }
9004 match (cur.to_f64(), prev.to_f64()) {
9005 (Ok(a), Ok(b)) => out.push(Scalar::Float64(a - b)),
9006 _ => out.push(Scalar::Null(NullKind::NaN)),
9007 }
9008 }
9009 Self::new(out_dtype, out)
9010 }
9011
9012 pub fn ediff1d(
9016 &self,
9017 to_begin: Option<Scalar>,
9018 to_end: Option<Scalar>,
9019 ) -> Result<Self, ColumnError> {
9020 let mut out = Vec::new();
9021 if let Some(v) = to_begin {
9022 out.push(v);
9023 }
9024 for i in 1..self.values.len() {
9025 let cur = &self.values[i];
9026 let prev = &self.values[i - 1];
9027 if cur.is_missing() || prev.is_missing() {
9028 out.push(Scalar::Float64(f64::NAN));
9029 continue;
9030 }
9031 let cf = cur.to_f64().map_err(ColumnError::Type)?;
9032 let pf = prev.to_f64().map_err(ColumnError::Type)?;
9033 out.push(Scalar::Float64(cf - pf));
9034 }
9035 if let Some(v) = to_end {
9036 out.push(v);
9037 }
9038 Self::new(DType::Float64, out)
9039 }
9040
9041 pub fn gradient(&self) -> Result<Self, ColumnError> {
9045 let n = self.values.len();
9046 if n == 0 {
9047 return Self::new(DType::Float64, Vec::new());
9048 }
9049 if n == 1 {
9050 return Self::new(DType::Float64, vec![Scalar::Float64(0.0)]);
9051 }
9052 let vals: Vec<f64> = self
9053 .values
9054 .iter()
9055 .map(|v| v.to_f64().unwrap_or(f64::NAN))
9056 .collect();
9057 let mut out = Vec::with_capacity(n);
9058 out.push(Scalar::Float64(vals[1] - vals[0]));
9059 for i in 1..n - 1 {
9060 out.push(Scalar::Float64((vals[i + 1] - vals[i - 1]) / 2.0));
9061 }
9062 out.push(Scalar::Float64(vals[n - 1] - vals[n - 2]));
9063 Self::new(DType::Float64, out)
9064 }
9065
9066 pub fn trapz(&self, dx: f64) -> Result<Scalar, ColumnError> {
9070 let n = self.values.len();
9071 if n < 2 {
9072 return Ok(Scalar::Float64(0.0));
9073 }
9074 let vals: Vec<f64> = self
9075 .values
9076 .iter()
9077 .map(|v| v.to_f64().unwrap_or(0.0))
9078 .collect();
9079 let mut sum = 0.0;
9080 for i in 1..n {
9081 sum += (vals[i - 1] + vals[i]) / 2.0 * dx;
9082 }
9083 Ok(Scalar::Float64(sum))
9084 }
9085
9086 pub fn duplicated(&self) -> Result<Self, ColumnError> {
9092 self.duplicated_keep("first")
9093 }
9094
9095 pub fn duplicated_keep(&self, keep: &str) -> Result<Self, ColumnError> {
9101 #[derive(Hash, PartialEq, Eq)]
9102 enum Key<'a> {
9103 Null,
9104 Bool(bool),
9105 Int64(i64),
9106 FloatBits(u64),
9107 Utf8(&'a str),
9108 Timedelta64(i64),
9109 Datetime64(i64),
9110 Period(i64),
9111 Interval(u64, u64, IntervalClosed),
9112 }
9113 fn key_of(v: &Scalar) -> Key<'_> {
9114 if v.is_missing() {
9115 return Key::Null;
9116 }
9117 match v {
9118 Scalar::Bool(b) => Key::Bool(*b),
9119 Scalar::Int64(i) => Key::Int64(*i),
9120 Scalar::Float64(f) => {
9121 let norm = if *f == 0.0 { 0.0 } else { *f };
9122 Key::FloatBits(norm.to_bits())
9123 }
9124 Scalar::Utf8(s) => Key::Utf8(s.as_str()),
9125 Scalar::Timedelta64(v) => Key::Timedelta64(*v),
9126 Scalar::Datetime64(v) => Key::Datetime64(*v),
9127 Scalar::Period(v) => Key::Period(*v),
9128 Scalar::Interval(v) => {
9129 let (left, right, closed) = interval_key(v);
9130 Key::Interval(left, right, closed)
9131 }
9132 Scalar::Null(_) => Key::Null,
9133 }
9134 }
9135
9136 let policy = match keep {
9137 "first" => DupPolicy::First,
9138 "last" => DupPolicy::Last,
9139 "false" | "False" | "none" => DupPolicy::None,
9140 other => {
9141 return Err(ColumnError::Type(TypeError::NonNumericValue {
9142 value: other.to_string(),
9143 dtype: self.dtype,
9144 }));
9145 }
9146 };
9147
9148 if let Some(data) = self.as_i64_slice() {
9154 if let Some((min, range)) = i64_direct_address_range(data) {
9157 return Ok(Self::from_bool_values(duplicated_flags_i64_direct(
9158 data, min, range, policy,
9159 )));
9160 }
9161 return Ok(Self::from_bool_values(duplicated_flags_typed(data, policy)));
9162 }
9163 if let Some(data) = self.as_f64_slice() {
9164 let keys: Vec<u64> = data
9165 .iter()
9166 .map(|&f| (if f == 0.0 { 0.0 } else { f }).to_bits())
9167 .collect();
9168 return Ok(Self::from_bool_values(duplicated_flags_typed(
9169 &keys, policy,
9170 )));
9171 }
9172
9173 let mut flags = vec![false; self.values.len()];
9174 match policy {
9175 DupPolicy::First => {
9176 let mut seen: FxHashSet<Key<'_>> = FxHashSet::default();
9177 for (idx, value) in self.values.iter().enumerate() {
9178 flags[idx] = !seen.insert(key_of(value));
9179 }
9180 }
9181 DupPolicy::Last => {
9182 let mut seen: FxHashSet<Key<'_>> = FxHashSet::default();
9183 for (idx, value) in self.values.iter().enumerate().rev() {
9184 flags[idx] = !seen.insert(key_of(value));
9185 }
9186 }
9187 DupPolicy::None => {
9188 let mut seen_once: FxHashSet<Key<'_>> = FxHashSet::default();
9189 let mut seen_multiple: FxHashSet<Key<'_>> = FxHashSet::default();
9190 for value in &self.values {
9191 let key = key_of(value);
9192 if !seen_once.insert(key_of(value)) {
9193 seen_multiple.insert(key);
9194 }
9195 }
9196 for (idx, value) in self.values.iter().enumerate() {
9197 flags[idx] = seen_multiple.contains(&key_of(value));
9198 }
9199 }
9200 }
9201
9202 let out: Vec<Scalar> = flags.into_iter().map(Scalar::Bool).collect();
9203 Self::new(DType::Bool, out)
9204 }
9205
9206 pub fn between(&self, lower: f64, upper: f64, inclusive: bool) -> Result<Self, ColumnError> {
9213 let policy = if inclusive { "both" } else { "neither" };
9214 self.between_inclusive(lower, upper, policy)
9215 }
9216
9217 pub fn between_inclusive(
9223 &self,
9224 lower: f64,
9225 upper: f64,
9226 inclusive: &str,
9227 ) -> Result<Self, ColumnError> {
9228 let (include_left, include_right) = match inclusive {
9229 "both" => (true, true),
9230 "left" => (true, false),
9231 "right" => (false, true),
9232 "neither" => (false, false),
9233 other => {
9234 return Err(ColumnError::Type(TypeError::NonNumericValue {
9235 value: other.to_string(),
9236 dtype: self.dtype,
9237 }));
9238 }
9239 };
9240
9241 let mut out = Vec::with_capacity(self.values.len());
9242 for v in &self.values {
9243 if v.is_missing() {
9244 out.push(Scalar::Bool(false));
9245 continue;
9246 }
9247 match v.to_f64() {
9248 Ok(x) => {
9249 let lower_ok = if include_left { x >= lower } else { x > lower };
9250 let upper_ok = if include_right { x <= upper } else { x < upper };
9251 out.push(Scalar::Bool(lower_ok && upper_ok));
9252 }
9253 Err(err) => return Err(ColumnError::Type(err)),
9254 }
9255 }
9256 Self::new(DType::Bool, out)
9257 }
9258
9259 pub fn factorize(&self) -> Result<(Self, Self), ColumnError> {
9264 self.factorize_with_options(false, true)
9265 }
9266
9267 pub fn factorize_with_options(
9274 &self,
9275 sort: bool,
9276 use_na_sentinel: bool,
9277 ) -> Result<(Self, Self), ColumnError> {
9278 #[derive(Hash, PartialEq, Eq, Clone, Copy)]
9286 enum LocalKey<'a> {
9287 Bool(bool),
9288 Int64(i64),
9289 FloatBits(u64),
9290 Utf8(&'a str),
9291 Timedelta64(i64),
9292 Datetime64(i64),
9293 Period(i64),
9294 Interval(u64, u64, IntervalClosed),
9295 }
9296 fn key_of(s: &Scalar) -> Option<LocalKey<'_>> {
9297 match s {
9298 Scalar::Null(_) => None,
9299 Scalar::Bool(b) => Some(LocalKey::Bool(*b)),
9300 Scalar::Int64(i) => Some(LocalKey::Int64(*i)),
9301 Scalar::Float64(f) => {
9302 if f.is_nan() {
9303 None
9304 } else {
9305 let normalized = if *f == 0.0 { 0.0 } else { *f };
9306 Some(LocalKey::FloatBits(normalized.to_bits()))
9307 }
9308 }
9309 Scalar::Utf8(s) => Some(LocalKey::Utf8(s.as_str())),
9310 Scalar::Timedelta64(t) => {
9311 if *t == Timedelta::NAT {
9312 None
9313 } else {
9314 Some(LocalKey::Timedelta64(*t))
9315 }
9316 }
9317 Scalar::Datetime64(t) => {
9318 if *t == Timestamp::NAT {
9319 None
9320 } else {
9321 Some(LocalKey::Datetime64(*t))
9322 }
9323 }
9324 Scalar::Period(p) => {
9325 if *p == i64::MIN {
9326 None
9327 } else {
9328 Some(LocalKey::Period(*p))
9329 }
9330 }
9331 Scalar::Interval(interval) => {
9332 let (left, right, closed) = interval_key(interval);
9333 Some(LocalKey::Interval(left, right, closed))
9334 }
9335 }
9336 }
9337
9338 let (mut codes, mut uniques): (Vec<Scalar>, Vec<Scalar>) = if let Some((data, min, range)) =
9339 self.as_i64_slice()
9340 .and_then(|d| i64_direct_address_range(d).map(|(m, r)| (d, m, r)))
9341 {
9342 let mut code_table = vec![-1i64; range];
9348 let mut uniques: Vec<Scalar> = Vec::new();
9349 let mut codes: Vec<Scalar> = Vec::with_capacity(data.len());
9350 for &v in data {
9351 let slot = (v as i128 - min as i128) as usize;
9352 let existing = code_table[slot];
9353 if existing < 0 {
9354 let code = uniques.len() as i64;
9355 code_table[slot] = code;
9356 uniques.push(Scalar::Int64(v));
9357 codes.push(Scalar::Int64(code));
9358 } else {
9359 codes.push(Scalar::Int64(existing));
9360 }
9361 }
9362 (codes, uniques)
9363 } else {
9364 let mut uniques: Vec<Scalar> = Vec::new();
9365 let mut idx_map: FxHashMap<LocalKey<'_>, i64> = FxHashMap::default();
9366 let mut missing_position: Option<i64> = None;
9367 let mut codes: Vec<Scalar> = Vec::with_capacity(self.values.len());
9368
9369 for value in &self.values {
9370 if value.is_missing() {
9371 if use_na_sentinel {
9372 codes.push(Scalar::Int64(-1));
9373 } else if let Some(p) = missing_position {
9374 codes.push(Scalar::Int64(p));
9375 } else {
9376 let code = uniques.len() as i64;
9377 missing_position = Some(code);
9378 uniques.push(value.clone());
9379 codes.push(Scalar::Int64(code));
9380 }
9381 continue;
9382 }
9383 let Some(key) = key_of(value) else {
9384 codes.push(Scalar::Int64(-1));
9387 continue;
9388 };
9389 match idx_map.get(&key) {
9390 Some(&p) => codes.push(Scalar::Int64(p)),
9391 None => {
9392 let code = uniques.len() as i64;
9393 idx_map.insert(key, code);
9394 uniques.push(value.clone());
9395 codes.push(Scalar::Int64(code));
9396 }
9397 }
9398 }
9399 drop(idx_map);
9400 (codes, uniques)
9401 };
9402
9403 if sort && !uniques.is_empty() {
9404 let mut ordering: Vec<usize> = (0..uniques.len()).collect();
9405 ordering.sort_by(|left, right| {
9406 compare_scalars_na_last(&uniques[*left], &uniques[*right], true)
9407 });
9408
9409 let mut remap = vec![0usize; uniques.len()];
9410 let sorted_uniques: Vec<Scalar> = ordering
9411 .into_iter()
9412 .enumerate()
9413 .map(|(sorted_position, original_position)| {
9414 remap[original_position] = sorted_position;
9415 uniques[original_position].clone()
9416 })
9417 .collect();
9418
9419 for code in &mut codes {
9420 if let Scalar::Int64(value) = code
9421 && *value >= 0
9422 {
9423 *value = remap[*value as usize] as i64;
9424 }
9425 }
9426
9427 uniques = sorted_uniques;
9428 }
9429
9430 let codes_col = Self::new(DType::Int64, codes)?;
9431 let uniques_col = Self::new(self.dtype, uniques)?;
9432 Ok((codes_col, uniques_col))
9433 }
9434
9435 pub fn abs(&self) -> Result<Self, ColumnError> {
9441 if let Some(data) = self.as_i64_slice() {
9447 return Ok(Self::from_i64_values(
9448 data.iter().map(|&x| x.wrapping_abs()).collect(),
9449 ));
9450 }
9451 if let Some(data) = self.as_f64_slice() {
9452 return Ok(Self::from_f64_values(
9453 data.iter().map(|&x| x.abs()).collect(),
9454 ));
9455 }
9456
9457 let mut out = Vec::with_capacity(self.values.len());
9458 for v in &self.values {
9459 if v.is_missing() {
9460 out.push(v.clone());
9461 continue;
9462 }
9463 match v {
9464 Scalar::Bool(x) => out.push(Scalar::Bool(*x)),
9465 Scalar::Int64(x) => out.push(Scalar::Int64(x.wrapping_abs())),
9466 Scalar::Float64(x) => out.push(Scalar::Float64(x.abs())),
9467 Scalar::Timedelta64(x) if *x != Timedelta::NAT => {
9468 out.push(Scalar::Timedelta64(x.wrapping_abs()))
9469 }
9470 _ => {
9471 return Err(ColumnError::Type(TypeError::NonNumericValue {
9472 value: format!("{v:?}"),
9473 dtype: self.dtype,
9474 }));
9475 }
9476 }
9477 }
9478 Self::new(self.dtype, out)
9479 }
9480
9481 pub fn fabs(&self) -> Result<Self, ColumnError> {
9483 self.abs()
9484 }
9485
9486 pub fn absolute(&self) -> Result<Self, ColumnError> {
9488 self.abs()
9489 }
9490
9491 pub fn neg(&self) -> Result<Self, ColumnError> {
9493 if let Some(data) = self.as_i64_slice() {
9497 return Ok(Self::from_i64_values(
9498 data.iter().map(|&x| x.wrapping_neg()).collect(),
9499 ));
9500 }
9501 if let Some(data) = self.as_f64_slice() {
9502 return Ok(Self::from_f64_values(data.iter().map(|&x| -x).collect()));
9503 }
9504 let mut out = Vec::with_capacity(self.values.len());
9505 for v in &self.values {
9506 if v.is_missing() {
9507 out.push(v.clone());
9508 continue;
9509 }
9510 match v {
9511 Scalar::Int64(x) => out.push(Scalar::Int64(x.wrapping_neg())),
9512 Scalar::Float64(x) => out.push(Scalar::Float64(-x)),
9513 Scalar::Timedelta64(x) if *x != Timedelta::NAT => {
9514 out.push(Scalar::Timedelta64(x.wrapping_neg()))
9515 }
9516 _ => {
9517 return Err(ColumnError::Type(TypeError::NonNumericValue {
9518 value: format!("{v:?}"),
9519 dtype: self.dtype,
9520 }));
9521 }
9522 }
9523 }
9524 Self::new(self.dtype, out)
9525 }
9526
9527 pub fn positive(&self) -> Result<Self, ColumnError> {
9529 for v in &self.values {
9530 if v.is_missing() {
9531 continue;
9532 }
9533 match v {
9534 Scalar::Int64(_) | Scalar::Float64(_) | Scalar::Timedelta64(_) => {}
9535 _ => {
9536 return Err(ColumnError::Type(TypeError::NonNumericValue {
9537 value: format!("{v:?}"),
9538 dtype: self.dtype,
9539 }));
9540 }
9541 }
9542 }
9543 Ok(self.clone())
9544 }
9545
9546 pub fn negative(&self) -> Result<Self, ColumnError> {
9548 self.neg()
9549 }
9550
9551 pub fn sqrt(&self) -> Result<Self, ColumnError> {
9553 if let Some(out) = self.typed_float_unary(f64::sqrt) {
9554 return Ok(out);
9555 }
9556 let mut out = Vec::with_capacity(self.values.len());
9557 for v in &self.values {
9558 if v.is_missing() {
9559 out.push(Scalar::Float64(f64::NAN));
9560 continue;
9561 }
9562 match v {
9563 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).sqrt())),
9564 Scalar::Float64(x) => out.push(Scalar::Float64(x.sqrt())),
9565 _ => {
9566 return Err(ColumnError::Type(TypeError::NonNumericValue {
9567 value: format!("{v:?}"),
9568 dtype: self.dtype,
9569 }));
9570 }
9571 }
9572 }
9573 Self::new(DType::Float64, out)
9574 }
9575
9576 pub fn exp(&self) -> Result<Self, ColumnError> {
9578 if let Some(out) = self.typed_float_unary(f64::exp) {
9579 return Ok(out);
9580 }
9581 let mut out = Vec::with_capacity(self.values.len());
9582 for v in &self.values {
9583 if v.is_missing() {
9584 out.push(Scalar::Float64(f64::NAN));
9585 continue;
9586 }
9587 match v {
9588 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).exp())),
9589 Scalar::Float64(x) => out.push(Scalar::Float64(x.exp())),
9590 _ => {
9591 return Err(ColumnError::Type(TypeError::NonNumericValue {
9592 value: format!("{v:?}"),
9593 dtype: self.dtype,
9594 }));
9595 }
9596 }
9597 }
9598 Self::new(DType::Float64, out)
9599 }
9600
9601 pub fn log(&self) -> Result<Self, ColumnError> {
9603 if let Some(out) = self.typed_float_unary(f64::ln) {
9604 return Ok(out);
9605 }
9606 let mut out = Vec::with_capacity(self.values.len());
9607 for v in &self.values {
9608 if v.is_missing() {
9609 out.push(Scalar::Float64(f64::NAN));
9610 continue;
9611 }
9612 match v {
9613 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).ln())),
9614 Scalar::Float64(x) => out.push(Scalar::Float64(x.ln())),
9615 _ => {
9616 return Err(ColumnError::Type(TypeError::NonNumericValue {
9617 value: format!("{v:?}"),
9618 dtype: self.dtype,
9619 }));
9620 }
9621 }
9622 }
9623 Self::new(DType::Float64, out)
9624 }
9625
9626 pub fn log10(&self) -> Result<Self, ColumnError> {
9628 if let Some(out) = self.typed_float_unary(f64::log10) {
9629 return Ok(out);
9630 }
9631 let mut out = Vec::with_capacity(self.values.len());
9632 for v in &self.values {
9633 if v.is_missing() {
9634 out.push(Scalar::Float64(f64::NAN));
9635 continue;
9636 }
9637 match v {
9638 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).log10())),
9639 Scalar::Float64(x) => out.push(Scalar::Float64(x.log10())),
9640 _ => {
9641 return Err(ColumnError::Type(TypeError::NonNumericValue {
9642 value: format!("{v:?}"),
9643 dtype: self.dtype,
9644 }));
9645 }
9646 }
9647 }
9648 Self::new(DType::Float64, out)
9649 }
9650
9651 pub fn log2(&self) -> Result<Self, ColumnError> {
9653 if let Some(out) = self.typed_float_unary(f64::log2) {
9654 return Ok(out);
9655 }
9656 let mut out = Vec::with_capacity(self.values.len());
9657 for v in &self.values {
9658 if v.is_missing() {
9659 out.push(Scalar::Float64(f64::NAN));
9660 continue;
9661 }
9662 match v {
9663 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).log2())),
9664 Scalar::Float64(x) => out.push(Scalar::Float64(x.log2())),
9665 _ => {
9666 return Err(ColumnError::Type(TypeError::NonNumericValue {
9667 value: format!("{v:?}"),
9668 dtype: self.dtype,
9669 }));
9670 }
9671 }
9672 }
9673 Self::new(DType::Float64, out)
9674 }
9675
9676 pub fn sin(&self) -> Result<Self, ColumnError> {
9678 if let Some(out) = self.typed_float_unary(f64::sin) {
9679 return Ok(out);
9680 }
9681 let mut out = Vec::with_capacity(self.values.len());
9682 for v in &self.values {
9683 if v.is_missing() {
9684 out.push(Scalar::Float64(f64::NAN));
9685 continue;
9686 }
9687 match v {
9688 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).sin())),
9689 Scalar::Float64(x) => out.push(Scalar::Float64(x.sin())),
9690 _ => {
9691 return Err(ColumnError::Type(TypeError::NonNumericValue {
9692 value: format!("{v:?}"),
9693 dtype: self.dtype,
9694 }));
9695 }
9696 }
9697 }
9698 Self::new(DType::Float64, out)
9699 }
9700
9701 pub fn cos(&self) -> Result<Self, ColumnError> {
9703 if let Some(out) = self.typed_float_unary(f64::cos) {
9704 return Ok(out);
9705 }
9706 let mut out = Vec::with_capacity(self.values.len());
9707 for v in &self.values {
9708 if v.is_missing() {
9709 out.push(Scalar::Float64(f64::NAN));
9710 continue;
9711 }
9712 match v {
9713 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).cos())),
9714 Scalar::Float64(x) => out.push(Scalar::Float64(x.cos())),
9715 _ => {
9716 return Err(ColumnError::Type(TypeError::NonNumericValue {
9717 value: format!("{v:?}"),
9718 dtype: self.dtype,
9719 }));
9720 }
9721 }
9722 }
9723 Self::new(DType::Float64, out)
9724 }
9725
9726 pub fn tan(&self) -> Result<Self, ColumnError> {
9728 if let Some(out) = self.typed_float_unary(f64::tan) {
9729 return Ok(out);
9730 }
9731 let mut out = Vec::with_capacity(self.values.len());
9732 for v in &self.values {
9733 if v.is_missing() {
9734 out.push(Scalar::Float64(f64::NAN));
9735 continue;
9736 }
9737 match v {
9738 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).tan())),
9739 Scalar::Float64(x) => out.push(Scalar::Float64(x.tan())),
9740 _ => {
9741 return Err(ColumnError::Type(TypeError::NonNumericValue {
9742 value: format!("{v:?}"),
9743 dtype: self.dtype,
9744 }));
9745 }
9746 }
9747 }
9748 Self::new(DType::Float64, out)
9749 }
9750
9751 pub fn asin(&self) -> Result<Self, ColumnError> {
9753 if let Some(out) = self.typed_float_unary(f64::asin) {
9754 return Ok(out);
9755 }
9756 let mut out = Vec::with_capacity(self.values.len());
9757 for v in &self.values {
9758 if v.is_missing() {
9759 out.push(Scalar::Float64(f64::NAN));
9760 continue;
9761 }
9762 match v {
9763 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).asin())),
9764 Scalar::Float64(x) => out.push(Scalar::Float64(x.asin())),
9765 _ => {
9766 return Err(ColumnError::Type(TypeError::NonNumericValue {
9767 value: format!("{v:?}"),
9768 dtype: self.dtype,
9769 }));
9770 }
9771 }
9772 }
9773 Self::new(DType::Float64, out)
9774 }
9775
9776 pub fn acos(&self) -> Result<Self, ColumnError> {
9778 if let Some(out) = self.typed_float_unary(f64::acos) {
9779 return Ok(out);
9780 }
9781 let mut out = Vec::with_capacity(self.values.len());
9782 for v in &self.values {
9783 if v.is_missing() {
9784 out.push(Scalar::Float64(f64::NAN));
9785 continue;
9786 }
9787 match v {
9788 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).acos())),
9789 Scalar::Float64(x) => out.push(Scalar::Float64(x.acos())),
9790 _ => {
9791 return Err(ColumnError::Type(TypeError::NonNumericValue {
9792 value: format!("{v:?}"),
9793 dtype: self.dtype,
9794 }));
9795 }
9796 }
9797 }
9798 Self::new(DType::Float64, out)
9799 }
9800
9801 pub fn atan(&self) -> Result<Self, ColumnError> {
9803 if let Some(out) = self.typed_float_unary(f64::atan) {
9804 return Ok(out);
9805 }
9806 let mut out = Vec::with_capacity(self.values.len());
9807 for v in &self.values {
9808 if v.is_missing() {
9809 out.push(Scalar::Float64(f64::NAN));
9810 continue;
9811 }
9812 match v {
9813 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).atan())),
9814 Scalar::Float64(x) => out.push(Scalar::Float64(x.atan())),
9815 _ => {
9816 return Err(ColumnError::Type(TypeError::NonNumericValue {
9817 value: format!("{v:?}"),
9818 dtype: self.dtype,
9819 }));
9820 }
9821 }
9822 }
9823 Self::new(DType::Float64, out)
9824 }
9825
9826 pub fn sinh(&self) -> Result<Self, ColumnError> {
9828 if let Some(out) = self.typed_float_unary(f64::sinh) {
9829 return Ok(out);
9830 }
9831 let mut out = Vec::with_capacity(self.values.len());
9832 for v in &self.values {
9833 if v.is_missing() {
9834 out.push(Scalar::Float64(f64::NAN));
9835 continue;
9836 }
9837 match v {
9838 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).sinh())),
9839 Scalar::Float64(x) => out.push(Scalar::Float64(x.sinh())),
9840 _ => {
9841 return Err(ColumnError::Type(TypeError::NonNumericValue {
9842 value: format!("{v:?}"),
9843 dtype: self.dtype,
9844 }));
9845 }
9846 }
9847 }
9848 Self::new(DType::Float64, out)
9849 }
9850
9851 pub fn cosh(&self) -> Result<Self, ColumnError> {
9853 if let Some(out) = self.typed_float_unary(f64::cosh) {
9854 return Ok(out);
9855 }
9856 let mut out = Vec::with_capacity(self.values.len());
9857 for v in &self.values {
9858 if v.is_missing() {
9859 out.push(Scalar::Float64(f64::NAN));
9860 continue;
9861 }
9862 match v {
9863 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).cosh())),
9864 Scalar::Float64(x) => out.push(Scalar::Float64(x.cosh())),
9865 _ => {
9866 return Err(ColumnError::Type(TypeError::NonNumericValue {
9867 value: format!("{v:?}"),
9868 dtype: self.dtype,
9869 }));
9870 }
9871 }
9872 }
9873 Self::new(DType::Float64, out)
9874 }
9875
9876 pub fn tanh(&self) -> Result<Self, ColumnError> {
9878 if let Some(out) = self.typed_float_unary(f64::tanh) {
9879 return Ok(out);
9880 }
9881 let mut out = Vec::with_capacity(self.values.len());
9882 for v in &self.values {
9883 if v.is_missing() {
9884 out.push(Scalar::Float64(f64::NAN));
9885 continue;
9886 }
9887 match v {
9888 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).tanh())),
9889 Scalar::Float64(x) => out.push(Scalar::Float64(x.tanh())),
9890 _ => {
9891 return Err(ColumnError::Type(TypeError::NonNumericValue {
9892 value: format!("{v:?}"),
9893 dtype: self.dtype,
9894 }));
9895 }
9896 }
9897 }
9898 Self::new(DType::Float64, out)
9899 }
9900
9901 pub fn asinh(&self) -> Result<Self, ColumnError> {
9903 if let Some(out) = self.typed_float_unary(f64::asinh) {
9904 return Ok(out);
9905 }
9906 let mut out = Vec::with_capacity(self.values.len());
9907 for v in &self.values {
9908 if v.is_missing() {
9909 out.push(Scalar::Float64(f64::NAN));
9910 continue;
9911 }
9912 match v {
9913 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).asinh())),
9914 Scalar::Float64(x) => out.push(Scalar::Float64(x.asinh())),
9915 _ => {
9916 return Err(ColumnError::Type(TypeError::NonNumericValue {
9917 value: format!("{v:?}"),
9918 dtype: self.dtype,
9919 }));
9920 }
9921 }
9922 }
9923 Self::new(DType::Float64, out)
9924 }
9925
9926 pub fn acosh(&self) -> Result<Self, ColumnError> {
9928 if let Some(out) = self.typed_float_unary(f64::acosh) {
9929 return Ok(out);
9930 }
9931 let mut out = Vec::with_capacity(self.values.len());
9932 for v in &self.values {
9933 if v.is_missing() {
9934 out.push(Scalar::Float64(f64::NAN));
9935 continue;
9936 }
9937 match v {
9938 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).acosh())),
9939 Scalar::Float64(x) => out.push(Scalar::Float64(x.acosh())),
9940 _ => {
9941 return Err(ColumnError::Type(TypeError::NonNumericValue {
9942 value: format!("{v:?}"),
9943 dtype: self.dtype,
9944 }));
9945 }
9946 }
9947 }
9948 Self::new(DType::Float64, out)
9949 }
9950
9951 pub fn atanh(&self) -> Result<Self, ColumnError> {
9953 if let Some(out) = self.typed_float_unary(f64::atanh) {
9954 return Ok(out);
9955 }
9956 let mut out = Vec::with_capacity(self.values.len());
9957 for v in &self.values {
9958 if v.is_missing() {
9959 out.push(Scalar::Float64(f64::NAN));
9960 continue;
9961 }
9962 match v {
9963 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).atanh())),
9964 Scalar::Float64(x) => out.push(Scalar::Float64(x.atanh())),
9965 _ => {
9966 return Err(ColumnError::Type(TypeError::NonNumericValue {
9967 value: format!("{v:?}"),
9968 dtype: self.dtype,
9969 }));
9970 }
9971 }
9972 }
9973 Self::new(DType::Float64, out)
9974 }
9975
    /// Alternate spelling of `asin`; forwards the call and returns its result
    /// (including any error) unchanged.
    pub fn arcsin(&self) -> Result<Self, ColumnError> {
        self.asin()
    }
9982
    /// Alternate spelling of `acos`; forwards the call and returns its result
    /// (including any error) unchanged.
    pub fn arccos(&self) -> Result<Self, ColumnError> {
        self.acos()
    }
9989
    /// Alternate spelling of `atan`; forwards the call and returns its result
    /// (including any error) unchanged.
    pub fn arctan(&self) -> Result<Self, ColumnError> {
        self.atan()
    }
9996
    /// Alternate spelling of `atan2`; forwards `other` to it and returns its
    /// result (including any error) unchanged.
    pub fn arctan2(&self, other: &Self) -> Result<Self, ColumnError> {
        self.atan2(other)
    }
10003
    /// Alternate spelling of `asinh`; forwards the call and returns its result
    /// (including any error) unchanged.
    pub fn arcsinh(&self) -> Result<Self, ColumnError> {
        self.asinh()
    }
10010
    /// Alternate spelling of `acosh`; forwards the call and returns its result
    /// (including any error) unchanged.
    pub fn arccosh(&self) -> Result<Self, ColumnError> {
        self.acosh()
    }
10017
    /// Alternate spelling of `atanh`; forwards the call and returns its result
    /// (including any error) unchanged.
    pub fn arctanh(&self) -> Result<Self, ColumnError> {
        self.atanh()
    }
10024
10025 fn typed_float_unary(&self, f: fn(f64) -> f64) -> Option<Self> {
10036 if let Some(data) = self.as_f64_slice() {
10037 return Some(Self::from_f64_values(data.iter().map(|&x| f(x)).collect()));
10038 }
10039 if let Some(data) = self.as_i64_slice() {
10040 return Some(Self::from_f64_values(
10041 data.iter().map(|&x| f(x as f64)).collect(),
10042 ));
10043 }
10044 None
10045 }
10046
10047 fn all_valid_as_f64(&self) -> Option<Vec<f64>> {
10051 if let Some(s) = self.as_f64_slice() {
10052 return Some(s.to_vec());
10053 }
10054 if let Some(s) = self.as_i64_slice() {
10055 return Some(s.iter().map(|&x| x as f64).collect());
10056 }
10057 None
10058 }
10059
10060 fn typed_float_binary(&self, other: &Self, f: fn(f64, f64) -> f64) -> Option<Self> {
10068 let a = self.all_valid_as_f64()?;
10069 let b = other.all_valid_as_f64()?;
10070 Some(Self::from_f64_values(
10071 a.iter().zip(b.iter()).map(|(&x, &y)| f(x, y)).collect(),
10072 ))
10073 }
10074
10075 pub fn floor(&self) -> Result<Self, ColumnError> {
10076 if let Some(out) = self.typed_float_unary(f64::floor) {
10077 return Ok(out);
10078 }
10079 let mut out = Vec::with_capacity(self.values.len());
10080 for v in &self.values {
10081 if v.is_missing() {
10082 out.push(Scalar::Float64(f64::NAN));
10083 continue;
10084 }
10085 match v {
10086 Scalar::Int64(x) => out.push(Scalar::Float64(*x as f64)),
10087 Scalar::Float64(x) => out.push(Scalar::Float64(x.floor())),
10088 _ => {
10089 return Err(ColumnError::Type(TypeError::NonNumericValue {
10090 value: format!("{v:?}"),
10091 dtype: self.dtype,
10092 }));
10093 }
10094 }
10095 }
10096 Self::new(DType::Float64, out)
10097 }
10098
10099 pub fn ceil(&self) -> Result<Self, ColumnError> {
10101 if let Some(out) = self.typed_float_unary(f64::ceil) {
10102 return Ok(out);
10103 }
10104 let mut out = Vec::with_capacity(self.values.len());
10105 for v in &self.values {
10106 if v.is_missing() {
10107 out.push(Scalar::Float64(f64::NAN));
10108 continue;
10109 }
10110 match v {
10111 Scalar::Int64(x) => out.push(Scalar::Float64(*x as f64)),
10112 Scalar::Float64(x) => out.push(Scalar::Float64(x.ceil())),
10113 _ => {
10114 return Err(ColumnError::Type(TypeError::NonNumericValue {
10115 value: format!("{v:?}"),
10116 dtype: self.dtype,
10117 }));
10118 }
10119 }
10120 }
10121 Self::new(DType::Float64, out)
10122 }
10123
10124 pub fn trunc(&self) -> Result<Self, ColumnError> {
10126 if let Some(out) = self.typed_float_unary(f64::trunc) {
10127 return Ok(out);
10128 }
10129 let mut out = Vec::with_capacity(self.values.len());
10130 for v in &self.values {
10131 if v.is_missing() {
10132 out.push(Scalar::Float64(f64::NAN));
10133 continue;
10134 }
10135 match v {
10136 Scalar::Int64(x) => out.push(Scalar::Float64(*x as f64)),
10137 Scalar::Float64(x) => out.push(Scalar::Float64(x.trunc())),
10138 _ => {
10139 return Err(ColumnError::Type(TypeError::NonNumericValue {
10140 value: format!("{v:?}"),
10141 dtype: self.dtype,
10142 }));
10143 }
10144 }
10145 }
10146 Self::new(DType::Float64, out)
10147 }
10148
    /// Calls `nan_to_num_with_values` with the fixed replacements `0.0` for
    /// NaN, `f64::MAX` for positive infinity, and `f64::MIN` for negative
    /// infinity, and returns its result unchanged.
    pub fn nan_to_num(&self) -> Result<Self, ColumnError> {
        self.nan_to_num_with_values(0.0, f64::MAX, f64::MIN)
    }
10156
10157 pub fn nan_to_num_with_values(
10161 &self,
10162 nan: f64,
10163 posinf: f64,
10164 neginf: f64,
10165 ) -> Result<Self, ColumnError> {
10166 if let Some(data) = self.as_f64_slice() {
10172 return Ok(Self::from_f64_values(
10173 data.iter()
10174 .map(|&x| {
10175 if x == f64::INFINITY {
10176 posinf
10177 } else if x == f64::NEG_INFINITY {
10178 neginf
10179 } else {
10180 x
10181 }
10182 })
10183 .collect(),
10184 ));
10185 }
10186 if let Some(data) = self.as_i64_slice() {
10187 return Ok(Self::from_f64_values(
10188 data.iter().map(|&x| x as f64).collect(),
10189 ));
10190 }
10191 let mut out = Vec::with_capacity(self.values.len());
10192 for v in &self.values {
10193 let result = match v {
10194 Scalar::Float64(x) => {
10195 if x.is_nan() {
10196 nan
10197 } else if *x == f64::INFINITY {
10198 posinf
10199 } else if *x == f64::NEG_INFINITY {
10200 neginf
10201 } else {
10202 *x
10203 }
10204 }
10205 Scalar::Int64(x) => *x as f64,
10206 Scalar::Null(_) => nan,
10207 _ => {
10208 return Err(ColumnError::Type(TypeError::NonNumericValue {
10209 value: format!("{v:?}"),
10210 dtype: self.dtype,
10211 }));
10212 }
10213 };
10214 out.push(Scalar::Float64(result));
10215 }
10216 Self::new(DType::Float64, out)
10217 }
10218
10219 pub fn rint(&self) -> Result<Self, ColumnError> {
10224 if let Some(out) = self.typed_float_unary(f64::round_ties_even) {
10227 return Ok(out);
10228 }
10229 let mut out = Vec::with_capacity(self.values.len());
10230 for v in &self.values {
10231 if v.is_missing() {
10232 out.push(Scalar::Float64(f64::NAN));
10233 continue;
10234 }
10235 match v {
10236 Scalar::Int64(x) => out.push(Scalar::Float64(*x as f64)),
10237 Scalar::Float64(x) => out.push(Scalar::Float64(x.round_ties_even())),
10238 _ => {
10239 return Err(ColumnError::Type(TypeError::NonNumericValue {
10240 value: format!("{v:?}"),
10241 dtype: self.dtype,
10242 }));
10243 }
10244 }
10245 }
10246 Self::new(DType::Float64, out)
10247 }
10248
    /// Rounds toward zero by delegating to `trunc`; the result (including any
    /// error) is returned unchanged.
    pub fn fix(&self) -> Result<Self, ColumnError> {
        self.trunc()
    }
10255
10256 pub fn trim_zeros(&self, trim: &str) -> Result<Self, ColumnError> {
10263 let values = &self.values;
10264 if values.is_empty() {
10265 return Self::new(self.dtype, vec![]);
10266 }
10267
10268 let is_zero = |s: &Scalar| -> bool {
10269 match s {
10270 Scalar::Int64(x) => *x == 0,
10271 Scalar::Float64(x) => *x == 0.0,
10272 Scalar::Bool(b) => !*b,
10273 _ => false,
10274 }
10275 };
10276
10277 let mut start = 0;
10278 let mut end = values.len();
10279
10280 if trim.contains('f') {
10281 while start < end && is_zero(&values[start]) {
10282 start += 1;
10283 }
10284 }
10285
10286 if trim.contains('b') {
10287 while end > start && is_zero(&values[end - 1]) {
10288 end -= 1;
10289 }
10290 }
10291
10292 Self::new(self.dtype, values[start..end].to_vec())
10293 }
10294
10295 pub fn around(&self, decimals: i32) -> Result<Self, ColumnError> {
10300 let factor = 10.0_f64.powi(decimals);
10301 let mut out = Vec::with_capacity(self.values.len());
10302 for v in &self.values {
10303 if v.is_missing() {
10304 out.push(Scalar::Float64(f64::NAN));
10305 continue;
10306 }
10307 match v {
10308 Scalar::Int64(x) => {
10309 if decimals >= 0 {
10310 out.push(Scalar::Int64(*x));
10311 } else {
10312 let rounded = ((*x as f64) * factor).round_ties_even() / factor;
10315 out.push(Scalar::Int64(rounded as i64));
10316 }
10317 }
10318 Scalar::Float64(x) => {
10319 let rounded = (*x * factor).round_ties_even() / factor;
10320 out.push(Scalar::Float64(rounded));
10321 }
10322 _ => {
10323 return Err(ColumnError::Type(TypeError::NonNumericValue {
10324 value: format!("{v:?}"),
10325 dtype: self.dtype,
10326 }));
10327 }
10328 }
10329 }
10330 if decimals >= 0 && self.dtype == DType::Int64 {
10331 Self::new(DType::Int64, out)
10332 } else {
10333 Self::new(DType::Float64, out)
10334 }
10335 }
10336
10337 pub fn unwrap(&self, discont: Option<f64>) -> Result<Self, ColumnError> {
10343 let threshold = discont.unwrap_or(std::f64::consts::PI);
10344 let two_pi = 2.0 * std::f64::consts::PI;
10345
10346 let mut out = Vec::with_capacity(self.values.len());
10347 let mut offset = 0.0;
10348
10349 for (i, v) in self.values.iter().enumerate() {
10350 if v.is_missing() {
10351 out.push(Scalar::Float64(f64::NAN));
10352 continue;
10353 }
10354 let x = match v {
10355 Scalar::Int64(x) => *x as f64,
10356 Scalar::Float64(x) => *x,
10357 _ => {
10358 return Err(ColumnError::Type(TypeError::NonNumericValue {
10359 value: format!("{v:?}"),
10360 dtype: self.dtype,
10361 }));
10362 }
10363 };
10364
10365 if i == 0 {
10366 out.push(Scalar::Float64(x));
10367 } else {
10368 let prev = match &out[out.len() - 1] {
10369 Scalar::Float64(p) if !p.is_nan() => *p,
10370 _ => {
10371 out.push(Scalar::Float64(x + offset));
10372 continue;
10373 }
10374 };
10375
10376 let diff = x + offset - prev;
10377 if diff > threshold {
10378 offset -= two_pi * ((diff + std::f64::consts::PI) / two_pi).floor();
10379 } else if diff < -threshold {
10380 offset += two_pi * ((-diff + std::f64::consts::PI) / two_pi).floor();
10381 }
10382 out.push(Scalar::Float64(x + offset));
10383 }
10384 }
10385
10386 Self::new(DType::Float64, out)
10387 }
10388
10389 pub fn expm1(&self) -> Result<Self, ColumnError> {
10391 if let Some(out) = self.typed_float_unary(f64::exp_m1) {
10392 return Ok(out);
10393 }
10394 let mut out = Vec::with_capacity(self.values.len());
10395 for v in &self.values {
10396 if v.is_missing() {
10397 out.push(Scalar::Float64(f64::NAN));
10398 continue;
10399 }
10400 match v {
10401 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).exp_m1())),
10402 Scalar::Float64(x) => out.push(Scalar::Float64(x.exp_m1())),
10403 _ => {
10404 return Err(ColumnError::Type(TypeError::NonNumericValue {
10405 value: format!("{v:?}"),
10406 dtype: self.dtype,
10407 }));
10408 }
10409 }
10410 }
10411 Self::new(DType::Float64, out)
10412 }
10413
10414 pub fn log1p(&self) -> Result<Self, ColumnError> {
10416 if let Some(out) = self.typed_float_unary(f64::ln_1p) {
10417 return Ok(out);
10418 }
10419 let mut out = Vec::with_capacity(self.values.len());
10420 for v in &self.values {
10421 if v.is_missing() {
10422 out.push(Scalar::Float64(f64::NAN));
10423 continue;
10424 }
10425 match v {
10426 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).ln_1p())),
10427 Scalar::Float64(x) => out.push(Scalar::Float64(x.ln_1p())),
10428 _ => {
10429 return Err(ColumnError::Type(TypeError::NonNumericValue {
10430 value: format!("{v:?}"),
10431 dtype: self.dtype,
10432 }));
10433 }
10434 }
10435 }
10436 Self::new(DType::Float64, out)
10437 }
10438
10439 pub fn cbrt(&self) -> Result<Self, ColumnError> {
10441 if let Some(out) = self.typed_float_unary(f64::cbrt) {
10442 return Ok(out);
10443 }
10444 let mut out = Vec::with_capacity(self.values.len());
10445 for v in &self.values {
10446 if v.is_missing() {
10447 out.push(Scalar::Float64(f64::NAN));
10448 continue;
10449 }
10450 match v {
10451 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).cbrt())),
10452 Scalar::Float64(x) => out.push(Scalar::Float64(x.cbrt())),
10453 _ => {
10454 return Err(ColumnError::Type(TypeError::NonNumericValue {
10455 value: format!("{v:?}"),
10456 dtype: self.dtype,
10457 }));
10458 }
10459 }
10460 }
10461 Self::new(DType::Float64, out)
10462 }
10463
10464 pub fn ldexp(&self, exp: i32) -> Result<Self, ColumnError> {
10468 let multiplier = 2.0_f64.powi(exp);
10469 let mut out = Vec::with_capacity(self.values.len());
10470 for v in &self.values {
10471 if v.is_missing() {
10472 out.push(Scalar::Float64(f64::NAN));
10473 continue;
10474 }
10475 match v {
10476 Scalar::Int64(x) => out.push(Scalar::Float64(*x as f64 * multiplier)),
10477 Scalar::Float64(x) => out.push(Scalar::Float64(x * multiplier)),
10478 _ => {
10479 return Err(ColumnError::Type(TypeError::NonNumericValue {
10480 value: format!("{v:?}"),
10481 dtype: self.dtype,
10482 }));
10483 }
10484 }
10485 }
10486 Self::new(DType::Float64, out)
10487 }
10488
10489 pub fn modf(&self) -> Result<(Self, Self), ColumnError> {
10494 let mut frac = Vec::with_capacity(self.values.len());
10495 let mut int = Vec::with_capacity(self.values.len());
10496 for v in &self.values {
10497 if v.is_missing() {
10498 frac.push(Scalar::Float64(f64::NAN));
10499 int.push(Scalar::Float64(f64::NAN));
10500 continue;
10501 }
10502 match v {
10503 Scalar::Int64(x) => {
10504 frac.push(Scalar::Float64(0.0));
10505 int.push(Scalar::Float64(*x as f64));
10506 }
10507 Scalar::Float64(x) => {
10508 let i = x.trunc();
10509 let f = x - i;
10510 frac.push(Scalar::Float64(f));
10511 int.push(Scalar::Float64(i));
10512 }
10513 _ => {
10514 return Err(ColumnError::Type(TypeError::NonNumericValue {
10515 value: format!("{v:?}"),
10516 dtype: self.dtype,
10517 }));
10518 }
10519 }
10520 }
10521 Ok((
10522 Self::new(DType::Float64, frac)?,
10523 Self::new(DType::Float64, int)?,
10524 ))
10525 }
10526
10527 pub fn frexp(&self) -> Result<(Self, Self), ColumnError> {
10534 let mut mantissa = Vec::with_capacity(self.values.len());
10535 let mut exponent = Vec::with_capacity(self.values.len());
10536 for v in &self.values {
10537 if v.is_missing() {
10538 mantissa.push(Scalar::Float64(f64::NAN));
10539 exponent.push(Scalar::Int64(0));
10540 continue;
10541 }
10542 match v {
10543 Scalar::Int64(x) => {
10544 let f = *x as f64;
10545 if f == 0.0 {
10546 mantissa.push(Scalar::Float64(0.0));
10547 exponent.push(Scalar::Int64(0));
10548 } else {
10549 let bits = f.abs().to_bits();
10550 let exp_bits = ((bits >> 52) & 0x7ff) as i64;
10551 let exp = exp_bits - 1022; let mant_bits = (bits & 0x000f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000;
10553 let mant = f64::from_bits(mant_bits);
10554 let mant = if f < 0.0 { -mant } else { mant };
10555 mantissa.push(Scalar::Float64(mant));
10556 exponent.push(Scalar::Int64(exp));
10557 }
10558 }
10559 Scalar::Float64(x) => {
10560 if x.is_nan() {
10561 mantissa.push(Scalar::Float64(f64::NAN));
10562 exponent.push(Scalar::Int64(0));
10563 } else if x.is_infinite() {
10564 mantissa.push(Scalar::Float64(*x));
10565 exponent.push(Scalar::Int64(0));
10566 } else if *x == 0.0 {
10567 mantissa.push(Scalar::Float64(*x)); exponent.push(Scalar::Int64(0));
10569 } else {
10570 let bits = x.abs().to_bits();
10571 let exp_bits = ((bits >> 52) & 0x7ff) as i64;
10572 if exp_bits == 0 {
10573 let scaled = x.abs() * 2.0_f64.powi(64);
10575 let sbits = scaled.to_bits();
10576 let sexp_bits = ((sbits >> 52) & 0x7ff) as i64;
10577 let exp = sexp_bits - 1022 - 64;
10578 let mant_bits = (sbits & 0x000f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000;
10579 let mant = f64::from_bits(mant_bits);
10580 let mant = if *x < 0.0 { -mant } else { mant };
10581 mantissa.push(Scalar::Float64(mant));
10582 exponent.push(Scalar::Int64(exp));
10583 } else {
10584 let exp = exp_bits - 1022;
10585 let mant_bits = (bits & 0x000f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000;
10586 let mant = f64::from_bits(mant_bits);
10587 let mant = if *x < 0.0 { -mant } else { mant };
10588 mantissa.push(Scalar::Float64(mant));
10589 exponent.push(Scalar::Int64(exp));
10590 }
10591 }
10592 }
10593 _ => {
10594 return Err(ColumnError::Type(TypeError::NonNumericValue {
10595 value: format!("{v:?}"),
10596 dtype: self.dtype,
10597 }));
10598 }
10599 }
10600 }
10601 Ok((
10602 Self::new(DType::Float64, mantissa)?,
10603 Self::new(DType::Int64, exponent)?,
10604 ))
10605 }
10606
10607 pub fn nextafter(&self, other: &Self) -> Result<Self, ColumnError> {
10612 if self.len() != other.len() {
10613 return Err(ColumnError::LengthMismatch {
10614 left: self.len(),
10615 right: other.len(),
10616 });
10617 }
10618 let mut out = Vec::with_capacity(self.values.len());
10619 for (v1, v2) in self.values.iter().zip(other.values.iter()) {
10620 if v1.is_missing() || v2.is_missing() {
10621 out.push(Scalar::Float64(f64::NAN));
10622 continue;
10623 }
10624 let x = v1.to_f64().map_err(ColumnError::Type)?;
10625 let y = v2.to_f64().map_err(ColumnError::Type)?;
10626 let result = if x.is_nan() || y.is_nan() {
10627 f64::NAN
10628 } else if x == y {
10629 x
10630 } else if x == 0.0 {
10631 if y > 0.0 {
10633 f64::from_bits(1) } else {
10635 -f64::from_bits(1) }
10637 } else {
10638 let bits = x.to_bits() as i64;
10639 let next_bits = if (x > 0.0) == (y > x) {
10640 bits + 1
10641 } else {
10642 bits - 1
10643 };
10644 f64::from_bits(next_bits as u64)
10645 };
10646 out.push(Scalar::Float64(result));
10647 }
10648 Self::new(DType::Float64, out)
10649 }
10650
10651 pub fn isneginf(&self) -> Result<Self, ColumnError> {
10656 let mut out = Vec::with_capacity(self.values.len());
10657 for v in &self.values {
10658 if v.is_missing() {
10659 out.push(Scalar::Bool(false));
10660 continue;
10661 }
10662 match v {
10663 Scalar::Int64(_) => out.push(Scalar::Bool(false)),
10664 Scalar::Float64(x) => out.push(Scalar::Bool(*x == f64::NEG_INFINITY)),
10665 _ => out.push(Scalar::Bool(false)),
10666 }
10667 }
10668 Self::new(DType::Bool, out)
10669 }
10670
10671 pub fn isposinf(&self) -> Result<Self, ColumnError> {
10676 let mut out = Vec::with_capacity(self.values.len());
10677 for v in &self.values {
10678 if v.is_missing() {
10679 out.push(Scalar::Bool(false));
10680 continue;
10681 }
10682 match v {
10683 Scalar::Int64(_) => out.push(Scalar::Bool(false)),
10684 Scalar::Float64(x) => out.push(Scalar::Bool(*x == f64::INFINITY)),
10685 _ => out.push(Scalar::Bool(false)),
10686 }
10687 }
10688 Self::new(DType::Bool, out)
10689 }
10690
10691 pub fn exp2(&self) -> Result<Self, ColumnError> {
10695 if let Some(data) = self.as_f64_slice() {
10699 return Ok(Self::from_f64_values(
10700 data.iter().map(|&x| x.exp2()).collect(),
10701 ));
10702 }
10703 if let Some(data) = self.as_i64_slice() {
10704 return Ok(Self::from_f64_values(
10705 data.iter().map(|&x| 2.0_f64.powi(x as i32)).collect(),
10706 ));
10707 }
10708 let mut out = Vec::with_capacity(self.values.len());
10709 for v in &self.values {
10710 if v.is_missing() {
10711 out.push(Scalar::Float64(f64::NAN));
10712 continue;
10713 }
10714 match v {
10715 Scalar::Int64(x) => out.push(Scalar::Float64(2.0_f64.powi(*x as i32))),
10716 Scalar::Float64(x) => out.push(Scalar::Float64(x.exp2())),
10717 _ => {
10718 return Err(ColumnError::Type(TypeError::NonNumericValue {
10719 value: format!("{v:?}"),
10720 dtype: self.dtype,
10721 }));
10722 }
10723 }
10724 }
10725 Self::new(DType::Float64, out)
10726 }
10727
10728 pub fn sinc(&self) -> Result<Self, ColumnError> {
10732 if let Some(out) = self.typed_float_unary(|x| {
10735 if x == 0.0 {
10736 1.0
10737 } else {
10738 let px = std::f64::consts::PI * x;
10739 px.sin() / px
10740 }
10741 }) {
10742 return Ok(out);
10743 }
10744 let mut out = Vec::with_capacity(self.values.len());
10745 for v in &self.values {
10746 if v.is_missing() {
10747 out.push(Scalar::Float64(f64::NAN));
10748 continue;
10749 }
10750 match v {
10751 Scalar::Int64(x) => {
10752 if *x == 0 {
10753 out.push(Scalar::Float64(1.0));
10754 } else {
10755 let px = std::f64::consts::PI * (*x as f64);
10756 out.push(Scalar::Float64(px.sin() / px));
10757 }
10758 }
10759 Scalar::Float64(x) => {
10760 if *x == 0.0 {
10761 out.push(Scalar::Float64(1.0));
10762 } else if x.is_nan() {
10763 out.push(Scalar::Float64(f64::NAN));
10764 } else {
10765 let px = std::f64::consts::PI * x;
10766 out.push(Scalar::Float64(px.sin() / px));
10767 }
10768 }
10769 _ => {
10770 return Err(ColumnError::Type(TypeError::NonNumericValue {
10771 value: format!("{v:?}"),
10772 dtype: self.dtype,
10773 }));
10774 }
10775 }
10776 }
10777 Self::new(DType::Float64, out)
10778 }
10779
10780 pub fn logaddexp(&self, other: &Self) -> Result<Self, ColumnError> {
10785 if self.len() != other.len() {
10786 return Err(ColumnError::LengthMismatch {
10787 left: self.len(),
10788 right: other.len(),
10789 });
10790 }
10791 let mut out = Vec::with_capacity(self.values.len());
10792 for (v1, v2) in self.values.iter().zip(other.values.iter()) {
10793 if v1.is_missing() || v2.is_missing() {
10794 out.push(Scalar::Float64(f64::NAN));
10795 continue;
10796 }
10797 let x = v1.to_f64().map_err(ColumnError::Type)?;
10798 let y = v2.to_f64().map_err(ColumnError::Type)?;
10799 let result = if x.is_nan() || y.is_nan() {
10800 f64::NAN
10801 } else if x == f64::NEG_INFINITY {
10802 y
10803 } else if y == f64::NEG_INFINITY {
10804 x
10805 } else if x == f64::INFINITY || y == f64::INFINITY {
10806 f64::INFINITY
10807 } else if x >= y {
10808 x + (y - x).exp().ln_1p()
10809 } else {
10810 y + (x - y).exp().ln_1p()
10811 };
10812 out.push(Scalar::Float64(result));
10813 }
10814 Self::new(DType::Float64, out)
10815 }
10816
10817 pub fn logaddexp2(&self, other: &Self) -> Result<Self, ColumnError> {
10821 if self.len() != other.len() {
10822 return Err(ColumnError::LengthMismatch {
10823 left: self.len(),
10824 right: other.len(),
10825 });
10826 }
10827 let ln2 = std::f64::consts::LN_2;
10828 let mut out = Vec::with_capacity(self.values.len());
10829 for (v1, v2) in self.values.iter().zip(other.values.iter()) {
10830 if v1.is_missing() || v2.is_missing() {
10831 out.push(Scalar::Float64(f64::NAN));
10832 continue;
10833 }
10834 let x = v1.to_f64().map_err(ColumnError::Type)?;
10835 let y = v2.to_f64().map_err(ColumnError::Type)?;
10836 let result = if x.is_nan() || y.is_nan() {
10837 f64::NAN
10838 } else if x == f64::NEG_INFINITY {
10839 y
10840 } else if y == f64::NEG_INFINITY {
10841 x
10842 } else if x == f64::INFINITY || y == f64::INFINITY {
10843 f64::INFINITY
10844 } else if x >= y {
10845 x + ((y - x) * ln2).exp().ln_1p() / ln2
10846 } else {
10847 y + ((x - y) * ln2).exp().ln_1p() / ln2
10848 };
10849 out.push(Scalar::Float64(result));
10850 }
10851 Self::new(DType::Float64, out)
10852 }
10853
10854 pub fn spacing(&self) -> Result<Self, ColumnError> {
10859 let mut out = Vec::with_capacity(self.values.len());
10860 for v in &self.values {
10861 if v.is_missing() {
10862 out.push(Scalar::Float64(f64::NAN));
10863 continue;
10864 }
10865 match v {
10866 Scalar::Int64(x) => {
10867 let f = (*x as f64).abs();
10868 if f == 0.0 {
10869 out.push(Scalar::Float64(f64::from_bits(1)));
10870 } else {
10871 let bits = f.to_bits();
10872 let next = f64::from_bits(bits + 1);
10873 out.push(Scalar::Float64(next - f));
10874 }
10875 }
10876 Scalar::Float64(x) => {
10877 if x.is_nan() || x.is_infinite() {
10878 out.push(Scalar::Float64(f64::NAN));
10879 } else {
10880 let f = x.abs();
10881 if f == 0.0 {
10882 out.push(Scalar::Float64(f64::from_bits(1)));
10883 } else {
10884 let bits = f.to_bits();
10885 let next = f64::from_bits(bits + 1);
10886 out.push(Scalar::Float64(next - f));
10887 }
10888 }
10889 }
10890 _ => {
10891 return Err(ColumnError::Type(TypeError::NonNumericValue {
10892 value: format!("{v:?}"),
10893 dtype: self.dtype,
10894 }));
10895 }
10896 }
10897 }
10898 Self::new(DType::Float64, out)
10899 }
10900
10901 pub fn radians(&self) -> Result<Self, ColumnError> {
10903 if let Some(out) = self.typed_float_unary(f64::to_radians) {
10904 return Ok(out);
10905 }
10906 let mut out = Vec::with_capacity(self.values.len());
10907 for v in &self.values {
10908 if v.is_missing() {
10909 out.push(Scalar::Float64(f64::NAN));
10910 continue;
10911 }
10912 match v {
10913 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).to_radians())),
10914 Scalar::Float64(x) => out.push(Scalar::Float64(x.to_radians())),
10915 _ => {
10916 return Err(ColumnError::Type(TypeError::NonNumericValue {
10917 value: format!("{v:?}"),
10918 dtype: self.dtype,
10919 }));
10920 }
10921 }
10922 }
10923 Self::new(DType::Float64, out)
10924 }
10925
    /// Alternate spelling of `radians`; forwards the call and returns its
    /// result (including any error) unchanged.
    pub fn deg2rad(&self) -> Result<Self, ColumnError> {
        self.radians()
    }
10930
10931 pub fn degrees(&self) -> Result<Self, ColumnError> {
10933 if let Some(out) = self.typed_float_unary(f64::to_degrees) {
10934 return Ok(out);
10935 }
10936 let mut out = Vec::with_capacity(self.values.len());
10937 for v in &self.values {
10938 if v.is_missing() {
10939 out.push(Scalar::Float64(f64::NAN));
10940 continue;
10941 }
10942 match v {
10943 Scalar::Int64(x) => out.push(Scalar::Float64((*x as f64).to_degrees())),
10944 Scalar::Float64(x) => out.push(Scalar::Float64(x.to_degrees())),
10945 _ => {
10946 return Err(ColumnError::Type(TypeError::NonNumericValue {
10947 value: format!("{v:?}"),
10948 dtype: self.dtype,
10949 }));
10950 }
10951 }
10952 }
10953 Self::new(DType::Float64, out)
10954 }
10955
    /// Alternate spelling of `degrees`; forwards the call and returns its
    /// result (including any error) unchanged.
    pub fn rad2deg(&self) -> Result<Self, ColumnError> {
        self.degrees()
    }
10960
10961 pub fn reciprocal(&self) -> Result<Self, ColumnError> {
10963 if let Some(out) = self.typed_float_unary(|x| 1.0 / x) {
10964 return Ok(out);
10965 }
10966 let mut out = Vec::with_capacity(self.values.len());
10967 for v in &self.values {
10968 if v.is_missing() {
10969 out.push(Scalar::Float64(f64::NAN));
10970 continue;
10971 }
10972 match v {
10973 Scalar::Int64(x) => out.push(Scalar::Float64(1.0 / (*x as f64))),
10974 Scalar::Float64(x) => out.push(Scalar::Float64(1.0 / x)),
10975 _ => {
10976 return Err(ColumnError::Type(TypeError::NonNumericValue {
10977 value: format!("{v:?}"),
10978 dtype: self.dtype,
10979 }));
10980 }
10981 }
10982 }
10983 Self::new(DType::Float64, out)
10984 }
10985
10986 pub fn square(&self) -> Result<Self, ColumnError> {
10988 if let Some(data) = self.as_i64_slice() {
10992 return Ok(Self::from_i64_values(data.iter().map(|&x| x * x).collect()));
10993 }
10994 if let Some(data) = self.as_f64_slice() {
10995 return Ok(Self::from_f64_values(data.iter().map(|&x| x * x).collect()));
10996 }
10997 let mut out = Vec::with_capacity(self.values.len());
10998 for v in &self.values {
10999 if v.is_missing() {
11000 out.push(Scalar::Float64(f64::NAN));
11001 continue;
11002 }
11003 match v {
11004 Scalar::Int64(x) => out.push(Scalar::Int64(x * x)),
11005 Scalar::Float64(x) => out.push(Scalar::Float64(x * x)),
11006 _ => {
11007 return Err(ColumnError::Type(TypeError::NonNumericValue {
11008 value: format!("{v:?}"),
11009 dtype: self.dtype,
11010 }));
11011 }
11012 }
11013 }
11014 let dtype = match self.dtype {
11015 DType::Int64 => DType::Int64,
11016 _ => DType::Float64,
11017 };
11018 Self::new(dtype, out)
11019 }
11020
11021 pub fn shift(&self, periods: i64, fill: Scalar) -> Result<Self, ColumnError> {
11028 let len = self.values.len();
11029 if len == 0 || periods == 0 {
11030 return Ok(self.clone());
11031 }
11032 let abs = periods.unsigned_abs() as usize;
11033 let mut out: Vec<Scalar> = Vec::with_capacity(len);
11034 if abs >= len {
11035 for _ in 0..len {
11036 out.push(fill.clone());
11037 }
11038 } else if periods > 0 {
11039 for _ in 0..abs {
11040 out.push(fill.clone());
11041 }
11042 out.extend_from_slice(&self.values[..len - abs]);
11043 } else {
11044 out.extend_from_slice(&self.values[abs..]);
11045 for _ in 0..abs {
11046 out.push(fill.clone());
11047 }
11048 }
11049 Self::new(self.dtype, out)
11050 }
11051
11052 pub fn clip(&self, lower: Option<f64>, upper: Option<f64>) -> Result<Self, ColumnError> {
11059 let clamp = |mut x: f64| {
11067 if let Some(lo) = lower
11068 && x < lo
11069 {
11070 x = lo;
11071 }
11072 if let Some(hi) = upper
11073 && x > hi
11074 {
11075 x = hi;
11076 }
11077 x
11078 };
11079 if let Some(data) = self.as_f64_slice() {
11080 let out: Vec<f64> = data.iter().map(|&x| clamp(x)).collect();
11081 return Ok(Self::from_f64_values(out));
11082 }
11083 if let Some(data) = self.as_i64_slice() {
11084 let out: Vec<f64> = data.iter().map(|&x| clamp(x as f64)).collect();
11085 return Ok(Self::from_f64_values(out));
11086 }
11087
11088 let mut out = Vec::with_capacity(self.values.len());
11089 for v in &self.values {
11090 if v.is_missing() {
11091 out.push(v.clone());
11092 continue;
11093 }
11094 let numeric = match v.to_f64() {
11095 Ok(x) => x,
11096 Err(err) => return Err(ColumnError::Type(err)),
11097 };
11098 let mut clipped = numeric;
11099 if let Some(lo) = lower
11100 && clipped < lo
11101 {
11102 clipped = lo;
11103 }
11104 if let Some(hi) = upper
11105 && clipped > hi
11106 {
11107 clipped = hi;
11108 }
11109 out.push(Scalar::Float64(clipped));
11110 }
11111 Self::new(DType::Float64, out)
11112 }
11113
11114 pub fn round(&self, decimals: i32) -> Result<Self, ColumnError> {
11122 if matches!(self.dtype, DType::Bool) || (self.dtype == DType::Int64 && decimals >= 0) {
11123 return Ok(self.clone());
11124 }
11125 if self.dtype == DType::Int64 {
11126 let out = self
11127 .values
11128 .iter()
11129 .map(|v| match v {
11130 Scalar::Int64(value) => {
11131 Scalar::Int64(round_i64_negative_decimals(*value, decimals))
11132 }
11133 Scalar::Null(kind) => Scalar::Null(*kind),
11134 other => other.clone(),
11135 })
11136 .collect();
11137 return Self::new(DType::Int64, out);
11138 }
11139 let factor = 10f64.powi(decimals);
11140 if let Some(data) = self.as_f64_slice() {
11148 return Ok(Self::from_f64_values(
11149 data.iter()
11150 .map(|&x| (x * factor).round_ties_even() / factor)
11151 .collect(),
11152 ));
11153 }
11154 let mut out = Vec::with_capacity(self.values.len());
11155 for v in &self.values {
11156 if v.is_missing() {
11157 out.push(v.clone());
11158 continue;
11159 }
11160 match v.to_f64() {
11161 Ok(x) => out.push(Scalar::Float64((x * factor).round_ties_even() / factor)),
11162 Err(err) => return Err(ColumnError::Type(err)),
11163 }
11164 }
11165 Self::new(DType::Float64, out)
11166 }
11167
11168 pub fn isin(&self, needles: &[Scalar]) -> Result<Self, ColumnError> {
11174 #[derive(Hash, PartialEq, Eq)]
11175 enum Key<'a> {
11176 Bool(bool),
11177 Int64(i64),
11178 FloatBits(u64),
11179 Utf8(&'a str),
11180 Timedelta64(i64),
11181 Datetime64(i64),
11182 Period(i64),
11183 Interval(u64, u64, IntervalClosed),
11184 }
11185 fn key_of(v: &Scalar) -> Option<Key<'_>> {
11186 if v.is_missing() {
11187 return None;
11188 }
11189 Some(match v {
11190 Scalar::Bool(b) => Key::Bool(*b),
11191 Scalar::Int64(i) => Key::Int64(*i),
11192 Scalar::Float64(f) => {
11193 let norm = if *f == 0.0 { 0.0 } else { *f };
11194 Key::FloatBits(norm.to_bits())
11195 }
11196 Scalar::Utf8(s) => Key::Utf8(s.as_str()),
11197 Scalar::Timedelta64(v) => Key::Timedelta64(*v),
11198 Scalar::Datetime64(v) => Key::Datetime64(*v),
11199 Scalar::Period(v) => Key::Period(*v),
11200 Scalar::Interval(v) => {
11201 let (left, right, closed) = interval_key(v);
11202 Key::Interval(left, right, closed)
11203 }
11204 Scalar::Null(_) => return None,
11205 })
11206 }
11207
11208 if let Some(data) = self.as_i64_slice() {
11217 let mut n_min = i64::MAX;
11218 let mut n_max = i64::MIN;
11219 let mut saw_int_needle = false;
11220 for needle in needles {
11221 if let Scalar::Int64(v) = needle {
11222 saw_int_needle = true;
11223 n_min = n_min.min(*v);
11224 n_max = n_max.max(*v);
11225 }
11226 }
11227 if !saw_int_needle {
11228 return Ok(Self::from_bool_values(vec![false; data.len()]));
11229 }
11230 let span = i128::from(n_max) - i128::from(n_min) + 1;
11231 if span > 0 && span <= (1i128 << 24) {
11232 let mut present = vec![false; span as usize];
11233 for needle in needles {
11234 if let Scalar::Int64(v) = needle {
11235 present[(v - n_min) as usize] = true;
11236 }
11237 }
11238 let out: Vec<bool> = data
11239 .iter()
11240 .map(|&v| v >= n_min && v <= n_max && present[(v - n_min) as usize])
11241 .collect();
11242 return Ok(Self::from_bool_values(out));
11243 }
11244 }
11245
11246 let mut lookup: FxHashSet<Key<'_>> = FxHashSet::default();
11247 for n in needles {
11248 if let Some(k) = key_of(n) {
11249 lookup.insert(k);
11250 }
11251 }
11252
11253 let out: Vec<bool> = self
11259 .values
11260 .iter()
11261 .map(|v| match key_of(v) {
11262 Some(k) => lookup.contains(&k),
11263 None => false,
11264 })
11265 .collect();
11266 Ok(Self::from_bool_values(out))
11267 }
11268
11269 pub fn unique(&self) -> Result<Self, ColumnError> {
11275 if let Some(data) = self.as_i64_slice()
11282 && let Some((min, range)) = i64_direct_address_range(data)
11283 {
11284 let mut seen = vec![false; range];
11285 let mut out: Vec<i64> = Vec::new();
11286 for &v in data {
11287 let slot = (v as i128 - min as i128) as usize;
11288 if !seen[slot] {
11289 seen[slot] = true;
11290 out.push(v);
11291 }
11292 }
11293 return Ok(Self::from_i64_values(out));
11294 }
11295
11296 #[derive(Hash, PartialEq, Eq)]
11297 enum Key<'a> {
11298 Bool(bool),
11299 Int64(i64),
11300 FloatBits(u64),
11301 Utf8(&'a str),
11302 Timedelta64(i64),
11303 Datetime64(i64),
11304 Period(i64),
11305 Interval(u64, u64, IntervalClosed),
11306 }
11307
11308 let mut seen: FxHashSet<Key<'_>> = FxHashSet::default();
11309 let mut out = Vec::new();
11310 for v in &self.values {
11311 if v.is_missing() {
11312 continue;
11313 }
11314 let key = match v {
11315 Scalar::Bool(b) => Key::Bool(*b),
11316 Scalar::Int64(i) => Key::Int64(*i),
11317 Scalar::Float64(f) => {
11318 let norm = if *f == 0.0 { 0.0 } else { *f };
11319 Key::FloatBits(norm.to_bits())
11320 }
11321 Scalar::Utf8(s) => Key::Utf8(s.as_str()),
11322 Scalar::Timedelta64(v) => Key::Timedelta64(*v),
11323 Scalar::Datetime64(v) => Key::Datetime64(*v),
11324 Scalar::Period(v) => Key::Period(*v),
11325 Scalar::Interval(v) => {
11326 let (left, right, closed) = interval_key(v);
11327 Key::Interval(left, right, closed)
11328 }
11329 Scalar::Null(_) => continue,
11330 };
11331 if seen.insert(key) {
11332 out.push(v.clone());
11333 }
11334 }
11335 Self::new(self.dtype, out)
11336 }
11337
11338 pub fn setdiff1d(&self, other: &Self) -> Result<Self, ColumnError> {
11342 let other_unique = other.unique()?;
11343 let other_set: FxHashSet<SetMemberKey<'_>> = other_unique
11346 .values()
11347 .iter()
11348 .filter_map(set_member_key)
11349 .collect();
11350 let mut seen: FxHashSet<SetMemberKey<'_>> = FxHashSet::default();
11351 let mut out = Vec::new();
11352 for v in &self.values {
11353 let Some(key) = set_member_key(v) else {
11354 continue;
11355 };
11356 if !other_set.contains(&key) && seen.insert(key) {
11357 out.push(v.clone());
11358 }
11359 }
11360 Self::new(self.dtype, out)
11361 }
11362
11363 pub fn intersect1d(&self, other: &Self) -> Result<Self, ColumnError> {
11367 let self_unique = self.unique()?;
11368 let other_unique = other.unique()?;
11369 let other_set: FxHashSet<SetMemberKey<'_>> = other_unique
11370 .values()
11371 .iter()
11372 .filter_map(set_member_key)
11373 .collect();
11374 let mut out = Vec::new();
11375 for v in self_unique.values() {
11376 let Some(key) = set_member_key(v) else {
11377 continue;
11378 };
11379 if other_set.contains(&key) {
11380 out.push(v.clone());
11381 }
11382 }
11383 Self::new(self.dtype, out)
11384 }
11385
11386 pub fn union1d(&self, other: &Self) -> Result<Self, ColumnError> {
11390 let mut combined = self.values.to_vec();
11391 combined.extend(other.values().iter().cloned());
11392 let temp = Self::new(self.dtype, combined)?;
11393 temp.unique()
11394 }
11395
11396 pub fn setxor1d(&self, other: &Self) -> Result<Self, ColumnError> {
11401 let a_unique = self.unique()?;
11402 let b_unique = other.unique()?;
11403 let a_set: FxHashSet<SetMemberKey<'_>> = a_unique
11404 .values()
11405 .iter()
11406 .filter_map(set_member_key)
11407 .collect();
11408 let b_set: FxHashSet<SetMemberKey<'_>> = b_unique
11409 .values()
11410 .iter()
11411 .filter_map(set_member_key)
11412 .collect();
11413 let mut out = Vec::new();
11414 for v in a_unique.values() {
11416 let Some(key) = set_member_key(v) else {
11417 continue;
11418 };
11419 if !b_set.contains(&key) {
11420 out.push(v.clone());
11421 }
11422 }
11423 for v in b_unique.values() {
11425 let Some(key) = set_member_key(v) else {
11426 continue;
11427 };
11428 if !a_set.contains(&key) {
11429 out.push(v.clone());
11430 }
11431 }
11432 Self::new(self.dtype, out)
11433 }
11434
11435 pub fn in1d(&self, other: &Self) -> Result<Self, ColumnError> {
11439 let other_unique = other.unique()?;
11440 let other_set: FxHashSet<SetMemberKey<'_>> = other_unique
11441 .values()
11442 .iter()
11443 .filter_map(set_member_key)
11444 .collect();
11445 let mut out = Vec::with_capacity(self.values.len());
11447 for v in &self.values {
11448 let found = match set_member_key(v) {
11449 Some(key) => other_set.contains(&key),
11450 None => false, };
11452 out.push(found);
11453 }
11454 Ok(Self::from_bool_values(out))
11455 }
11456
11457 pub fn value_counts(&self) -> Result<(Self, Self), ColumnError> {
11463 self.value_counts_with_options(false, true, false, true)
11464 }
11465
11466 pub fn value_counts_with_options(
11472 &self,
11473 normalize: bool,
11474 sort: bool,
11475 ascending: bool,
11476 dropna: bool,
11477 ) -> Result<(Self, Self), ColumnError> {
11478 let mut counts: Vec<(Scalar, usize)> = Vec::new();
11487 let mut index: rustc_hash::FxHashMap<SetMemberKey<'_>, usize> =
11488 rustc_hash::FxHashMap::default();
11489 let mut missing_count = 0_usize;
11490
11491 for value in &self.values {
11492 if value.is_missing() {
11493 missing_count += 1;
11494 continue;
11495 }
11496 let Some(key) = set_member_key(value) else {
11497 counts.push((value.clone(), 1));
11499 continue;
11500 };
11501 if let Some(&i) = index.get(&key) {
11502 counts[i].1 += 1;
11503 } else {
11504 index.insert(key, counts.len());
11505 counts.push((value.clone(), 1));
11506 }
11507 }
11508
11509 if !dropna && missing_count > 0 {
11510 counts.push((Scalar::Null(NullKind::NaN), missing_count));
11511 }
11512
11513 if sort {
11514 if ascending {
11515 counts.sort_by_key(|(_, count)| *count);
11516 } else {
11517 counts.sort_by_key(|(_, count)| std::cmp::Reverse(*count));
11518 }
11519 }
11520
11521 let total = if normalize {
11522 counts.iter().map(|(_, count)| *count).sum::<usize>() as f64
11523 } else {
11524 1.0
11525 };
11526
11527 let mut values_out = Vec::with_capacity(counts.len());
11528 let mut counts_out = Vec::with_capacity(counts.len());
11529 for (value, count) in counts {
11530 values_out.push(value);
11531 if normalize {
11532 let normalized = if total == 0.0 {
11533 0.0
11534 } else {
11535 count as f64 / total
11536 };
11537 counts_out.push(Scalar::Float64(normalized));
11538 } else {
11539 counts_out.push(Scalar::Int64(i64::try_from(count).unwrap_or(i64::MAX)));
11540 }
11541 }
11542
11543 let values = Self::new(self.dtype, values_out)?;
11544 let counts = Self::new(
11545 if normalize {
11546 DType::Float64
11547 } else {
11548 DType::Int64
11549 },
11550 counts_out,
11551 )?;
11552 Ok((values, counts))
11553 }
11554
11555 #[must_use]
11556 pub fn semantic_eq(&self, other: &Self) -> bool {
11557 self.dtype == other.dtype
11558 && self.values.len() == other.values.len()
11559 && self
11560 .values
11561 .iter()
11562 .zip(&other.values)
11563 .all(|(left, right)| left.semantic_eq(right))
11564 }
11565
11566 pub fn isclose(&self, other: &Self, rtol: f64, atol: f64) -> Result<Self, ColumnError> {
11570 if self.len() != other.len() {
11571 return Err(ColumnError::LengthMismatch {
11572 left: self.len(),
11573 right: other.len(),
11574 });
11575 }
11576 let mut out = Vec::with_capacity(self.values.len());
11577 for (a, b) in self.values.iter().zip(&other.values) {
11578 if a.is_missing() || b.is_missing() {
11579 out.push(Scalar::Bool(false));
11580 continue;
11581 }
11582 let af = a.to_f64().map_err(ColumnError::Type)?;
11583 let bf = b.to_f64().map_err(ColumnError::Type)?;
11584 let close = (af - bf).abs() <= atol + rtol * bf.abs();
11585 out.push(Scalar::Bool(close));
11586 }
11587 Self::new(DType::Bool, out)
11588 }
11589
11590 pub fn allclose(&self, other: &Self, rtol: f64, atol: f64) -> Result<bool, ColumnError> {
11594 let close = self.isclose(other, rtol, atol)?;
11595 for v in close.values() {
11596 match v {
11597 Scalar::Bool(true) => continue,
11598 Scalar::Bool(false) => return Ok(false),
11599 _ => return Ok(false),
11600 }
11601 }
11602 Ok(true)
11603 }
11604}
11605
/// Incrementally refined index over the row positions of a column.
///
/// Each threshold query partitions part of `perm` around the queried value and
/// remembers where the split landed, so repeated queries touch ever smaller
/// regions.
pub struct CrackIndex {
    /// Row positions; starts as `0..len` and is reordered in place by queries.
    perm: Vec<usize>,
    /// Recorded `(pivot value, split position in perm)` pairs, kept ordered by
    /// pivot value.
    cracks: Vec<(f64, usize)>,
}
11633
11634impl CrackIndex {
11635 #[must_use]
11637 pub fn new(len: usize) -> Self {
11638 Self {
11639 perm: (0..len).collect(),
11640 cracks: Vec::new(),
11641 }
11642 }
11643
11644 #[must_use]
11646 pub fn num_cracks(&self) -> usize {
11647 self.cracks.len()
11648 }
11649
11650 pub fn filter_gt(&mut self, column: &Column, value: f64) -> Vec<usize> {
11652 let split = self.crack_at(column, value);
11653 self.perm[split..].to_vec()
11654 }
11655
11656 pub fn filter_lte(&mut self, column: &Column, value: f64) -> Vec<usize> {
11658 let split = self.crack_at(column, value);
11659 self.perm[..split]
11660 .iter()
11661 .copied()
11662 .filter(|&idx| {
11663 column
11664 .value(idx)
11665 .and_then(|v| v.to_f64().ok())
11666 .is_some_and(|f| f <= value)
11667 })
11668 .collect()
11669 }
11670
11671 pub fn filter_gte(&mut self, column: &Column, value: f64) -> Vec<usize> {
11673 let split = self.crack_at(column, value);
11676 let mut result: Vec<usize> = self.perm[split..].to_vec();
11679 for &idx in &self.perm[..split] {
11680 if let Some(v) = column.value(idx)
11681 && let Ok(f) = v.to_f64()
11682 && f == value
11683 {
11684 result.push(idx);
11685 }
11686 }
11687 result
11688 }
11689
11690 pub fn filter_lt(&mut self, column: &Column, value: f64) -> Vec<usize> {
11692 let split = self.crack_at(column, value);
11693 self.perm[..split]
11695 .iter()
11696 .copied()
11697 .filter(|&idx| {
11698 column
11699 .value(idx)
11700 .and_then(|v| v.to_f64().ok())
11701 .is_some_and(|f| f < value)
11702 })
11703 .collect()
11704 }
11705
11706 pub fn filter_eq(&mut self, column: &Column, value: f64) -> Vec<usize> {
11708 let split = self.crack_at(column, value);
11709 self.perm[..split]
11711 .iter()
11712 .copied()
11713 .filter(|&idx| {
11714 column
11715 .value(idx)
11716 .and_then(|v| v.to_f64().ok())
11717 .is_some_and(|f| f == value)
11718 })
11719 .collect()
11720 }
11721
11722 fn crack_at(&mut self, column: &Column, value: f64) -> usize {
11725 if let Ok(pos) = self.cracks.binary_search_by(|probe| {
11727 probe
11728 .0
11729 .partial_cmp(&value)
11730 .unwrap_or(std::cmp::Ordering::Equal)
11731 }) {
11732 return self.cracks[pos].1;
11733 }
11734
11735 let (region_start, region_end) = self.find_region(value);
11737
11738 let split = self.partition_region(column, region_start, region_end, value);
11741
11742 let insert_pos = self
11744 .cracks
11745 .binary_search_by(|probe| {
11746 probe
11747 .0
11748 .partial_cmp(&value)
11749 .unwrap_or(std::cmp::Ordering::Equal)
11750 })
11751 .unwrap_or_else(|pos| pos);
11752 self.cracks.insert(insert_pos, (value, split));
11753
11754 split
11755 }
11756
11757 fn find_region(&self, value: f64) -> (usize, usize) {
11759 let mut start = 0;
11760 let mut end = self.perm.len();
11761
11762 for &(crack_val, crack_pos) in &self.cracks {
11763 if crack_val < value {
11764 start = start.max(crack_pos);
11765 } else {
11766 end = end.min(crack_pos);
11767 break;
11768 }
11769 }
11770
11771 (start, end)
11772 }
11773
11774 fn partition_region(&mut self, column: &Column, start: usize, end: usize, pivot: f64) -> usize {
11777 let region = &mut self.perm[start..end];
11779 let mut write = 0;
11780
11781 for read in 0..region.len() {
11782 let idx = region[read];
11783 let val = column
11784 .value(idx)
11785 .and_then(|v| v.to_f64().ok())
11786 .unwrap_or(f64::NEG_INFINITY); if val <= pivot {
11789 region.swap(write, read);
11790 write += 1;
11791 }
11792 }
11793
11794 start + write
11795 }
11796}
11797
11798#[cfg(test)]
11799mod tests {
11800 use fp_types::{DType, Interval, IntervalClosed, NullKind, Scalar, SparseDType};
11801
11802 use super::{
11803 ArithmeticOp, Column, ColumnData, ColumnError, ScalarValues, SparseColumn, ValidityMask,
11804 };
11805
11806 #[test]
11807 fn reindex_injects_missing_values() {
11808 let column = Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)])
11809 .expect("column should build");
11810
11811 let out = column
11812 .reindex_by_positions(&[Some(1), None, Some(0)])
11813 .expect("reindex should work");
11814
11815 assert_eq!(
11816 out.values(),
11817 &[
11818 Scalar::Int64(20),
11819 Scalar::Null(NullKind::Null),
11820 Scalar::Int64(10)
11821 ]
11822 );
11823 }
11824
    /// `take_positions` must agree with gathering the scalars by hand and
    /// rebuilding through the validating constructor, including for a column
    /// that contains a missing value and for an empty position list.
    #[test]
    fn take_positions_matches_validated_materialization() {
        let column = Column::new(
            DType::Float64,
            vec![
                Scalar::Float64(1.5),
                Scalar::Null(NullKind::NaN),
                Scalar::Float64(3.5),
            ],
        )
        .expect("column should build");

        // Repeated and out-of-order positions.
        let positions = [2, 1, 0, 2];
        let gathered = column.take_positions(&positions);
        // Oracle: index the scalars directly and go through `Column::new`.
        let expected_values = positions
            .iter()
            .map(|&position| column.values()[position].clone())
            .collect::<Vec<_>>();
        let expected =
            Column::new(column.dtype(), expected_values).expect("validated materialization");

        assert_eq!(gathered.dtype(), expected.dtype());
        assert_eq!(gathered.values(), expected.values());
        assert_eq!(gathered.validity(), expected.validity());

        // Gathering nothing keeps the dtype and yields an empty column.
        let empty = column.take_positions(&[]);
        assert_eq!(empty.dtype(), column.dtype());
        assert!(empty.values().is_empty());
        assert_eq!(empty.validity(), &ValidityMask::all_invalid(0));
    }
11855
    /// For every primitive dtype listed below, gathering from an all-valid
    /// column must match the hand-built, validated equivalent.
    #[test]
    fn take_positions_all_valid_primitives_match_validated_materialization() {
        // One all-valid column per dtype; the float case includes -0.0.
        let cases = [
            (
                DType::Bool,
                vec![Scalar::Bool(false), Scalar::Bool(true), Scalar::Bool(false)],
            ),
            (
                DType::Int64,
                vec![Scalar::Int64(10), Scalar::Int64(-5), Scalar::Int64(42)],
            ),
            (
                DType::Float64,
                vec![
                    Scalar::Float64(1.25),
                    Scalar::Float64(-0.0),
                    Scalar::Float64(9.5),
                ],
            ),
            (
                DType::Timedelta64,
                vec![
                    Scalar::Timedelta64(10),
                    Scalar::Timedelta64(-5),
                    Scalar::Timedelta64(42),
                ],
            ),
            (
                DType::Datetime64,
                vec![
                    Scalar::Datetime64(10),
                    Scalar::Datetime64(-5),
                    Scalar::Datetime64(42),
                ],
            ),
            (
                DType::Period,
                vec![Scalar::Period(10), Scalar::Period(-5), Scalar::Period(42)],
            ),
        ];

        let positions = [2, 0, 2, 1];
        for (dtype, values) in cases {
            let column = Column::new(dtype, values).expect("column should build");
            let gathered = column.take_positions(&positions);
            // Oracle: index the scalars directly and go through `Column::new`.
            let expected_values = positions
                .iter()
                .map(|&position| column.values()[position].clone())
                .collect::<Vec<_>>();
            let expected =
                Column::new(column.dtype(), expected_values).expect("validated materialization");

            assert_eq!(gathered.dtype(), expected.dtype());
            assert_eq!(gathered.values(), expected.values());
            assert_eq!(gathered.validity(), expected.validity());
        }
    }
11913
    /// Whatever scalar the source column stores at a position (including which
    /// `NullKind` it ended up with) must come back unchanged from
    /// `take_positions`, along with the matching validity bit.
    #[test]
    fn take_positions_preserves_exact_null_kind_contract() {
        for (dtype, real) in [
            (DType::Float64, Scalar::Float64(2.5)),
            (DType::Int64, Scalar::Int64(7)),
        ] {
            // Three different null kinds followed by one real value.
            let source = Column::new(
                dtype,
                vec![
                    Scalar::Null(NullKind::NaN),
                    Scalar::Null(NullKind::NaT),
                    Scalar::Null(NullKind::Null),
                    real,
                ],
            )
            .expect("column builds");
            // Compare against what the column actually stores, not the input.
            let stored = source.values().to_vec();
            let positions = [3, 2, 1, 0, 1];
            let gathered = source.take_positions(&positions);
            for (out_idx, &pos) in positions.iter().enumerate() {
                assert_eq!(
                    gathered.values()[out_idx],
                    stored[pos],
                    "dtype {dtype:?}: take_positions must reproduce the exact stored scalar \
                     (incl. NullKind) for source position {pos}",
                );
                assert_eq!(
                    gathered.validity().get(out_idx),
                    source.validity().get(pos),
                    "dtype {dtype:?}: validity must follow the gathered position {pos}",
                );
            }
        }
    }
11963
    /// A freshly built all-valid Float64 column carries a typed `data` cache,
    /// and gathering from it still matches the validated materialization.
    #[test]
    fn primitive_columns_cache_typed_data_for_take_positions() {
        let column = Column::new(
            DType::Float64,
            vec![
                Scalar::Float64(1.25),
                Scalar::Float64(-0.0),
                Scalar::Float64(9.5),
            ],
        )
        .expect("column should build");

        // Inspects the private cache field directly.
        assert!(matches!(column.data, Some(ColumnData::Float64(_))));
        let positions = [2, 0, 1, 2];
        let gathered = column.take_positions(&positions);
        let expected = Column::new(
            DType::Float64,
            positions
                .iter()
                .map(|&position| column.values()[position].clone())
                .collect(),
        )
        .expect("validated materialization");

        assert_eq!(gathered.values(), expected.values());
        assert_eq!(gathered.validity(), expected.validity());
    }
11991
    /// Gathering from an all-valid Float64 column yields the lazy variant:
    /// raw `f64` data is present immediately, scalars appear only once
    /// `values()` is read, and the variant stays lazy afterwards.
    ///
    /// Statement order matters here: the "not yet materialized" checks must
    /// run before the first `gathered.values()` call.
    #[test]
    fn float64_take_positions_defers_scalar_materialization() {
        let column = Column::new(
            DType::Float64,
            vec![
                Scalar::Float64(1.25),
                Scalar::Float64(-0.0),
                Scalar::Float64(9.5),
            ],
        )
        .expect("column should build");

        let positions = [2, 0, 1, 2];
        let gathered = column.take_positions(&positions);

        assert!(
            matches!(&gathered.values, ScalarValues::LazyAllValidFloat64 { .. }),
            "Float64 gather should defer scalar materialization"
        );
        if let ScalarValues::LazyAllValidFloat64 { data, values } = &gathered.values {
            // Compare bit patterns so -0.0 is distinguished from 0.0.
            assert_eq!(
                data.iter().map(|value| value.to_bits()).collect::<Vec<_>>(),
                vec![
                    9.5f64.to_bits(),
                    1.25f64.to_bits(),
                    (-0.0f64).to_bits(),
                    9.5f64.to_bits(),
                ]
            );
            // Nothing has read the scalars yet.
            assert!(values.get().is_none());
        }
        assert_eq!(gathered.len(), positions.len());
        assert_eq!(
            gathered.validity(),
            &ValidityMask::all_valid(positions.len())
        );

        let expected = Column::new(
            DType::Float64,
            positions
                .iter()
                .map(|&position| column.values()[position].clone())
                .collect(),
        )
        .expect("validated materialization");

        // First read of `gathered.values()`; the checks below rely on it.
        assert_eq!(gathered.values(), expected.values());
        assert!(
            matches!(&gathered.values, ScalarValues::LazyAllValidFloat64 { .. }),
            "Float64 gather should stay lazy after read"
        );
        if let ScalarValues::LazyAllValidFloat64 { values, .. } = &gathered.values {
            assert!(values.get().is_some());
        }
        assert_eq!(gathered.validity(), expected.validity());
    }
12048
    /// A reindex in which every position is `Some` behaves like a gather: the
    /// Float64 result is the lazy variant and matches the validated
    /// materialization.
    #[test]
    fn reindex_all_present_matches_materialization_and_keeps_float64_lazy() {
        let column = Column::from_f64_values(vec![1.25, -0.0, f64::INFINITY]);

        let positions = [Some(2), Some(0), Some(1), Some(2)];
        let gathered = column
            .reindex_by_positions(&positions)
            .expect("all-present reindex should gather");

        assert!(
            matches!(&gathered.values, ScalarValues::LazyAllValidFloat64 { .. }),
            "all-present Float64 reindex should defer scalar materialization"
        );
        if let ScalarValues::LazyAllValidFloat64 { data, values } = &gathered.values {
            // Compare bit patterns so -0.0 is distinguished from 0.0.
            assert_eq!(
                data.iter().map(|value| value.to_bits()).collect::<Vec<_>>(),
                vec![
                    f64::INFINITY.to_bits(),
                    1.25f64.to_bits(),
                    (-0.0f64).to_bits(),
                    f64::INFINITY.to_bits(),
                ]
            );
            // Checked before anything reads `gathered.values()`.
            assert!(values.get().is_none());
        }

        let expected = Column::new(
            DType::Float64,
            positions
                .iter()
                .map(|&position| column.values()[position.expect("present position")].clone())
                .collect(),
        )
        .expect("validated scalar materialization");

        assert_eq!(gathered.dtype(), expected.dtype());
        assert_eq!(gathered.values(), expected.values());
        assert_eq!(gathered.validity(), expected.validity());
    }
12088
    /// A column that went through a JSON round trip has no typed cache, yet
    /// still compares equal to the original that has one.
    #[test]
    fn column_equality_ignores_skipped_typed_cache() {
        let column = Column::new(
            DType::Int64,
            vec![Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)],
        )
        .expect("column should build");

        let json = serde_json::to_string(&column).expect("serialize");
        let roundtrip: Column = serde_json::from_str(&json).expect("deserialize");

        // Cache present on the original, absent after deserialization.
        assert!(column.data.is_some());
        assert!(roundtrip.data.is_none());
        assert_eq!(column, roundtrip);
    }
12104
    /// Cloning a column leaves the clone without the typed cache while the two
    /// columns still compare equal.
    #[test]
    fn column_clone_preserves_values_without_copying_private_cache() {
        let column = Column::new(
            DType::Int64,
            vec![Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)],
        )
        .expect("column should build");

        let cloned = column.clone();

        // Cache present on the original, absent on the clone.
        assert!(column.data.is_some());
        assert!(cloned.data.is_none());
        assert_eq!(column, cloned);
    }
12119
    /// Cloning an all-valid Float64 column (via the cache helper and via
    /// `Clone`) produces the lazy variant, whose scalars are materialized only
    /// when `values()` is first read.
    ///
    /// Statement order matters: each "not yet materialized" check precedes the
    /// read that triggers materialization.
    #[test]
    fn dense_primitive_clone_defers_float64_scalar_materialization_from_typed_cache() {
        let column = Column::new(
            DType::Float64,
            vec![
                Scalar::Float64(1.5),
                Scalar::Float64(-0.0),
                Scalar::Float64(3.25),
            ],
        )
        .expect("column should build");

        let cloned_values = column
            .clone_dense_values_from_cache()
            .expect("all-valid Float64 typed cache should clone");
        assert!(
            matches!(&cloned_values, ScalarValues::LazyAllValidFloat64 { .. }),
            "Float64 clone should defer scalar materialization"
        );
        if let ScalarValues::LazyAllValidFloat64 { data, values } = &cloned_values {
            // Compare bit patterns so -0.0 is distinguished from 0.0.
            assert_eq!(
                data.iter().map(|value| value.to_bits()).collect::<Vec<_>>(),
                vec![1.5f64.to_bits(), (-0.0f64).to_bits(), 3.25f64.to_bits()]
            );
            assert!(values.get().is_none());
        }

        let cloned = column.clone();
        assert!(
            matches!(&cloned.values, ScalarValues::LazyAllValidFloat64 { .. }),
            "Column::clone should keep all-valid Float64 clone values lazy"
        );
        if let ScalarValues::LazyAllValidFloat64 { values, .. } = &cloned.values {
            assert!(values.get().is_none());
        }
        // First read of `cloned.values()`; afterwards the scalars exist.
        assert_eq!(cloned.values(), column.values());
        if let ScalarValues::LazyAllValidFloat64 { values, .. } = &cloned.values {
            assert!(values.get().is_some());
        }
        assert_eq!(cloned.validity(), column.validity());
        assert!(cloned.data.is_none());
    }
12162
    /// With missing values present the cache-based clone helper declines
    /// (`None`), and a regular clone still reproduces values and validity.
    #[test]
    fn dense_primitive_clone_falls_back_for_missing_values() {
        let column = Column::new(
            DType::Float64,
            vec![
                Scalar::Float64(1.5),
                Scalar::Null(NullKind::NaN),
                Scalar::Null(NullKind::Null),
            ],
        )
        .expect("column should build");

        assert!(column.clone_dense_values_from_cache().is_none());
        let cloned = column.clone();
        assert_eq!(cloned.values(), column.values());
        assert_eq!(cloned.validity(), column.validity());
        assert!(cloned.data.is_none());
    }
12181
12182 #[test]
12183 fn numeric_addition_propagates_missing() {
12184 let left = Column::from_values(vec![
12185 Scalar::Int64(1),
12186 Scalar::Null(NullKind::Null),
12187 Scalar::Float64(f64::NAN),
12188 ])
12189 .expect("left");
12190 let right = Column::from_values(vec![Scalar::Int64(2), Scalar::Int64(5), Scalar::Int64(3)])
12191 .expect("right");
12192
12193 let out = left
12194 .binary_numeric(&right, ArithmeticOp::Add)
12195 .expect("add should pass");
12196
12197 assert_eq!(out.values()[0], Scalar::Float64(3.0));
12198 assert_eq!(out.values()[1], Scalar::Null(NullKind::NaN));
12199 assert_eq!(out.values()[2], Scalar::Null(NullKind::NaN));
12200 }
12201
12202 #[test]
12203 fn sparse_column_omits_fill_values_and_materializes_dense() {
12204 let dtype = SparseDType::new(DType::Int64, Scalar::Int64(0)).expect("sparse dtype");
12205 let sparse = SparseColumn::from_dense(
12206 dtype,
12207 vec![
12208 Scalar::Int64(0),
12209 Scalar::Int64(5),
12210 Scalar::Int64(0),
12211 Scalar::Int64(-2),
12212 ],
12213 )
12214 .expect("sparse column");
12215
12216 assert_eq!(sparse.value_dtype(), DType::Int64);
12217 assert_eq!(sparse.fill_value(), &Scalar::Int64(0));
12218 assert_eq!(sparse.len(), 4);
12219 assert_eq!(sparse.npoints(), 2);
12220 assert_eq!(sparse.indices(), &[1, 3]);
12221 assert_eq!(
12222 sparse.stored_values(),
12223 &[Scalar::Int64(5), Scalar::Int64(-2)]
12224 );
12225
12226 let dense = sparse.to_dense_column().expect("dense column");
12227 assert_eq!(dense.dtype(), DType::Int64);
12228 assert_eq!(
12229 dense.values(),
12230 &[
12231 Scalar::Int64(0),
12232 Scalar::Int64(5),
12233 Scalar::Int64(0),
12234 Scalar::Int64(-2),
12235 ]
12236 );
12237 }
12238
12239 #[test]
12240 fn sparse_column_preserves_nulls_when_fill_is_not_missing() {
12241 let dtype = SparseDType::new(DType::Float64, Scalar::Float64(0.0)).expect("sparse dtype");
12242 let sparse = SparseColumn::from_dense(
12243 dtype,
12244 vec![
12245 Scalar::Float64(0.0),
12246 Scalar::Null(NullKind::NaN),
12247 Scalar::Float64(2.5),
12248 ],
12249 )
12250 .expect("sparse column");
12251
12252 assert_eq!(sparse.indices(), &[1, 2]);
12253 assert_eq!(sparse.npoints(), 2);
12254 assert!((sparse.density() - (2.0 / 3.0)).abs() < f64::EPSILON);
12255 assert!(sparse.stored_values()[0].is_missing());
12256 assert_eq!(sparse.stored_values()[1], Scalar::Float64(2.5));
12257
12258 let dense = sparse.to_dense_column().expect("dense column");
12259 assert_eq!(
12260 dense.values(),
12261 &[
12262 Scalar::Float64(0.0),
12263 Scalar::Null(NullKind::NaN),
12264 Scalar::Float64(2.5),
12265 ]
12266 );
12267 }
12268
12269 #[test]
12270 fn sparse_column_missing_fill_omits_missing_values() {
12271 let dtype =
12272 SparseDType::new(DType::Float64, Scalar::Null(NullKind::NaN)).expect("sparse dtype");
12273 let sparse = SparseColumn::from_dense(
12274 dtype,
12275 vec![
12276 Scalar::Null(NullKind::Null),
12277 Scalar::Float64(1.5),
12278 Scalar::Float64(f64::NAN),
12279 ],
12280 )
12281 .expect("sparse column");
12282
12283 assert_eq!(sparse.fill_value(), &Scalar::Null(NullKind::NaN));
12284 assert_eq!(sparse.indices(), &[1]);
12285 assert_eq!(sparse.stored_values(), &[Scalar::Float64(1.5)]);
12286 assert_eq!(
12287 sparse.to_dense_values(),
12288 vec![
12289 Scalar::Null(NullKind::NaN),
12290 Scalar::Float64(1.5),
12291 Scalar::Null(NullKind::NaN),
12292 ]
12293 );
12294 }
12295
12296 #[test]
12299 fn validity_mask_from_values_packs_correctly() {
12300 let values = vec![
12301 Scalar::Int64(1),
12302 Scalar::Null(NullKind::Null),
12303 Scalar::Int64(3),
12304 ];
12305 let mask = ValidityMask::from_values(&values);
12306 assert_eq!(mask.len(), 3);
12307 assert!(mask.get(0));
12308 assert!(!mask.get(1));
12309 assert!(mask.get(2));
12310 assert_eq!(mask.count_valid(), 2);
12311 }
12312
12313 #[test]
12314 fn validity_mask_all_valid() {
12315 let mask = ValidityMask::all_valid(100);
12316 assert_eq!(mask.len(), 100);
12317 assert_eq!(mask.count_valid(), 100);
12318 assert!(
12319 mask.words.is_empty(),
12320 "all-valid masks store only the logical length"
12321 );
12322 for i in 0..100 {
12323 assert!(mask.get(i), "bit {i} should be valid");
12324 }
12325 }
12326
    /// The word-free all-valid sentinel must be indistinguishable from a mask
    /// built from explicit all-ones words, for lengths around the 64-bit word
    /// boundaries.
    #[test]
    fn validity_mask_all_valid_sentinel_matches_explicit_words() {
        for len in [1, 2, 63, 64, 65, 127, 128, 129] {
            let sentinel = ValidityMask::all_valid(len);
            let explicit =
                ValidityMask::from_words(ValidityMask::materialized_all_valid_words(len), len);

            assert_eq!(sentinel, explicit, "len {len}");
            assert_eq!(
                sentinel.bits().collect::<Vec<_>>(),
                explicit.bits().collect::<Vec<_>>(),
                "len {len}"
            );
            assert!(sentinel.all(), "len {len}");
            assert_eq!(sentinel.count_invalid(), 0, "len {len}");
        }
    }
12344
    /// Setting an already-valid bit keeps the sentinel form; clearing a bit
    /// forces the words to be materialized with only that bit cleared.
    #[test]
    fn validity_mask_all_valid_sentinel_materializes_on_clear() {
        let mut mask = ValidityMask::all_valid(130);
        // Setting to `true` must not materialize the words.
        mask.set(64, true);
        assert!(
            mask.words.is_empty(),
            "setting a valid bit preserves the sentinel"
        );

        // Bit 64 is the first bit of the second word.
        mask.set(64, false);
        assert!(!mask.words.is_empty(), "clearing a bit materializes words");
        assert_eq!(mask.len(), 130);
        assert_eq!(mask.count_valid(), 129);
        assert!(!mask.get(64));
        assert!(mask.get(63));
        assert!(mask.get(65));
        assert_eq!(mask.bits().filter(|valid| *valid).count(), 129);
    }
12363
#[test]
fn validity_mask_all_invalid() {
    let mask = ValidityMask::all_invalid(100);

    assert_eq!(mask.len(), 100);
    assert_eq!(mask.count_valid(), 0);

    // No bit may read back as valid.
    (0..100).for_each(|i| assert!(!mask.get(i), "bit {i} should be invalid"));
}
12373
#[test]
fn validity_mask_set_and_get() {
    let mut mask = ValidityMask::all_invalid(128);

    // Set the first and last bit of each of the two 64-bit words.
    for bit in [0, 63, 64, 127] {
        mask.set(bit, true);
    }
    for bit in [0, 63, 64, 127] {
        assert!(mask.get(bit));
    }
    assert!(!mask.get(1));
    assert_eq!(mask.count_valid(), 4);

    // Clearing one bit is visible through both `get` and the count.
    mask.set(63, false);
    assert!(!mask.get(63));
    assert_eq!(mask.count_valid(), 3);
}
12392
#[test]
fn validity_mask_and_or_not() {
    // a = {0, 1} and b = {1, 2} over four slots.
    let mut a = ValidityMask::all_invalid(4);
    for bit in [0, 1] {
        a.set(bit, true);
    }
    let mut b = ValidityMask::all_invalid(4);
    for bit in [1, 2] {
        b.set(bit, true);
    }

    let both = a.and_mask(&b);
    for (bit, expected) in [(1, true), (0, false), (2, false)] {
        assert_eq!(both.get(bit), expected);
    }
    assert_eq!(both.count_valid(), 1);

    let either = a.or_mask(&b);
    for (bit, expected) in [(0, true), (1, true), (2, true), (3, false)] {
        assert_eq!(either.get(bit), expected);
    }
    assert_eq!(either.count_valid(), 3);

    let inverted = a.not_mask();
    for (bit, expected) in [(0, false), (1, false), (2, true), (3, true)] {
        assert_eq!(inverted.get(bit), expected);
    }
    assert_eq!(inverted.count_valid(), 2);
}
12423
#[test]
fn validity_mask_sentinel_mask_algebra_matches_explicit_bitmap() {
    let all = ValidityMask::all_valid(5);
    let nullable = ValidityMask::from_values(&[
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(5),
    ]);

    // AND with an all-valid mask returns the other operand; OR returns all-valid.
    let and_left = all.and_mask(&nullable);
    assert_eq!(and_left, nullable);
    let and_right = nullable.and_mask(&all);
    assert_eq!(and_right, nullable);
    let or_left = all.or_mask(&nullable);
    assert_eq!(or_left, all);
    let or_right = nullable.or_mask(&all);
    assert_eq!(or_right, all);

    // XOR against all-valid flags exactly the invalid slots.
    let xor_bits: Vec<_> = all.xor_mask(&nullable).bits().collect();
    assert_eq!(xor_bits, vec![false, true, false, true, false]);

    let not_bits: Vec<_> = all.not_mask().bits().collect();
    assert_eq!(not_bits, vec![false, false, false, false, false]);

    let slice_bits: Vec<_> = all.slice(1, 3).bits().collect();
    assert_eq!(slice_bits, vec![true, true, true]);

    let joined = all.concat(&ValidityMask::all_valid(2));
    assert_eq!(joined, ValidityMask::all_valid(7));
}
12456
#[test]
fn validity_mask_bits_iterator() {
    // Both an explicit null and a Float64 NaN are expected to iterate as `false`.
    let mask = ValidityMask::from_values(&[
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
        Scalar::Float64(f64::NAN),
    ]);

    let observed = mask.bits().collect::<Vec<bool>>();
    assert_eq!(observed, vec![true, false, true, false]);
}
12469
#[test]
fn validity_mask_serde_round_trip() {
    let mask = ValidityMask::from_values(&[
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
    ]);

    // Serialize to JSON and read it back; the mask must survive unchanged.
    let json = serde_json::to_string(&mask).expect("serialize");
    let back: ValidityMask = serde_json::from_str(&json).expect("deserialize");
    assert_eq!(mask, back);

    // The JSON form is expected to contain a `bits` key.
    let has_bits_field = json.contains("\"bits\"");
    assert!(has_bits_field, "should serialize as bits field");
}
12484
#[test]
fn validity_mask_empty() {
    // A mask built from no values reports zero everywhere.
    let no_values: [Scalar; 0] = [];
    let mask = ValidityMask::from_values(&no_values);

    assert!(mask.is_empty());
    assert_eq!(mask.len(), 0);
    assert_eq!(mask.count_valid(), 0);
    let iterated = mask.bits().count();
    assert_eq!(iterated, 0);
}
12493
#[test]
fn validity_mask_count_invalid_matches_complement() {
    // Three integers and two nulls of different kinds.
    let values = [
        Scalar::Int64(1),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(2),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
    ];
    let mask = ValidityMask::from_values(&values);

    assert_eq!(mask.count_valid(), 3);
    assert_eq!(mask.count_invalid(), 2);

    // The valid and invalid counts must add up to the length.
    let total = mask.count_valid() + mask.count_invalid();
    assert_eq!(total, mask.len());
}
12507
#[test]
fn validity_mask_any_and_all() {
    // Fully valid: both predicates hold.
    let all_set = ValidityMask::all_valid(4);
    assert!(all_set.any());
    assert!(all_set.all());

    // Fully invalid: neither predicate holds.
    let none_set = ValidityMask::all_invalid(4);
    assert!(!none_set.any());
    assert!(!none_set.all());

    // One valid and one invalid slot: `any` holds, `all` does not.
    let mixed_values = [Scalar::Int64(1), Scalar::Null(NullKind::NaN)];
    let mixed = ValidityMask::from_values(&mixed_values);
    assert!(mixed.any());
    assert!(!mixed.all());

    // Zero-length mask: `any` is false while `all` is true.
    let empty = ValidityMask::all_invalid(0);
    assert!(!empty.any());
    assert!(empty.all());
}
12526
#[test]
fn validity_mask_xor_finds_differences() {
    // The two inputs differ in validity only at index 1.
    let lhs = ValidityMask::from_values(&[
        Scalar::Int64(1),
        Scalar::Int64(2),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(4),
    ]);
    let rhs = ValidityMask::from_values(&[
        Scalar::Int64(1),
        Scalar::Null(NullKind::NaN),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(4),
    ]);

    let diff = lhs.xor_mask(&rhs);
    assert_eq!(diff.len(), 4);
    for (idx, expected) in [false, true, false, false].into_iter().enumerate() {
        assert_eq!(diff.get(idx), expected);
    }
}
12552
#[test]
fn validity_mask_slice_extracts_range() {
    // Source validity: [valid, invalid, valid, valid, invalid].
    let mask = ValidityMask::from_values(&[
        Scalar::Int64(1),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(3),
        Scalar::Int64(4),
        Scalar::Null(NullKind::NaN),
    ]);

    // Three bits starting at index 1 -> [invalid, valid, valid].
    let sub = mask.slice(1, 3);
    assert_eq!(sub.len(), 3);
    for (idx, expected) in [false, true, true].into_iter().enumerate() {
        assert_eq!(sub.get(idx), expected);
    }
}
12568
#[test]
fn validity_mask_slice_past_end_clamps() {
    let mask = ValidityMask::all_valid(3);

    // A length running past the end is cut down to what is available.
    let clamped = mask.slice(2, 10);
    assert_eq!(clamped.len(), 1);
    assert!(clamped.get(0));

    // A start beyond the end produces an empty mask.
    let out_of_range = mask.slice(100, 5);
    assert!(out_of_range.is_empty());
}
12579
#[test]
fn validity_mask_concat_appends() {
    let head = ValidityMask::from_values(&[Scalar::Int64(1), Scalar::Null(NullKind::NaN)]);
    let tail = ValidityMask::from_values(&[Scalar::Int64(2), Scalar::Int64(3)]);

    // The result holds the bits of the first mask followed by those of the second.
    let merged = head.concat(&tail);
    assert_eq!(merged.len(), 4);
    for (idx, expected) in [true, false, true, true].into_iter().enumerate() {
        assert_eq!(merged.get(idx), expected);
    }
}
12591
#[test]
fn validity_mask_first_last_valid() {
    // Valid entries occupy indices 2 and 3 only.
    let values = [
        Scalar::Null(NullKind::NaN),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(1),
        Scalar::Int64(2),
        Scalar::Null(NullKind::NaN),
    ];
    let mask = ValidityMask::from_values(&values);
    assert_eq!(mask.first_valid(), Some(2));
    assert_eq!(mask.last_valid(), Some(3));

    // With no valid entries both lookups return `None`.
    let none_set = ValidityMask::all_invalid(3);
    assert_eq!(none_set.first_valid(), None);
    assert_eq!(none_set.last_valid(), None);
}
12608
#[test]
fn validity_mask_boundary_65_elements() {
    // 64 valid integers followed by one null, so the null falls in a second word.
    let values: Vec<Scalar> = (0..65)
        .map(|slot| {
            if slot == 64 {
                Scalar::Null(NullKind::Null)
            } else {
                Scalar::Int64(1)
            }
        })
        .collect();

    let mask = ValidityMask::from_values(&values);
    assert_eq!(mask.len(), 65);
    assert_eq!(mask.count_valid(), 64);
    assert!(mask.get(63));
    assert!(!mask.get(64));
}
12619
#[test]
fn validity_mask_equality() {
    let valid_then_null = [Scalar::Int64(1), Scalar::Null(NullKind::Null)];
    let null_then_valid = [Scalar::Null(NullKind::Null), Scalar::Int64(1)];

    // Masks built from the same pattern are equal; the mirrored pattern is not.
    let a = ValidityMask::from_values(&valid_then_null);
    let b = ValidityMask::from_values(&valid_then_null);
    let c = ValidityMask::from_values(&null_then_valid);
    assert_eq!(a, b);
    assert_ne!(a, c);
}
12628
#[test]
fn validity_mask_nan_is_invalid() {
    // One ordinary float, one float carrying NaN, one explicit NaN-null.
    let mask = ValidityMask::from_values(&[
        Scalar::Float64(1.0),
        Scalar::Float64(f64::NAN),
        Scalar::Null(NullKind::NaN),
    ]);

    assert!(mask.get(0));
    let float_nan_valid = mask.get(1);
    assert!(!float_nan_valid, "Float64(NaN) should be invalid");
    let null_nan_valid = mask.get(2);
    assert!(!null_nan_valid, "Null(NaN) should be invalid");
    assert_eq!(mask.count_valid(), 1);
}
12642
#[test]
fn validity_mask_dense_null_half() {
    // Even positions hold integers, odd positions hold nulls.
    let mut values: Vec<Scalar> = Vec::with_capacity(1000);
    for i in 0..1000 {
        let entry = if i % 2 == 0 {
            Scalar::Int64(i)
        } else {
            Scalar::Null(NullKind::Null)
        };
        values.push(entry);
    }

    let mask = ValidityMask::from_values(&values);
    assert_eq!(mask.len(), 1000);
    assert_eq!(mask.count_valid(), 500);
}
12658
#[test]
fn column_data_float64_roundtrip() {
    let values = vec![
        Scalar::Float64(1.5),
        Scalar::Null(NullKind::NaN),
        Scalar::Float64(3.0),
    ];

    // Convert to typed storage and back, using the mask derived from the same values.
    let data = super::ColumnData::from_scalars(&values, fp_types::DType::Float64);
    let validity = ValidityMask::from_values(&values);
    let back = data.to_scalars(fp_types::DType::Float64, &validity);

    assert_eq!(back.len(), 3);
    assert_eq!(back[0], Scalar::Float64(1.5));
    let middle_is_nan = back[1].is_nan();
    assert!(middle_is_nan, "position 1 should be NaN-missing");
    assert_eq!(back[2], Scalar::Float64(3.0));
}
12676
#[test]
fn column_data_int64_roundtrip() {
    let values = vec![
        Scalar::Int64(10),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(30),
    ];

    // The typed storage keeps one slot per input, including the null.
    let data = super::ColumnData::from_scalars(&values, fp_types::DType::Int64);
    let validity = ValidityMask::from_values(&values);
    assert_eq!(data.len(), 3);

    let back = data.to_scalars(fp_types::DType::Int64, &validity);
    assert_eq!(back[0], Scalar::Int64(10));
    assert!(back[1].is_missing());
    assert_eq!(back[2], Scalar::Int64(30));
}
12692
#[test]
fn column_data_interval_roundtrip_and_column_uniques_5g5uj() {
    let first = Interval::new(0.0, 1.0, IntervalClosed::Right);
    let second = Interval::new(1.0, 2.0, IntervalClosed::Right);
    // `first` appears twice so the column contains a duplicate.
    let values = vec![
        Scalar::Interval(first),
        Scalar::Null(NullKind::Null),
        Scalar::Interval(second),
        Scalar::Interval(first),
    ];

    // Part 1: typed storage round trip.
    let data = super::ColumnData::from_scalars(&values, DType::Interval);
    let validity = ValidityMask::from_values(&values);
    assert_eq!(data.len(), 4);
    let back = data.to_scalars(DType::Interval, &validity);
    assert_eq!(back[0], Scalar::Interval(first));
    assert!(back[1].is_missing());
    assert_eq!(back[2], Scalar::Interval(second));
    assert_eq!(back[3], Scalar::Interval(first));

    // Part 2: column-level duplicate detection and uniques.
    let column = Column::new(DType::Interval, values).expect("interval column");
    assert_eq!(column.dtype(), DType::Interval);
    assert!(column.has_duplicates());
    let uniques = column.unique().expect("unique intervals");
    let expected_uniques = [Scalar::Interval(first), Scalar::Interval(second)];
    assert_eq!(uniques.values(), &expected_uniques);
}
12721
#[test]
fn vectorized_f64_addition_matches_scalar() {
    let lhs_values = vec![
        Scalar::Float64(1.0),
        Scalar::Float64(2.0),
        Scalar::Float64(3.0),
    ];
    let rhs_values = vec![
        Scalar::Float64(10.0),
        Scalar::Float64(20.0),
        Scalar::Float64(30.0),
    ];
    let left = Column::from_values(lhs_values).expect("left");
    let right = Column::from_values(rhs_values).expect("right");

    // Element-wise sums of the two float columns.
    let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
    for (idx, sum) in [11.0, 22.0, 33.0].into_iter().enumerate() {
        assert_eq!(result.values()[idx], Scalar::Float64(sum));
    }
}
12742
#[test]
fn vectorized_i64_addition_matches_scalar() {
    let lhs_values = vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)];
    let rhs_values = vec![Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)];
    let left = Column::from_values(lhs_values).expect("left");
    let right = Column::from_values(rhs_values).expect("right");

    // Integer inputs are expected to yield Int64 sums.
    let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
    for (idx, sum) in [11, 22, 33].into_iter().enumerate() {
        assert_eq!(result.values()[idx], Scalar::Int64(sum));
    }
}
12759
#[test]
fn vectorized_binary_all_valid_keeps_typed_output_lazy() {
    let left = Column::from_f64_values(vec![1.0, 2.0, 3.0]);
    let right = Column::from_f64_values(vec![10.0, 20.0, 30.0]);

    let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");

    assert!(result.validity().all());
    let expected_sums = [11.0, 22.0, 33.0];
    assert_eq!(result.as_f64_slice(), Some(expected_sums.as_slice()));

    // After the checks above the scalar cache must still be unset.
    let scalars_unmaterialized = matches!(
        &result.values,
        ScalarValues::LazyAllValidFloat64 { values, .. } if values.get().is_none()
    );
    assert!(scalars_unmaterialized);
}
12774
#[test]
fn vectorized_binary_operation_nan_matches_scalar_validity() {
    // inf - inf is NaN under IEEE 754, so the operation itself introduces the NaN.
    let left = Column::from_f64_values(vec![f64::INFINITY]);
    let right = Column::from_f64_values(vec![f64::INFINITY]);

    let result = left.binary_numeric(&right, ArithmeticOp::Sub).expect("sub");

    let slot_is_valid = result.validity().get(0);
    assert!(!slot_is_valid);
    let holds_float_nan = matches!(result.values()[0], Scalar::Float64(v) if v.is_nan());
    assert!(holds_float_nan);
}
12785
#[test]
fn vectorized_f64_with_nulls_propagates_missing() {
    // The left column is missing at index 1, the right column at index 2.
    let lhs_values = vec![
        Scalar::Float64(1.0),
        Scalar::Null(NullKind::NaN),
        Scalar::Float64(3.0),
    ];
    let rhs_values = vec![
        Scalar::Float64(10.0),
        Scalar::Float64(20.0),
        Scalar::Null(NullKind::NaN),
    ];
    let left = Column::from_values(lhs_values).expect("left");
    let right = Column::from_values(rhs_values).expect("right");

    let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");

    assert_eq!(result.values()[0], Scalar::Float64(11.0));
    let null_plus_valid = &result.values()[1];
    assert!(null_plus_valid.is_nan(), "null+valid should be NaN");
    let valid_plus_null = &result.values()[2];
    assert!(valid_plus_null.is_nan(), "valid+null should be NaN");
}
12806
#[test]
fn aligned_binary_f64_matches_reindex_then_binary_numeric() {
    let lhs_values = vec![
        Scalar::Float64(1.0),
        Scalar::Float64(f64::NAN),
        Scalar::Float64(3.5),
    ];
    let rhs_values = vec![
        Scalar::Float64(10.0),
        Scalar::Float64(20.0),
        Scalar::Null(NullKind::NaN),
    ];
    let left = Column::new(DType::Float64, lhs_values).expect("left");
    let right = Column::new(DType::Float64, rhs_values).expect("right");

    // Shuffled positions with one gap (`None`) on each side.
    let left_positions = [Some(2), None, Some(1), Some(0)];
    let right_positions = [None, Some(0), Some(2), Some(1)];

    // Reference path: reindex each side, then run the generic operation.
    let reindexed_left = left
        .reindex_by_positions(&left_positions)
        .expect("left reindex");
    let reindexed_right = right
        .reindex_by_positions(&right_positions)
        .expect("right reindex");
    let expected = reindexed_left
        .binary_numeric(&reindexed_right, ArithmeticOp::Add)
        .expect("generic add");

    // Path under test: alignment and addition in one call.
    let actual = left
        .aligned_binary_f64(&right, &left_positions, &right_positions, ArithmeticOp::Add)
        .expect("aligned add");

    assert_eq!(actual.dtype(), expected.dtype());
    assert_eq!(actual.values(), expected.values());
    assert_eq!(actual.validity().len(), expected.validity().len());
    (0..actual.len()).for_each(|idx| {
        assert_eq!(actual.validity().get(idx), expected.validity().get(idx));
    });
}
12850
#[test]
fn aligned_binary_f64_all_valid_keeps_typed_output_lazy() {
    let left = Column::from_f64_values(vec![1.0, 2.0, 3.0]);
    let right = Column::from_f64_values(vec![10.0, 20.0, 30.0]);

    // Identity alignment on both sides: no gaps are introduced.
    let left_positions = [Some(0), Some(1), Some(2)];
    let right_positions = [Some(0), Some(1), Some(2)];

    let actual = left
        .aligned_binary_f64(&right, &left_positions, &right_positions, ArithmeticOp::Add)
        .expect("aligned add");

    assert!(actual.validity().all());
    let expected_sums = [11.0, 22.0, 33.0];
    assert_eq!(actual.as_f64_slice(), Some(expected_sums.as_slice()));

    // The scalar cache must still be unset at this point.
    let scalars_unmaterialized = matches!(
        &actual.values,
        ScalarValues::LazyAllValidFloat64 { values, .. } if values.get().is_none()
    );
    assert!(scalars_unmaterialized);
}
12869
#[test]
fn aligned_binary_f64_nullable_gaps_keep_typed_output_lazy() {
    let left = Column::from_f64_values(vec![1.0, 2.0, 3.0]);
    let right = Column::from_f64_values(vec![10.0, 20.0, 30.0]);

    // Offset alignments: the last left slot and the first right slot are gaps.
    let left_positions = [Some(0), Some(1), Some(2), None];
    let right_positions = [None, Some(0), Some(1), Some(2)];

    // Reference result via explicit reindexing.
    let reindexed_left = left
        .reindex_by_positions(&left_positions)
        .expect("left reindex");
    let reindexed_right = right
        .reindex_by_positions(&right_positions)
        .expect("right reindex");
    let expected = reindexed_left
        .binary_numeric(&reindexed_right, ArithmeticOp::Add)
        .expect("generic add");

    let actual = left
        .aligned_binary_f64(&right, &left_positions, &right_positions, ArithmeticOp::Add)
        .expect("aligned add");

    assert_eq!(actual.dtype(), expected.dtype());
    assert_eq!(actual.validity(), expected.validity());

    // Check the scalar cache is unset BEFORE `values()` is called below.
    let scalars_unmaterialized = matches!(
        &actual.values,
        ScalarValues::LazyNullableFloat64 { values, .. } if values.get().is_none()
    );
    assert!(scalars_unmaterialized);
    assert_eq!(actual.values(), expected.values());
}
12898
#[test]
fn aligned_binary_f64_int64_unit_ranges_matches_position_alignment() {
    let left = Column::from_f64_values(vec![1.0, 2.0, 3.0]);
    let right = Column::from_f64_values(vec![10.0, 20.0, 30.0]);

    // Reference: the position-based alignment with explicit gaps.
    let left_positions = [Some(0), Some(1), Some(2), None];
    let right_positions = [None, Some(0), Some(1), Some(2)];
    let expected = left
        .aligned_binary_f64(&right, &left_positions, &right_positions, ArithmeticOp::Add)
        .expect("position aligned add");

    // Under test: the same alignment expressed as integer ranges.
    let actual = left
        .aligned_binary_f64_int64_unit_ranges(&right, (0, 2), (1, 3), (0, 3), ArithmeticOp::Add)
        .expect("unit range aligned add");

    assert_eq!(actual.dtype(), expected.dtype());
    assert_eq!(actual.validity(), expected.validity());

    // Check the scalar cache is unset BEFORE `values()` is called below.
    let scalars_unmaterialized = matches!(
        &actual.values,
        ScalarValues::LazyNullableFloat64 { values, .. } if values.get().is_none()
    );
    assert!(scalars_unmaterialized);
    assert_eq!(actual.values(), expected.values());
}
12921
#[test]
fn aligned_binary_f64_operation_nan_keeps_float_nan_materialization() {
    // inf - inf is NaN under IEEE 754, so the NaN is produced by the operation.
    let left = Column::from_f64_values(vec![f64::INFINITY]);
    let right = Column::from_f64_values(vec![f64::INFINITY]);
    let positions = [Some(0)];

    let actual = left
        .aligned_binary_f64(&right, &positions, &positions, ArithmeticOp::Sub)
        .expect("aligned sub");

    let slot_is_valid = actual.validity().get(0);
    assert!(!slot_is_valid);

    // Check the scalar cache is unset BEFORE `values()` is called below.
    let scalars_unmaterialized = matches!(
        &actual.values,
        ScalarValues::LazyNullableFloat64 { values, .. } if values.get().is_none()
    );
    assert!(scalars_unmaterialized);

    // Once materialized, the slot holds Float64(NaN).
    let holds_float_nan = matches!(actual.values()[0], Scalar::Float64(value) if value.is_nan());
    assert!(holds_float_nan);
}
12939
#[test]
fn apply_f64_slices_matches_fn_pointer_per_element_f64simd() {
    // Checks `apply_f64_slices` against the per-element function returned by
    // `binary_f64_apply`, for every arithmetic op and every rotation of the
    // probe values so each value meets each other value as right operand.
    // Results are compared by bit pattern, so NaN outputs must match exactly
    // and `0.0` is distinguished from `-0.0`.
    let vals = [
        0.0_f64,
        -0.0,
        1.0,
        -1.0,
        2.5,
        -3.0,
        4.0,
        0.5,
        f64::NAN,
        f64::INFINITY,
        f64::NEG_INFINITY,
        1e300,
        -1e-300,
    ];
    let a: Vec<f64> = vals.to_vec();
    for op in [
        ArithmeticOp::Add,
        ArithmeticOp::Sub,
        ArithmeticOp::Mul,
        ArithmeticOp::Div,
        ArithmeticOp::Mod,
        ArithmeticOp::Pow,
        ArithmeticOp::FloorDiv,
    ] {
        // The reference function depends only on `op`; look it up once per op.
        let apply = super::binary_f64_apply(op);
        for shift in 0..vals.len() {
            // Right operand: the probe values rotated left by `shift`.
            let b: Vec<f64> = (0..vals.len())
                .map(|i| vals[(i + shift) % vals.len()])
                .collect();
            let got = super::apply_f64_slices(op, &a, &b);
            // Without this, extra trailing output elements would go unnoticed
            // because the comparison loop below only visits `a.len()` slots.
            assert_eq!(
                got.len(),
                a.len(),
                "op={op:?} shift={shift}: output length differs from input length"
            );
            let expected: Vec<f64> = a.iter().zip(&b).map(|(x, y)| apply(*x, *y)).collect();
            for i in 0..a.len() {
                assert_eq!(
                    got[i].to_bits(),
                    expected[i].to_bits(),
                    "op={op:?} a={} b={}",
                    a[i],
                    b[i]
                );
            }
        }
    }
}
12990
#[test]
fn aligned_binary_f64_same_positions_matches_identity_alignment() {
    // `aligned_binary_f64_same_positions` must agree with `aligned_binary_f64`
    // when the latter is given identity positions on both sides.
    let left = Column::new(
        DType::Float64,
        vec![
            Scalar::Float64(1.0),
            Scalar::Float64(f64::NAN),
            Scalar::Float64(3.0),
        ],
    )
    .expect("left");
    let right = Column::new(
        DType::Float64,
        vec![
            Scalar::Float64(10.0),
            Scalar::Float64(20.0),
            Scalar::Null(NullKind::NaN),
        ],
    )
    .expect("right");
    let positions = [Some(0), Some(1), Some(2)];

    let expected = left
        .aligned_binary_f64(&right, &positions, &positions, ArithmeticOp::Add)
        .expect("identity aligned add");
    let actual = left
        .aligned_binary_f64_same_positions(&right, ArithmeticOp::Add)
        .expect("same-position add");

    assert_eq!(actual.dtype(), expected.dtype());
    assert_eq!(actual.values(), expected.values());
    // Compare mask lengths first: the loop below is bounded by `actual.len()`
    // and would not notice a length mismatch between the two masks.
    assert_eq!(actual.validity().len(), expected.validity().len());
    for idx in 0..actual.len() {
        assert_eq!(actual.validity().get(idx), expected.validity().get(idx));
    }
}
13026
#[test]
fn aligned_binary_f64_borrows_lazy_float64_clone_data() {
    // Runs `aligned_binary_f64` on cloned typed-f64 columns and checks that
    // neither input has its scalar cache populated, before or after the call.
    let left = Column::from_f64_values(vec![1.0, f64::NAN, 4.0]).clone();
    let right = Column::from_f64_values(vec![10.0, 20.0, 30.0]).clone();

    // Preconditions: clones carry no `data` and an unset scalar cache.
    assert!(left.data.is_none());
    assert!(right.data.is_none());
    assert!(matches!(
        &left.values,
        ScalarValues::LazyAllValidFloat64 { values, .. } if values.get().is_none()
    ));
    assert!(matches!(
        &right.values,
        ScalarValues::LazyAllValidFloat64 { values, .. } if values.get().is_none()
    ));

    let left_positions = [Some(0), Some(1), Some(2), None];
    let right_positions = [Some(2), Some(1), None, Some(0)];
    let actual = left
        .aligned_binary_f64(&right, &left_positions, &right_positions, ArithmeticOp::Add)
        .expect("aligned add");

    // Slot 0: 1.0 + 30.0. Slot 1 pairs the NaN input; slots 2 and 3 each have
    // a gap on one side.
    assert_eq!(
        actual.values(),
        &[
            Scalar::Float64(31.0),
            Scalar::Null(NullKind::NaN),
            Scalar::Null(NullKind::NaN),
            Scalar::Null(NullKind::NaN),
        ]
    );

    // Postconditions, asserted unconditionally so they cannot be skipped.
    // The variant was already asserted above and neither column is mutably
    // borrowed afterwards, so only the cache state can have changed.
    assert!(
        matches!(
            &left.values,
            ScalarValues::LazyAllValidFloat64 { values, .. } if values.get().is_none()
        ),
        "left input scalar cache was populated by aligned_binary_f64"
    );
    assert!(
        matches!(
            &right.values,
            ScalarValues::LazyAllValidFloat64 { values, .. } if values.get().is_none()
        ),
        "right input scalar cache was populated by aligned_binary_f64"
    );
}
13065
#[test]
fn from_f64_values_marks_nan_missing_like_scalar_path() {
    // A column built from raw f64 values must report the same validity as one
    // built from `Scalar::Float64` values, including for NaN entries.
    let typed = Column::from_f64_values(vec![1.0, f64::NAN, 3.0, f64::NAN]);
    let scalar = Column::new(
        DType::Float64,
        vec![
            Scalar::Float64(1.0),
            Scalar::Float64(f64::NAN),
            Scalar::Float64(3.0),
            Scalar::Float64(f64::NAN),
        ],
    )
    .expect("scalar col");

    // The loop below is bounded by `typed.len()`; make sure both columns
    // cover the same index range before comparing slot by slot.
    assert_eq!(typed.len(), scalar.len());
    for idx in 0..typed.len() {
        assert_eq!(
            typed.validity().get(idx),
            scalar.validity().get(idx),
            "validity mismatch at {idx}"
        );
    }
    assert!(typed.validity().get(0));
    assert!(!typed.validity().get(1));
    assert!(typed.validity().get(2));
    assert!(!typed.validity().get(3));
    assert_eq!(typed.validity().count_valid(), 2);

    // With NaN entries present, no f64 slice is handed out.
    assert!(typed.as_f64_slice().is_none());

    // A NaN-free column is fully valid and exposes its f64 slice.
    let clean = Column::from_f64_values(vec![1.0, 2.0, 3.0]);
    assert!(clean.validity().all());
    assert_eq!(clean.as_f64_slice(), Some([1.0, 2.0, 3.0].as_slice()));
}
13106
#[test]
fn vectorized_i64_with_nulls_propagates_missing() {
    // The left column is null at index 1, the right column at index 2.
    let lhs_values = vec![
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
    ];
    let rhs_values = vec![
        Scalar::Int64(10),
        Scalar::Int64(20),
        Scalar::Null(NullKind::Null),
    ];
    let left = Column::from_values(lhs_values).expect("left");
    let right = Column::from_values(rhs_values).expect("right");

    let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");

    assert_eq!(result.values()[0], Scalar::Int64(11));
    for idx in [1, 2] {
        assert!(result.values()[idx].is_missing());
    }
}
13127
#[test]
fn column_from_values_preserves_mixed_utf8_numeric_scalars() {
    // A string mixed with an integer: the column is typed Utf8 and both
    // scalars are kept as given.
    let mixed = vec![Scalar::Utf8("x".into()), Scalar::Int64(1)];
    let column =
        Column::from_values(mixed).expect("mixed object-like constructor should succeed");

    assert_eq!(column.dtype(), DType::Utf8);
    let expected = [Scalar::Utf8("x".into()), Scalar::Int64(1)];
    assert_eq!(column.values(), &expected);
}
13139
#[test]
fn vectorized_division_promotes_to_float64() {
    let numerators = vec![Scalar::Int64(10), Scalar::Int64(21)];
    let denominators = vec![Scalar::Int64(3), Scalar::Int64(7)];
    let left = Column::from_values(numerators).expect("left");
    let right = Column::from_values(denominators).expect("right");

    // Dividing two integer columns is expected to yield a Float64 column.
    let result = left.binary_numeric(&right, ArithmeticOp::Div).expect("div");
    assert_eq!(result.dtype(), fp_types::DType::Float64);

    // 10 / 3 is inexact, so compare within a tolerance; 21 / 7 is exactly 3.0.
    let first_is_close =
        matches!(result.values()[0], Scalar::Float64(v) if (v - 10.0 / 3.0).abs() < 1e-10);
    assert!(first_is_close);
    assert_eq!(result.values()[1], Scalar::Float64(3.0));
}
13151
#[test]
fn vectorized_all_four_ops_f64() {
    // Single-element columns: 10.0 and 3.0.
    let left = Column::from_values(vec![Scalar::Float64(10.0)]).expect("left");
    let right = Column::from_values(vec![Scalar::Float64(3.0)]).expect("right");

    let add = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
    assert_eq!(add.values()[0], Scalar::Float64(13.0));

    let sub = left.binary_numeric(&right, ArithmeticOp::Sub).expect("sub");
    assert_eq!(sub.values()[0], Scalar::Float64(7.0));

    let mul = left.binary_numeric(&right, ArithmeticOp::Mul).expect("mul");
    assert_eq!(mul.values()[0], Scalar::Float64(30.0));

    // The quotient is inexact, so compare within a tolerance.
    let div = left.binary_numeric(&right, ArithmeticOp::Div).expect("div");
    let quotient_is_close =
        matches!(div.values()[0], Scalar::Float64(v) if (v - 10.0 / 3.0).abs() < 1e-10);
    assert!(quotient_is_close);
}
13167
#[test]
fn pandas_arithmetic_aliases_match_binary_numeric() {
    let left = Column::from_values(vec![Scalar::Float64(10.0)]).expect("left");
    let right = Column::from_values(vec![Scalar::Float64(3.0)]).expect("right");

    // Reference result for each operation via the generic entry point.
    let via_op = |op: ArithmeticOp| left.binary_numeric(&right, op);

    let added = left.add(&right).expect("add");
    assert_eq!(added, via_op(ArithmeticOp::Add).expect("add"));

    let subtracted = left.sub(&right).expect("sub");
    assert_eq!(subtracted, via_op(ArithmeticOp::Sub).expect("sub"));

    let multiplied = left.mul(&right).expect("mul");
    assert_eq!(multiplied, via_op(ArithmeticOp::Mul).expect("mul"));

    let divided = left.div(&right).expect("div");
    assert_eq!(divided, via_op(ArithmeticOp::Div).expect("div"));

    // `divide` must give the same result as `div`.
    let divide_alias = left.divide(&right).expect("divide");
    assert_eq!(divide_alias, left.div(&right).expect("div"));
}
13194
#[test]
fn remaining_pandas_arithmetic_aliases_match_binary_numeric() {
    let left = Column::from_values(vec![Scalar::Float64(10.0)]).expect("left");
    let right = Column::from_values(vec![Scalar::Float64(3.0)]).expect("right");

    // Long-form names checked against their short-form counterparts.
    let subtract_alias = left.subtract(&right).expect("subtract");
    assert_eq!(subtract_alias, left.sub(&right).expect("sub"));

    let multiply_alias = left.multiply(&right).expect("multiply");
    assert_eq!(multiply_alias, left.mul(&right).expect("mul"));

    let truediv_alias = left.truediv(&right).expect("truediv");
    assert_eq!(truediv_alias, left.div(&right).expect("div"));

    // Methods checked directly against the generic entry point.
    let via_op = |op: ArithmeticOp| left.binary_numeric(&right, op);

    let floordiv_alias = left.floordiv(&right).expect("floordiv");
    assert_eq!(
        floordiv_alias,
        via_op(ArithmeticOp::FloorDiv).expect("floordiv")
    );

    let mod_alias = left.r#mod(&right).expect("mod");
    assert_eq!(mod_alias, via_op(ArithmeticOp::Mod).expect("mod"));

    let pow_alias = left.pow(&right).expect("pow");
    assert_eq!(pow_alias, via_op(ArithmeticOp::Pow).expect("pow"));
}
13226
#[test]
fn pandas_reverse_arithmetic_aliases_swap_operands() {
    let series = Column::from_values(vec![Scalar::Float64(10.0)]).expect("series");
    let other = Column::from_values(vec![Scalar::Float64(3.0)]).expect("other");

    // Reference: the forward operation with `other` on the left-hand side.
    let swapped = |op: ArithmeticOp| other.binary_numeric(&series, op);

    let radd = series.radd(&other).expect("radd");
    assert_eq!(radd, swapped(ArithmeticOp::Add).expect("add"));

    let rsub = series.rsub(&other).expect("rsub");
    assert_eq!(rsub, swapped(ArithmeticOp::Sub).expect("sub"));

    let rmul = series.rmul(&other).expect("rmul");
    assert_eq!(rmul, swapped(ArithmeticOp::Mul).expect("mul"));

    let rdiv = series.rdiv(&other).expect("rdiv");
    assert_eq!(rdiv, swapped(ArithmeticOp::Div).expect("div"));

    // `rtruediv` must give the same result as `rdiv`.
    let rtruediv = series.rtruediv(&other).expect("rtruediv");
    assert_eq!(rtruediv, series.rdiv(&other).expect("rdiv"));

    let rfloordiv = series.rfloordiv(&other).expect("rfloordiv");
    assert_eq!(
        rfloordiv,
        swapped(ArithmeticOp::FloorDiv).expect("floordiv")
    );

    let rmod = series.rmod(&other).expect("rmod");
    assert_eq!(rmod, swapped(ArithmeticOp::Mod).expect("mod"));

    let rpow = series.rpow(&other).expect("rpow");
    assert_eq!(rpow, swapped(ArithmeticOp::Pow).expect("pow"));
}
13279
#[test]
fn vectorized_f64_mod_pow_floordiv() {
    let lhs_values = vec![
        Scalar::Float64(10.0),
        Scalar::Float64(2.0),
        Scalar::Float64(-3.0),
    ];
    let rhs_values = vec![
        Scalar::Float64(3.0),
        Scalar::Float64(3.0),
        Scalar::Float64(2.0),
    ];
    let left = Column::from_values(lhs_values).expect("left");
    let right = Column::from_values(rhs_values).expect("right");

    // True when the scalar is a Float64 within 1e-10 of `target`.
    let is_close = |scalar: &Scalar, target: f64| {
        matches!(scalar, Scalar::Float64(v) if (*v - target).abs() < 1e-10)
    };

    // -3 mod 2 is expected to be 1 (the result takes the divisor's sign).
    let modulo = left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod");
    assert_eq!(modulo.dtype(), DType::Float64);
    for (idx, target) in [1.0, 2.0, 1.0].into_iter().enumerate() {
        assert!(is_close(&modulo.values()[idx], target));
    }

    let pow = left.binary_numeric(&right, ArithmeticOp::Pow).expect("pow");
    assert_eq!(pow.dtype(), DType::Float64);
    for (idx, target) in [1000.0, 8.0, 9.0].into_iter().enumerate() {
        assert!(is_close(&pow.values()[idx], target));
    }

    // -3 floordiv 2 is expected to be -2 (rounds toward negative infinity).
    let floordiv = left
        .binary_numeric(&right, ArithmeticOp::FloorDiv)
        .expect("floordiv");
    assert_eq!(floordiv.dtype(), DType::Float64);
    for (idx, target) in [3.0, 0.0, -2.0].into_iter().enumerate() {
        assert!(is_close(&floordiv.values()[idx], target));
    }
}
13315
#[test]
fn int_pow_stays_int64_and_negative_exponent_raises_3w0xn() {
    let base_values = vec![Scalar::Int64(2), Scalar::Int64(3), Scalar::Int64(10)];
    let base = Column::from_values(base_values).expect("base");

    // Case 1: non-negative integer exponents keep the result as Int64.
    let exp_values = vec![Scalar::Int64(3), Scalar::Int64(2), Scalar::Int64(2)];
    let exp = Column::from_values(exp_values).expect("exp");
    let pow = base
        .binary_numeric(&exp, ArithmeticOp::Pow)
        .expect("int pow");
    assert_eq!(pow.dtype(), DType::Int64);
    for (idx, power) in [8, 9, 100].into_iter().enumerate() {
        assert_eq!(pow.values()[idx], Scalar::Int64(power));
    }

    // Case 2: a negative integer exponent fails the whole operation.
    let neg_exp_values = vec![Scalar::Int64(1), Scalar::Int64(-1), Scalar::Int64(2)];
    let neg_exp = Column::from_values(neg_exp_values).expect("neg_exp");
    let err = base
        .binary_numeric(&neg_exp, ArithmeticOp::Pow)
        .expect_err("negative integer power must raise");
    assert!(matches!(err, ColumnError::NegativeIntegerPower));

    // Case 3: a float exponent column gives a Float64 result.
    let exp_f_values = vec![
        Scalar::Float64(3.0),
        Scalar::Float64(2.0),
        Scalar::Float64(2.0),
    ];
    let exp_f = Column::from_values(exp_f_values).expect("exp_f");
    let pow_f = base
        .binary_numeric(&exp_f, ArithmeticOp::Pow)
        .expect("mixed int/float pow");
    assert_eq!(pow_f.dtype(), DType::Float64);
    let first_is_close =
        matches!(pow_f.values()[0], Scalar::Float64(v) if (v - 8.0).abs() < 1e-10);
    assert!(first_is_close);
}
13354
/// With Int64 operands and no zero divisor, `Mod` and `FloorDiv` must both
/// return Int64 columns holding the expected remainders and quotients.
#[test]
fn int64_mod_floordiv_preserves_dtype() {
    let int_cells =
        |raw: &[i64]| raw.iter().copied().map(Scalar::Int64).collect::<Vec<Scalar>>();

    let left = Column::from_values(int_cells(&[10, 20, 30])).expect("left");
    let right = Column::from_values(int_cells(&[3, 7, 4])).expect("right");

    // 10 % 3, 20 % 7, 30 % 4
    let modulo = left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod");
    assert_eq!(modulo.dtype(), DType::Int64, "mod should preserve Int64");
    for (slot, remainder) in [1_i64, 6, 2].into_iter().enumerate() {
        assert_eq!(modulo.values()[slot], Scalar::Int64(remainder));
    }

    // 10 // 3, 20 // 7, 30 // 4
    let floordiv = left
        .binary_numeric(&right, ArithmeticOp::FloorDiv)
        .expect("floordiv");
    assert_eq!(
        floordiv.dtype(),
        DType::Int64,
        "floordiv should preserve Int64"
    );
    for (slot, quotient) in [3_i64, 2, 7].into_iter().enumerate() {
        assert_eq!(floordiv.values()[slot], Scalar::Int64(quotient));
    }
}
13385
/// Covers all four sign combinations of (7, 3) for Int64 `Mod` and `FloorDiv`.
/// The expected values have the remainder take the divisor's sign and the
/// quotient round toward negative infinity.
#[test]
fn int64_mod_floordiv_match_pandas_negative_operand_signs() {
    let int_cells =
        |raw: &[i64]| raw.iter().copied().map(Scalar::Int64).collect::<Vec<Scalar>>();

    let left = Column::from_values(int_cells(&[7, -7, -7, 7])).expect("left");
    let right = Column::from_values(int_cells(&[-3, 3, -3, 3])).expect("right");

    let modulo = left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod");
    assert_eq!(modulo.dtype(), DType::Int64);
    let want_mod = int_cells(&[-2, 2, -1, 1]);
    assert_eq!(modulo.values(), want_mod.as_slice());

    let floordiv = left
        .binary_numeric(&right, ArithmeticOp::FloorDiv)
        .expect("floordiv");
    assert_eq!(floordiv.dtype(), DType::Int64);
    let want_floordiv = int_cells(&[-3, -3, 2, 2]);
    assert_eq!(floordiv.values(), want_floordiv.as_slice());
}
13429
/// Float64 counterpart of the signed-operand check: all four sign combinations
/// of (7.0, 3.0) through `Mod` and `FloorDiv`, compared with a 1e-10 tolerance.
#[test]
fn float64_mod_floordiv_match_pandas_negative_operand_signs() {
    let float_cells =
        |raw: &[f64]| raw.iter().copied().map(Scalar::Float64).collect::<Vec<Scalar>>();
    // True only for a Float64 cell within 1e-10 of `want`.
    let near = |cell: &Scalar, want: f64| {
        matches!(cell, Scalar::Float64(v) if (*v - want).abs() < 1e-10)
    };

    let left = Column::from_values(float_cells(&[7.0, -7.0, -7.0, 7.0])).expect("left");
    let right = Column::from_values(float_cells(&[-3.0, 3.0, -3.0, 3.0])).expect("right");

    let modulo = left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod");
    assert_eq!(modulo.dtype(), DType::Float64);
    for (slot, want) in [-2.0, 2.0, -1.0, 1.0].into_iter().enumerate() {
        assert!(near(&modulo.values()[slot], want));
    }

    let floordiv = left
        .binary_numeric(&right, ArithmeticOp::FloorDiv)
        .expect("floordiv");
    assert_eq!(floordiv.dtype(), DType::Float64);
    for (slot, want) in [-3.0, -3.0, 2.0, 2.0].into_iter().enumerate() {
        assert!(near(&floordiv.values()[slot], want));
    }
}
13463
/// When an Int64 divisor column contains a zero, `Mod` and `FloorDiv` must
/// return Float64 columns: the zero-divisor slot is NaN for `Mod` and infinite
/// for `FloorDiv`, while the other slots keep their ordinary results.
#[test]
fn int64_mod_floordiv_with_zero_promotes_to_float() {
    let int_cells = |raw: [i64; 3]| raw.into_iter().map(Scalar::Int64).collect::<Vec<Scalar>>();
    let left = Column::from_values(int_cells([10, 20, 30])).expect("left");
    let right = Column::from_values(int_cells([3, 0, 4])).expect("right");

    // Extracts the f64 payload of a slot, or `None` when the cell is not Float64.
    let float_at = |col: &Column, slot: usize| match col.values()[slot] {
        Scalar::Float64(v) => Some(v),
        _ => None,
    };
    let close_to = |got: Option<f64>, want: f64| got.is_some_and(|v| (v - want).abs() < 1e-10);

    let modulo = left.binary_numeric(&right, ArithmeticOp::Mod).expect("mod");
    assert_eq!(
        modulo.dtype(),
        DType::Float64,
        "mod with zero should promote to Float64"
    );
    assert!(close_to(float_at(&modulo, 0), 1.0));
    assert!(float_at(&modulo, 1).is_some_and(f64::is_nan));
    assert!(close_to(float_at(&modulo, 2), 2.0));

    let floordiv = left
        .binary_numeric(&right, ArithmeticOp::FloorDiv)
        .expect("floordiv");
    assert_eq!(
        floordiv.dtype(),
        DType::Float64,
        "floordiv with zero should promote to Float64"
    );
    assert!(close_to(float_at(&floordiv, 0), 3.0));
    assert!(float_at(&floordiv, 1).is_some_and(f64::is_infinite));
    assert!(close_to(float_at(&floordiv, 2), 7.0));
}
13502
/// Adding two zero-length columns must succeed and produce an empty column.
#[test]
fn vectorized_empty_columns() {
    let left = Column::from_values(Vec::new()).expect("left");
    let right = Column::from_values(Vec::new()).expect("right");

    let sum = left.binary_numeric(&right, ArithmeticOp::Add);
    assert!(sum.expect("add empty").is_empty());
}
13512
/// Adds two 4096-element Float64 columns holding `i` and `n - i`, so every
/// output slot must equal `n`.
#[test]
fn vectorized_large_column_matches_scalar_semantics() {
    let n: i32 = 4096;

    // Build both operands in one pass: left[i] = i, right[i] = n - i.
    let (left_values, right_values): (Vec<Scalar>, Vec<Scalar>) = (0..n)
        .map(|i| {
            (
                Scalar::Float64(f64::from(i)),
                Scalar::Float64(f64::from(n - i)),
            )
        })
        .unzip();

    let left = Column::from_values(left_values).expect("left");
    let right = Column::from_values(right_values).expect("right");

    let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");

    let expected = Scalar::Float64(f64::from(n));
    result
        .values()
        .iter()
        .enumerate()
        .for_each(|(i, v)| assert_eq!(v, &expected, "position {i} should be {n}"));
}
13530
/// Adds a valid Float64 column to a column holding a float NaN and a
/// NaN-kind null; both output slots must report `is_nan()`.
#[test]
fn vectorized_nan_vs_null_distinction_preserved() {
    let nan_like = vec![Scalar::Float64(f64::NAN), Scalar::Null(NullKind::NaN)];
    let addends = vec![Scalar::Float64(1.0), Scalar::Float64(2.0)];

    let left = Column::from_values(nan_like).expect("left");
    let right = Column::from_values(addends).expect("right");

    let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
    let cells = result.values();
    assert!(cells[0].is_nan(), "NaN + valid = NaN");
    assert!(cells[1].is_nan(), "NaN-null + valid = NaN");
}
13545
/// Int64 + Float64 addition must produce a Float64 column holding the exact
/// sums 1 + 0.5 and 2 + 1.5.
#[test]
fn vectorized_mixed_type_falls_back_to_scalar() {
    let ints = vec![Scalar::Int64(1), Scalar::Int64(2)];
    let floats = vec![Scalar::Float64(0.5), Scalar::Float64(1.5)];

    let left = Column::from_values(ints).expect("left");
    let right = Column::from_values(floats).expect("right");

    let result = left.binary_numeric(&right, ArithmeticOp::Add).expect("add");
    assert_eq!(result.dtype(), fp_types::DType::Float64);
    for (slot, total) in [1.5, 3.5].into_iter().enumerate() {
        assert_eq!(result.values()[slot], Scalar::Float64(total));
    }
}
13558
/// Int64 subtraction and multiplication over two-element columns yield the
/// exact integer differences and products.
#[test]
fn vectorized_i64_sub_and_mul() {
    let left = Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).expect("left");
    let right = Column::from_values(vec![Scalar::Int64(3), Scalar::Int64(5)]).expect("right");

    // 10 - 3, 20 - 5
    let sub = left.binary_numeric(&right, ArithmeticOp::Sub).expect("sub");
    for (slot, difference) in [7_i64, 15].into_iter().enumerate() {
        assert_eq!(sub.values()[slot], Scalar::Int64(difference));
    }

    // 10 * 3, 20 * 5
    let mul = left.binary_numeric(&right, ArithmeticOp::Mul).expect("mul");
    for (slot, product) in [30_i64, 100].into_iter().enumerate() {
        assert_eq!(mul.values()[slot], Scalar::Int64(product));
    }
}
13572
13573 mod crack_tests {
13576 use fp_types::Scalar;
13577
13578 use super::super::*;
13579
13580 fn make_column(values: &[f64]) -> Column {
13581 Column::from_values(values.iter().map(|&v| Scalar::Float64(v)).collect()).expect("col")
13582 }
13583
13584 #[test]
13585 fn crack_filter_gt_basic() {
13586 let col = make_column(&[1.0, 5.0, 3.0, 7.0, 2.0]);
13587 let mut crack = CrackIndex::new(col.len());
13588
13589 let gt3 = crack.filter_gt(&col, 3.0);
13590 let mut gt3_vals: Vec<f64> = gt3
13591 .iter()
13592 .map(|&i| col.values()[i].to_f64().unwrap())
13593 .collect();
13594 gt3_vals.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
13595 assert_eq!(gt3_vals, vec![5.0, 7.0]);
13596 assert_eq!(crack.num_cracks(), 1);
13597 }
13598
13599 #[test]
13600 fn crack_filter_lte_basic() {
13601 let col = make_column(&[1.0, 5.0, 3.0, 7.0, 2.0]);
13602 let mut crack = CrackIndex::new(col.len());
13603
13604 let lte3 = crack.filter_lte(&col, 3.0);
13605 let mut lte3_vals: Vec<f64> = lte3
13606 .iter()
13607 .map(|&i| col.values()[i].to_f64().unwrap())
13608 .collect();
13609 lte3_vals.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
13610 assert_eq!(lte3_vals, vec![1.0, 2.0, 3.0]);
13611 }
13612
13613 #[test]
13614 fn crack_filter_eq() {
13615 let col = make_column(&[1.0, 3.0, 3.0, 7.0, 3.0]);
13616 let mut crack = CrackIndex::new(col.len());
13617
13618 let eq3 = crack.filter_eq(&col, 3.0);
13619 assert_eq!(eq3.len(), 3, "three values equal to 3.0");
13620 for &idx in &eq3 {
13621 assert_eq!(col.values()[idx].to_f64().unwrap(), 3.0);
13622 }
13623 }
13624
13625 #[test]
13626 fn crack_filter_lt() {
13627 let col = make_column(&[1.0, 5.0, 3.0, 7.0, 2.0]);
13628 let mut crack = CrackIndex::new(col.len());
13629
13630 let lt3 = crack.filter_lt(&col, 3.0);
13631 let mut lt3_vals: Vec<f64> = lt3
13632 .iter()
13633 .map(|&i| col.values()[i].to_f64().unwrap())
13634 .collect();
13635 lt3_vals.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
13636 assert_eq!(lt3_vals, vec![1.0, 2.0]);
13637 }
13638
13639 #[test]
13640 fn crack_filter_gte() {
13641 let col = make_column(&[1.0, 5.0, 3.0, 7.0, 2.0]);
13642 let mut crack = CrackIndex::new(col.len());
13643
13644 let gte3 = crack.filter_gte(&col, 3.0);
13645 let mut gte3_vals: Vec<f64> = gte3
13646 .iter()
13647 .map(|&i| col.values()[i].to_f64().unwrap())
13648 .collect();
13649 gte3_vals.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
13650 assert_eq!(gte3_vals, vec![3.0, 5.0, 7.0]);
13651 }
13652
13653 #[test]
13654 fn crack_progressive_refinement() {
13655 let col = make_column(&[10.0, 2.0, 8.0, 4.0, 6.0, 1.0, 9.0, 3.0, 7.0, 5.0]);
13656 let mut crack = CrackIndex::new(col.len());
13657
13658 let gt5 = crack.filter_gt(&col, 5.0);
13660 assert_eq!(gt5.len(), 5);
13661 assert_eq!(crack.num_cracks(), 1);
13662
13663 let gt3 = crack.filter_gt(&col, 3.0);
13665 assert_eq!(gt3.len(), 7); assert_eq!(crack.num_cracks(), 2);
13667
13668 let gt7 = crack.filter_gt(&col, 7.0);
13670 assert_eq!(gt7.len(), 3); assert_eq!(crack.num_cracks(), 3);
13672 }
13673
13674 #[test]
13675 fn crack_duplicate_crack_point_is_idempotent() {
13676 let col = make_column(&[1.0, 5.0, 3.0, 7.0, 2.0]);
13677 let mut crack = CrackIndex::new(col.len());
13678
13679 let gt3_first = crack.filter_gt(&col, 3.0);
13680 let gt3_second = crack.filter_gt(&col, 3.0);
13681
13682 let mut a: Vec<usize> = gt3_first;
13684 let mut b: Vec<usize> = gt3_second;
13685 a.sort_unstable();
13686 b.sort_unstable();
13687 assert_eq!(a, b);
13688 assert_eq!(crack.num_cracks(), 1, "no duplicate crack point");
13689 }
13690
13691 #[test]
13692 fn crack_empty_column() {
13693 let col = make_column(&[]);
13694 let mut crack = CrackIndex::new(col.len());
13695
13696 assert!(crack.filter_gt(&col, 5.0).is_empty());
13697 assert!(crack.filter_lte(&col, 5.0).is_empty());
13698 }
13699
13700 #[test]
13701 fn crack_single_element() {
13702 let col = make_column(&[42.0]);
13703 let mut crack = CrackIndex::new(col.len());
13704
13705 assert!(crack.filter_gt(&col, 42.0).is_empty());
13706 assert_eq!(crack.filter_lte(&col, 42.0).len(), 1);
13707 assert_eq!(crack.filter_eq(&col, 42.0).len(), 1);
13708 }
13709
13710 #[test]
13711 fn crack_all_same_values() {
13712 let col = make_column(&[5.0, 5.0, 5.0, 5.0]);
13713 let mut crack = CrackIndex::new(col.len());
13714
13715 assert!(crack.filter_gt(&col, 5.0).is_empty());
13716 assert_eq!(crack.filter_lte(&col, 5.0).len(), 4);
13717 assert_eq!(crack.filter_eq(&col, 5.0).len(), 4);
13718 }
13719
13720 #[test]
13721 fn crack_isomorphism_with_full_scan() {
13722 let col = make_column(&[10.0, 2.0, 8.0, 4.0, 6.0, 1.0, 9.0, 3.0, 7.0, 5.0]);
13724 let mut crack = CrackIndex::new(col.len());
13725
13726 for pivot in [1.0, 3.0, 5.0, 7.0, 9.0, 0.0, 11.0] {
13727 let mut cracked: Vec<usize> = crack.filter_gt(&col, pivot);
13728 cracked.sort_unstable();
13729
13730 let mut naive: Vec<usize> = (0..col.len())
13731 .filter(|&i| col.values()[i].to_f64().unwrap() > pivot)
13732 .collect();
13733 naive.sort_unstable();
13734
13735 assert_eq!(
13736 cracked, naive,
13737 "cracked vs naive mismatch for pivot={pivot}"
13738 );
13739 }
13740 }
13741
13742 #[test]
13743 fn crack_int64_column() {
13744 let col = Column::from_values(vec![
13745 Scalar::Int64(10),
13746 Scalar::Int64(5),
13747 Scalar::Int64(3),
13748 Scalar::Int64(8),
13749 Scalar::Int64(1),
13750 ])
13751 .expect("col");
13752 let mut crack = CrackIndex::new(col.len());
13753
13754 let gt5 = crack.filter_gt(&col, 5.0);
13755 let mut gt5_vals: Vec<i64> = gt5
13756 .iter()
13757 .filter_map(|&i| match &col.values()[i] {
13758 Scalar::Int64(v) => Some(*v),
13759 _ => None,
13760 })
13761 .collect();
13762 assert_eq!(gt5_vals.len(), gt5.len(), "expected Int64 values");
13763 gt5_vals.sort_unstable();
13764 assert_eq!(gt5_vals, vec![8, 10]);
13765 }
13766
13767 #[test]
13768 fn crack_large_column_correctness() {
13769 let n = 1000;
13770 let values: Vec<f64> = (0..n).map(|i| ((i * 7 + 13) % n) as f64).collect();
13771 let col = make_column(&values);
13772 let mut crack = CrackIndex::new(col.len());
13773
13774 for pivot in [100.0, 500.0, 250.0, 750.0, 50.0, 900.0] {
13776 let mut cracked: Vec<usize> = crack.filter_gt(&col, pivot);
13777 cracked.sort_unstable();
13778
13779 let mut naive: Vec<usize> =
13780 (0..n as usize).filter(|&i| values[i] > pivot).collect();
13781 naive.sort_unstable();
13782
13783 assert_eq!(cracked, naive, "large column mismatch for pivot={pivot}");
13784 }
13785 }
13786 }
13787
13788 mod comparison_tests {
13791 use fp_types::{NullKind, Scalar};
13792
13793 use super::super::*;
13794
13795 #[test]
13796 fn comparison_gt_int64() {
13797 let left =
13798 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(5), Scalar::Int64(3)])
13799 .expect("left");
13800 let right =
13801 Column::from_values(vec![Scalar::Int64(3), Scalar::Int64(3), Scalar::Int64(3)])
13802 .expect("right");
13803
13804 let result = left
13805 .binary_comparison(&right, ComparisonOp::Gt)
13806 .expect("gt");
13807 assert_eq!(result.dtype(), fp_types::DType::Bool);
13808 assert_eq!(result.values()[0], Scalar::Bool(false));
13809 assert_eq!(result.values()[1], Scalar::Bool(true));
13810 assert_eq!(result.values()[2], Scalar::Bool(false));
13811 }
13812
13813 #[test]
13814 fn comparison_all_ops_numeric() {
13815 let left = Column::from_values(vec![Scalar::Float64(5.0)]).expect("left");
13816 let right = Column::from_values(vec![Scalar::Float64(3.0)]).expect("right");
13817
13818 let gt = left
13819 .binary_comparison(&right, ComparisonOp::Gt)
13820 .expect("gt");
13821 let lt = left
13822 .binary_comparison(&right, ComparisonOp::Lt)
13823 .expect("lt");
13824 let eq = left
13825 .binary_comparison(&right, ComparisonOp::Eq)
13826 .expect("eq");
13827 let ne = left
13828 .binary_comparison(&right, ComparisonOp::Ne)
13829 .expect("ne");
13830 let ge = left
13831 .binary_comparison(&right, ComparisonOp::Ge)
13832 .expect("ge");
13833 let le = left
13834 .binary_comparison(&right, ComparisonOp::Le)
13835 .expect("le");
13836
13837 assert_eq!(gt.values()[0], Scalar::Bool(true));
13838 assert_eq!(lt.values()[0], Scalar::Bool(false));
13839 assert_eq!(eq.values()[0], Scalar::Bool(false));
13840 assert_eq!(ne.values()[0], Scalar::Bool(true));
13841 assert_eq!(ge.values()[0], Scalar::Bool(true));
13842 assert_eq!(le.values()[0], Scalar::Bool(false));
13843 }
13844
13845 #[test]
13846 fn pandas_comparison_aliases_match_binary_comparison() {
13847 let left = Column::from_values(vec![Scalar::Float64(5.0)]).expect("left");
13848 let right = Column::from_values(vec![Scalar::Float64(3.0)]).expect("right");
13849
13850 assert_eq!(
13851 left.eq(&right).expect("eq"),
13852 left.binary_comparison(&right, ComparisonOp::Eq)
13853 .expect("eq")
13854 );
13855 assert_eq!(
13856 left.ne(&right).expect("ne"),
13857 left.binary_comparison(&right, ComparisonOp::Ne)
13858 .expect("ne")
13859 );
13860 assert_eq!(
13861 left.lt(&right).expect("lt"),
13862 left.binary_comparison(&right, ComparisonOp::Lt)
13863 .expect("lt")
13864 );
13865 assert_eq!(
13866 left.le(&right).expect("le"),
13867 left.binary_comparison(&right, ComparisonOp::Le)
13868 .expect("le")
13869 );
13870 assert_eq!(
13871 left.gt(&right).expect("gt"),
13872 left.binary_comparison(&right, ComparisonOp::Gt)
13873 .expect("gt")
13874 );
13875 assert_eq!(
13876 left.ge(&right).expect("ge"),
13877 left.binary_comparison(&right, ComparisonOp::Ge)
13878 .expect("ge")
13879 );
13880 }
13881
13882 #[test]
13883 fn comparison_equality_equal_values() {
13884 let col = Column::from_values(vec![Scalar::Int64(42)]).expect("col");
13885 let result = col.binary_comparison(&col, ComparisonOp::Eq).expect("eq");
13886 assert_eq!(result.values()[0], Scalar::Bool(true));
13887
13888 let ne = col.binary_comparison(&col, ComparisonOp::Ne).expect("ne");
13889 assert_eq!(ne.values()[0], Scalar::Bool(false));
13890 }
13891
13892 #[test]
13893 fn comparison_null_propagation() {
13894 let left = Column::from_values(vec![
13895 Scalar::Int64(1),
13896 Scalar::Null(NullKind::Null),
13897 Scalar::Int64(3),
13898 ])
13899 .expect("left");
13900 let right = Column::from_values(vec![
13901 Scalar::Int64(2),
13902 Scalar::Int64(2),
13903 Scalar::Null(NullKind::Null),
13904 ])
13905 .expect("right");
13906
13907 let result = left
13908 .binary_comparison(&right, ComparisonOp::Gt)
13909 .expect("gt");
13910 assert_eq!(result.values()[0], Scalar::Bool(false));
13911 assert!(result.values()[1].is_missing(), "null op valid = null");
13912 assert!(result.values()[2].is_missing(), "valid op null = null");
13913 }
13914
13915 #[test]
13916 fn comparison_utf8_lexicographic() {
13917 let left = Column::from_values(vec![
13918 Scalar::Utf8("banana".to_string()),
13919 Scalar::Utf8("apple".to_string()),
13920 ])
13921 .expect("left");
13922 let right = Column::from_values(vec![
13923 Scalar::Utf8("apple".to_string()),
13924 Scalar::Utf8("cherry".to_string()),
13925 ])
13926 .expect("right");
13927
13928 let gt = left
13929 .binary_comparison(&right, ComparisonOp::Gt)
13930 .expect("gt");
13931 assert_eq!(gt.values()[0], Scalar::Bool(true));
13932 assert_eq!(gt.values()[1], Scalar::Bool(false));
13933 }
13934
13935 #[test]
13936 fn compare_scalar_gt() {
13937 let col = Column::from_values(vec![
13938 Scalar::Int64(1),
13939 Scalar::Int64(5),
13940 Scalar::Null(NullKind::Null),
13941 Scalar::Int64(3),
13942 ])
13943 .expect("col");
13944
13945 let result = col
13946 .compare_scalar(&Scalar::Int64(3), ComparisonOp::Gt)
13947 .expect("gt");
13948 assert_eq!(result.values()[0], Scalar::Bool(false));
13949 assert_eq!(result.values()[1], Scalar::Bool(true));
13950 assert!(result.values()[2].is_missing());
13951 assert_eq!(result.values()[3], Scalar::Bool(false));
13952 }
13953
13954 #[test]
13955 fn compare_scalar_with_missing_scalar() {
13956 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
13957
13958 let result = col
13959 .compare_scalar(&Scalar::Null(NullKind::Null), ComparisonOp::Eq)
13960 .expect("eq");
13961 assert!(result.values()[0].is_missing());
13962 assert!(result.values()[1].is_missing());
13963 }
13964
13965 #[test]
13966 fn filter_by_mask_basic() {
13967 let col = Column::from_values(vec![
13968 Scalar::Int64(10),
13969 Scalar::Int64(20),
13970 Scalar::Int64(30),
13971 Scalar::Int64(40),
13972 ])
13973 .expect("col");
13974 let mask = Column::from_values(vec![
13975 Scalar::Bool(true),
13976 Scalar::Bool(false),
13977 Scalar::Bool(true),
13978 Scalar::Bool(false),
13979 ])
13980 .expect("mask");
13981
13982 let result = col.filter_by_mask(&mask).expect("filter");
13983 assert_eq!(result.len(), 2);
13984 assert_eq!(result.values()[0], Scalar::Int64(10));
13985 assert_eq!(result.values()[1], Scalar::Int64(30));
13986 }
13987
13988 #[test]
13989 fn filter_by_mask_float64_typed_path_matches_scalar() {
13990 let col = Column::from_f64_values(vec![1.5, -0.0, 2.5, f64::INFINITY, 0.0]);
13994 let mask = Column::from_values(vec![
13995 Scalar::Bool(true),
13996 Scalar::Bool(true),
13997 Scalar::Bool(false),
13998 Scalar::Bool(true),
13999 Scalar::Null(NullKind::Null), ])
14001 .expect("mask");
14002 let result = col.filter_by_mask(&mask).expect("filter");
14003 assert_eq!(result.dtype(), DType::Float64);
14004 assert_eq!(
14005 result.values(),
14006 &[
14007 Scalar::Float64(1.5),
14008 Scalar::Float64(-0.0),
14009 Scalar::Float64(f64::INFINITY),
14010 ]
14011 );
14012 }
14013
14014 #[test]
14015 fn compare_scalar_typed_path_matches_scalar_compare() {
14016 let f64_vals = vec![1.5f64, -0.0, 0.0, 2.5, -3.0, f64::INFINITY, 100.0];
14020 let i64_vals = vec![1i64, -2, 0, 5, 100, -7];
14021 let ops = [
14022 ComparisonOp::Gt,
14023 ComparisonOp::Lt,
14024 ComparisonOp::Eq,
14025 ComparisonOp::Ne,
14026 ComparisonOp::Ge,
14027 ComparisonOp::Le,
14028 ];
14029 for op in ops {
14030 for &probe in &[0.0f64, 1.5, 2.5, -3.0, f64::INFINITY] {
14032 let got = Column::from_f64_values(f64_vals.clone())
14033 .compare_scalar(&Scalar::Float64(probe), op)
14034 .expect("f64 cmp");
14035 let expected: Vec<Scalar> = f64_vals
14036 .iter()
14037 .map(|&v| {
14038 Scalar::Bool(
14039 scalar_compare(&Scalar::Float64(v), &Scalar::Float64(probe), op)
14040 .unwrap(),
14041 )
14042 })
14043 .collect();
14044 assert_eq!(
14045 got.values(),
14046 expected.as_slice(),
14047 "f64 op {op:?} probe {probe}"
14048 );
14049 }
14050 let got = Column::from_f64_values(f64_vals.clone())
14052 .compare_scalar(&Scalar::Int64(2), op)
14053 .expect("f64-vs-i64 cmp");
14054 let expected: Vec<Scalar> = f64_vals
14055 .iter()
14056 .map(|&v| {
14057 Scalar::Bool(
14058 scalar_compare(&Scalar::Float64(v), &Scalar::Int64(2), op).unwrap(),
14059 )
14060 })
14061 .collect();
14062 assert_eq!(got.values(), expected.as_slice(), "f64-vs-i64 op {op:?}");
14063 let got = Column::from_i64_values(i64_vals.clone())
14065 .compare_scalar(&Scalar::Int64(0), op)
14066 .expect("i64 cmp");
14067 let expected: Vec<Scalar> = i64_vals
14068 .iter()
14069 .map(|&v| {
14070 Scalar::Bool(
14071 scalar_compare(&Scalar::Int64(v), &Scalar::Int64(0), op).unwrap(),
14072 )
14073 })
14074 .collect();
14075 assert_eq!(got.values(), expected.as_slice(), "i64 op {op:?}");
14076 }
14077 }
14078
14079 #[test]
14080 #[ignore = "perf timing harness, run with --ignored"]
14081 fn compare_scalar_typed_vs_aos_timing() {
14082 use std::time::Instant;
14083 let n = 5_000_000usize;
14084 let raw: Vec<f64> = (0..n).map(|i| (i % 1000) as f64 - 500.0).collect();
14085 let scalars: Vec<Scalar> = raw.iter().map(|&v| Scalar::Float64(v)).collect();
14086 let probe = Scalar::Float64(0.0);
14087 let op = ComparisonOp::Gt;
14088
14089 let t = Instant::now();
14091 let aos: Vec<Scalar> = scalars
14092 .iter()
14093 .map(|v| Scalar::Bool(scalar_compare(v, &probe, op).unwrap()))
14094 .collect();
14095 let aos_ns = t.elapsed().as_nanos();
14096 std::hint::black_box(&aos);
14097
14098 let col = Column::from_f64_values(raw.clone());
14100 let t = Instant::now();
14101 let typed = col.compare_scalar(&probe, op).expect("typed cmp");
14102 let typed_ns = t.elapsed().as_nanos();
14103 std::hint::black_box(&typed);
14104
14105 assert_eq!(typed.values(), aos.as_slice(), "typed must match AoS");
14106 let ratio = aos_ns as f64 / typed_ns as f64;
14107 println!(
14108 "compare_scalar Gt n={n}: AoS {aos_ns}ns typed {typed_ns}ns Score={ratio:.2}x"
14109 );
14110 }
14111
14112 #[test]
14113 fn filter_by_mask_null_treated_as_false() {
14114 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14115 let mask = Column::from_values(vec![Scalar::Bool(true), Scalar::Null(NullKind::Null)])
14116 .expect("mask");
14117
14118 let result = col.filter_by_mask(&mask).expect("filter");
14119 assert_eq!(result.len(), 1);
14120 assert_eq!(result.values()[0], Scalar::Int64(1));
14121 }
14122
14123 #[test]
14124 fn filter_by_mask_rejects_non_boolean_mask() {
14125 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14126 let mask = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(0)]).expect("mask");
14127
14128 let err = col.filter_by_mask(&mask).expect_err("non-bool mask");
14129 assert!(matches!(err, ColumnError::InvalidMaskType { .. }));
14130 }
14131
14132 #[test]
14133 fn filter_by_mask_empty_result() {
14134 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14135 let mask =
14136 Column::from_values(vec![Scalar::Bool(false), Scalar::Bool(false)]).expect("mask");
14137
14138 let result = col.filter_by_mask(&mask).expect("filter");
14139 assert!(result.is_empty());
14140 }
14141
14142 #[test]
14143 fn fillna_replaces_missing() {
14144 let col = Column::from_values(vec![
14145 Scalar::Int64(1),
14146 Scalar::Null(NullKind::Null),
14147 Scalar::Int64(3),
14148 Scalar::Null(NullKind::Null),
14149 ])
14150 .expect("col");
14151
14152 let result = col.fillna(&Scalar::Int64(0)).expect("fillna");
14153 assert_eq!(result.values()[0], Scalar::Int64(1));
14154 assert_eq!(result.values()[1], Scalar::Int64(0));
14155 assert_eq!(result.values()[2], Scalar::Int64(3));
14156 assert_eq!(result.values()[3], Scalar::Int64(0));
14157 assert_eq!(result.validity().count_valid(), 4);
14158 }
14159
14160 #[test]
14161 fn dropna_removes_missing() {
14162 let col = Column::from_values(vec![
14163 Scalar::Int64(1),
14164 Scalar::Null(NullKind::Null),
14165 Scalar::Int64(3),
14166 Scalar::Null(NullKind::NaN),
14167 ])
14168 .expect("col");
14169
14170 let result = col.dropna().expect("dropna");
14171 assert_eq!(result.len(), 2);
14172 assert_eq!(result.values()[0], Scalar::Int64(1));
14173 assert_eq!(result.values()[1], Scalar::Int64(3));
14174 }
14175
14176 #[test]
14177 fn comparison_empty_columns() {
14178 let left = Column::from_values(vec![]).expect("left");
14179 let right = Column::from_values(vec![]).expect("right");
14180 let result = left
14181 .binary_comparison(&right, ComparisonOp::Eq)
14182 .expect("eq");
14183 assert!(result.is_empty());
14184 }
14185
14186 #[test]
14187 fn comparison_length_mismatch_error() {
14188 let left = Column::from_values(vec![Scalar::Int64(1)]).expect("left");
14189 let right =
14190 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("right");
14191 assert!(left.binary_comparison(&right, ComparisonOp::Eq).is_err());
14192 }
14193
14194 #[test]
14195 fn comparison_bool_ordering() {
14196 let left =
14197 Column::from_values(vec![Scalar::Bool(true), Scalar::Bool(false)]).expect("left");
14198 let right =
14199 Column::from_values(vec![Scalar::Bool(false), Scalar::Bool(true)]).expect("right");
14200
14201 let gt = left
14202 .binary_comparison(&right, ComparisonOp::Gt)
14203 .expect("gt");
14204 assert_eq!(gt.values()[0], Scalar::Bool(true));
14205 assert_eq!(gt.values()[1], Scalar::Bool(false));
14206 }
14207 }
14208
14209 mod iter_and_predicates {
14210 use fp_types::NullKind;
14211
14212 use super::*;
14213
/// Cloning every item yielded by `iter_values()` must reproduce `values()`
/// in the same order.
#[test]
fn iter_values_preserves_order() {
    let cells = vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)];
    let col = Column::from_values(cells).expect("col");

    let seen: Vec<_> = col.iter_values().cloned().collect();
    assert_eq!(seen.as_slice(), col.values());
}
14222
/// `to_vec()` returns the column's values as an owned vector and leaves the
/// column itself usable afterwards.
#[test]
fn to_vec_returns_owned_clone() {
    let expected = vec![Scalar::Int64(5), Scalar::Int64(6)];
    let col = Column::from_values(expected.clone()).expect("col");

    let owned = col.to_vec();
    assert_eq!(owned, expected);
    // The column still reports its original length after the copy was taken.
    assert_eq!(col.len(), 2);
}
14231
/// `copy()`, `view()` and `transpose()` (with its `t()`/`T()` aliases) each
/// return a column equal to the source whose value slice lives at a different
/// address.
#[test]
fn copy_returns_independent_clone() {
    let col = Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(6)]).expect("col");
    let copied = col.copy();
    let viewed = col.view();
    let transposed = col.transpose();
    let duplicates = [&copied, &viewed, &transposed];

    // Content equality first ...
    for dup in duplicates {
        assert_eq!(dup, &col);
    }
    assert_eq!(col.t(), transposed);
    assert_eq!(col.T(), transposed);

    // ... then storage independence: none of them exposes the source's slice.
    let source_ptr = col.values().as_ptr();
    for dup in duplicates {
        assert_ne!(dup.values().as_ptr(), source_ptr);
    }
}
14247
/// `item()` returns the sole value of a one-element column and fails with
/// `InvalidLength` (expected 1, actual = real length) for any other length.
#[test]
fn item_extracts_single_value_and_rejects_other_lengths() {
    let single = Column::from_values(vec![Scalar::Int64(5)]).expect("col");
    let only = single.item();
    assert_eq!(only, Ok(Scalar::Int64(5)));

    let empty = Column::from_values(Vec::<Scalar>::new()).expect("col");
    let too_short = Err(crate::ColumnError::InvalidLength {
        operation: "item()",
        expected: 1,
        actual: 0,
    });
    assert_eq!(empty.item(), too_short);

    let multi = Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(6)]).expect("col");
    let too_long = Err(crate::ColumnError::InvalidLength {
        operation: "item()",
        expected: 1,
        actual: 2,
    });
    assert_eq!(multi.item(), too_long);
}
14273
/// `has_any_missing()` is false for a fully populated column and true once a
/// null is present. Along the way, `hasnans()` must agree with it and
/// `nbytes()` must equal `memory_usage(false)`.
#[test]
fn has_any_missing_detects_null() {
    let populated =
        Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
    let with_null =
        Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)]).expect("col");

    for (col, expect_missing) in [(&populated, false), (&with_null, true)] {
        assert_eq!(col.has_any_missing(), expect_missing);
        assert_eq!(col.hasnans(), col.has_any_missing());
        assert_eq!(col.nbytes(), col.memory_usage(false));
    }
}
14289
/// `all_missing()` is true for an empty column, true when every entry is a
/// null, and false as soon as one real value is present.
#[test]
fn all_missing_empty_is_true() {
    let empty = Column::from_values(Vec::<Scalar>::new()).expect("col");
    let all_null = Column::from_values(vec![
        Scalar::Null(NullKind::NaN),
        Scalar::Null(NullKind::Null),
    ])
    .expect("col");
    let mixed = Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
        .expect("col");

    assert!(empty.all_missing());
    assert!(all_null.all_missing());
    assert!(!mixed.all_missing());
}
14306
/// `apply_bool` with an "is even" predicate over 1..=4 yields a Bool column
/// reading false, true, false, true.
#[test]
fn apply_bool_positive_predicate() {
    let col = Column::from_values((1..=4_i64).map(Scalar::Int64).collect::<Vec<Scalar>>())
        .expect("col");

    // A cell counts as even only when it converts to f64 and truncates to an even i64.
    let even = col
        .apply_bool(|v| matches!(v.to_f64(), Ok(f) if f as i64 % 2 == 0))
        .expect("apply_bool");

    assert_eq!(even.dtype(), DType::Bool);
    for (slot, flag) in [false, true, false, true].into_iter().enumerate() {
        assert_eq!(even.values()[slot], Scalar::Bool(flag));
    }
}
14325
/// `first()`/`last()` borrow the end elements of a populated column and
/// return `None` for an empty one.
#[test]
fn first_and_last_return_endpoints() {
    let (head, middle, tail) = (Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30));
    let col =
        Column::from_values(vec![head.clone(), middle, tail.clone()]).expect("col");
    assert_eq!(col.first(), Some(&head));
    assert_eq!(col.last(), Some(&tail));

    let empty = Column::from_values(Vec::<Scalar>::new()).expect("col");
    assert!(empty.first().is_none());
    assert!(empty.last().is_none());
}
14341
/// Counting even values in `[1, 2, NaN, 4, 6]` must yield 3: the odd value
/// and the missing slot do not contribute.
#[test]
fn count_matching_ignores_missing_and_mismatches() {
    let values = vec![
        Scalar::Int64(1),
        Scalar::Int64(2),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(4),
        Scalar::Int64(6),
    ];
    let column = Column::from_values(values).expect("col");
    let even_count =
        column.count_matching(|v| matches!(v.to_f64(), Ok(f) if f as i64 % 2 == 0));
    assert_eq!(even_count, 3);
}
14356
/// `zip_with` applies the combiner pairwise: adding `[1, 2, 3]` to
/// `[10, 20, 30]` through f64 gives `[11.0, 22.0, 33.0]`.
#[test]
fn zip_with_elementwise_combine() {
    let left = Column::from_values((1..=3).map(Scalar::Int64).collect()).expect("a");
    let right = Column::from_values([10, 20, 30].into_iter().map(Scalar::Int64).collect())
        .expect("b");
    let total = left
        .zip_with(&right, |l, r| {
            // Any operand that does not convert to f64 turns the result into NaN.
            if let (Ok(lf), Ok(rf)) = (l.to_f64(), r.to_f64()) {
                Scalar::Float64(lf + rf)
            } else {
                Scalar::Null(NullKind::NaN)
            }
        })
        .expect("zip_with");
    for (row, want) in [11.0, 22.0, 33.0].into_iter().enumerate() {
        assert_eq!(total.values()[row], Scalar::Float64(want));
    }
}
14377
/// Zipping columns of different lengths (1 vs 2 rows) must return an error.
#[test]
fn zip_with_length_mismatch_errors() {
    let short = Column::from_values(vec![Scalar::Int64(1)]).expect("a");
    let long = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("b");
    let outcome = short.zip_with(&long, |l, _| l.clone());
    assert!(outcome.is_err());
}
14384
/// `iter_enumerate` pairs every value with its zero-based position.
#[test]
fn iter_enumerate_yields_positions() {
    let column =
        Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).expect("col");
    let mut seen = Vec::new();
    for (position, value) in column.iter_enumerate() {
        seen.push((position, value.clone()));
    }
    let expected = vec![(0, Scalar::Int64(10)), (1, Scalar::Int64(20))];
    assert_eq!(seen, expected);
}
14394
/// Even with a predicate that always answers `true`, the missing row of the
/// input comes out as `false`.
#[test]
fn apply_bool_missing_maps_to_false() {
    let column = Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
        .expect("col");
    let flags = column.apply_bool(|_| true).expect("apply_bool");
    for (row, want) in [true, false].into_iter().enumerate() {
        assert_eq!(flags.values()[row], Scalar::Bool(want));
    }
}
14404 }
14405
14406 mod take_slice_concat_repeat {
14407 use super::*;
14408
14409 #[test]
14410 fn take_reorders_rows() {
14411 let col = Column::from_values(vec![
14412 Scalar::Int64(10),
14413 Scalar::Int64(20),
14414 Scalar::Int64(30),
14415 ])
14416 .expect("col");
14417 let picked = col.take(&[2, 0, 1]).expect("take");
14418 assert_eq!(picked.values()[0], Scalar::Int64(30));
14419 assert_eq!(picked.values()[1], Scalar::Int64(10));
14420 assert_eq!(picked.values()[2], Scalar::Int64(20));
14421 }
14422
14423 #[test]
14424 fn take_out_of_bounds_errors() {
14425 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
14426 let err = col.take(&[5]).unwrap_err();
14427 assert!(matches!(err, crate::ColumnError::LengthMismatch { .. }));
14428 }
14429
14430 #[test]
14431 fn slice_returns_contiguous_range() {
14432 let col = Column::from_values(vec![
14433 Scalar::Int64(1),
14434 Scalar::Int64(2),
14435 Scalar::Int64(3),
14436 Scalar::Int64(4),
14437 ])
14438 .expect("col");
14439 let middle = col.slice(1, 2).expect("slice");
14440 assert_eq!(middle.len(), 2);
14441 assert_eq!(middle.values()[0], Scalar::Int64(2));
14442 assert_eq!(middle.values()[1], Scalar::Int64(3));
14443 }
14444
14445 #[test]
14446 fn slice_past_end_yields_empty() {
14447 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14448 let empty = col.slice(10, 5).expect("slice");
14449 assert!(empty.is_empty());
14450 assert_eq!(empty.dtype(), DType::Int64);
14451 }
14452
14453 #[test]
14454 fn slice_len_clamps_to_tail() {
14455 let col = Column::from_values(vec![
14456 Scalar::Float64(1.0),
14457 Scalar::Float64(2.0),
14458 Scalar::Float64(3.0),
14459 ])
14460 .expect("col");
14461 let tail = col.slice(2, 100).expect("slice");
14462 assert_eq!(tail.len(), 1);
14463 assert_eq!(tail.values()[0], Scalar::Float64(3.0));
14464 }
14465
14466 #[test]
14467 fn slice_huge_len_clamps_without_overflow() {
14468 let col =
14469 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14470 .expect("col");
14471 let tail = col.slice(1, usize::MAX).expect("slice");
14472 assert_eq!(tail.values(), &[Scalar::Int64(2), Scalar::Int64(3)]);
14473 }
14474
14475 #[test]
14476 fn head_returns_first_n_values() {
14477 let col = Column::from_values(vec![
14478 Scalar::Int64(10),
14479 Scalar::Int64(20),
14480 Scalar::Int64(30),
14481 Scalar::Int64(40),
14482 ])
14483 .expect("col");
14484 let out = col.head(2).expect("head");
14485 assert_eq!(out.values(), &[Scalar::Int64(10), Scalar::Int64(20)]);
14486 }
14487
14488 #[test]
14489 fn tail_returns_last_n_values() {
14490 let col = Column::from_values(vec![
14491 Scalar::Int64(10),
14492 Scalar::Int64(20),
14493 Scalar::Int64(30),
14494 Scalar::Int64(40),
14495 ])
14496 .expect("col");
14497 let out = col.tail(2).expect("tail");
14498 assert_eq!(out.values(), &[Scalar::Int64(30), Scalar::Int64(40)]);
14499 }
14500
14501 #[test]
14502 fn head_tail_negative_n_match_pandas_style() {
14503 let col = Column::from_values(vec![
14504 Scalar::Int64(10),
14505 Scalar::Int64(20),
14506 Scalar::Int64(30),
14507 Scalar::Int64(40),
14508 Scalar::Int64(50),
14509 ])
14510 .expect("col");
14511 let head = col.head(-2).expect("head");
14512 let tail = col.tail(-2).expect("tail");
14513 assert_eq!(
14514 head.values(),
14515 &[Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)]
14516 );
14517 assert_eq!(
14518 tail.values(),
14519 &[Scalar::Int64(30), Scalar::Int64(40), Scalar::Int64(50)]
14520 );
14521 }
14522
14523 #[test]
14524 fn head_tail_large_negative_n_saturate_to_empty() {
14525 let col = Column::from_values(vec![
14526 Scalar::Float64(1.0),
14527 Scalar::Float64(2.0),
14528 Scalar::Float64(3.0),
14529 ])
14530 .expect("col");
14531 let head = col.head(-10).expect("head");
14532 let tail = col.tail(-10).expect("tail");
14533 assert!(head.is_empty());
14534 assert!(tail.is_empty());
14535 assert_eq!(head.dtype(), DType::Float64);
14536 assert_eq!(tail.dtype(), DType::Float64);
14537 }
14538
14539 #[test]
14540 fn concat_appends_same_dtype() {
14541 let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("a");
14542 let b = Column::from_values(vec![Scalar::Int64(3)]).expect("b");
14543 let combined = a.concat(&b).expect("concat");
14544 assert_eq!(combined.len(), 3);
14545 assert_eq!(combined.values()[2], Scalar::Int64(3));
14546 }
14547
14548 #[test]
14549 fn concat_different_dtypes_errors() {
14550 let a = Column::from_values(vec![Scalar::Int64(1)]).expect("a");
14551 let b = Column::from_values(vec![Scalar::Utf8("x".into())]).expect("b");
14552 let err = a.concat(&b).unwrap_err();
14553 assert!(matches!(err, crate::ColumnError::DTypeMismatch { .. }));
14554 }
14555
14556 #[test]
14557 fn repeat_duplicates_contiguously() {
14558 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14559 let out = col.repeat(3).expect("repeat");
14560 assert_eq!(out.len(), 6);
14561 assert_eq!(out.values()[0], Scalar::Int64(1));
14562 assert_eq!(out.values()[1], Scalar::Int64(1));
14563 assert_eq!(out.values()[2], Scalar::Int64(1));
14564 assert_eq!(out.values()[3], Scalar::Int64(2));
14565 }
14566
14567 #[test]
14568 fn repeat_zero_is_empty_same_dtype() {
14569 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
14570 let out = col.repeat(0).expect("repeat");
14571 assert!(out.is_empty());
14572 assert_eq!(out.dtype(), DType::Int64);
14573 }
14574
14575 #[test]
14576 fn repeat_one_is_clone() {
14577 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14578 let out = col.repeat(1).expect("repeat");
14579 assert_eq!(out.values(), col.values());
14580 }
14581 }
14582
14583 mod reverse_head_tail_cumulatives_unique {
14584 use fp_types::NullKind;
14585
14586 use super::*;
14587
14588 #[test]
14589 fn reverse_swaps_order_and_preserves_dtype() {
14590 let col =
14591 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14592 .expect("col");
14593 let r = col.reverse().expect("reverse");
14594 assert_eq!(r.values()[0], Scalar::Int64(3));
14595 assert_eq!(r.values()[2], Scalar::Int64(1));
14596 assert_eq!(r.dtype(), DType::Int64);
14597 }
14598
14599 #[test]
14600 fn head_positive_takes_first_n() {
14601 let col = Column::from_values(vec![
14602 Scalar::Int64(1),
14603 Scalar::Int64(2),
14604 Scalar::Int64(3),
14605 Scalar::Int64(4),
14606 ])
14607 .expect("col");
14608 let h = col.head(2).expect("head");
14609 assert_eq!(h.len(), 2);
14610 assert_eq!(h.values()[0], Scalar::Int64(1));
14611 assert_eq!(h.values()[1], Scalar::Int64(2));
14612 }
14613
14614 #[test]
14615 fn head_negative_drops_last_n() {
14616 let col =
14617 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14618 .expect("col");
14619 let h = col.head(-1).expect("head");
14620 assert_eq!(h.len(), 2);
14621 assert_eq!(h.values()[1], Scalar::Int64(2));
14622 }
14623
14624 #[test]
14625 fn tail_positive_takes_last_n() {
14626 let col = Column::from_values(vec![
14627 Scalar::Int64(1),
14628 Scalar::Int64(2),
14629 Scalar::Int64(3),
14630 Scalar::Int64(4),
14631 ])
14632 .expect("col");
14633 let t = col.tail(2).expect("tail");
14634 assert_eq!(t.len(), 2);
14635 assert_eq!(t.values()[0], Scalar::Int64(3));
14636 assert_eq!(t.values()[1], Scalar::Int64(4));
14637 }
14638
14639 #[test]
14640 fn tail_negative_drops_first_n() {
14641 let col =
14642 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14643 .expect("col");
14644 let t = col.tail(-1).expect("tail");
14645 assert_eq!(t.len(), 2);
14646 assert_eq!(t.values()[0], Scalar::Int64(2));
14647 }
14648
14649 #[test]
14650 fn head_tail_out_of_range_clamps() {
14651 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
14652 assert_eq!(col.head(10).expect("head").len(), 1);
14653 assert_eq!(col.tail(10).expect("tail").len(), 1);
14654 assert_eq!(col.head(-10).expect("head").len(), 0);
14655 assert_eq!(col.tail(-10).expect("tail").len(), 0);
14656 }
14657
14658 #[test]
14659 fn cumsum_produces_float64_running_sum() {
14660 let col = Column::from_values(vec![
14661 Scalar::Float64(1.0),
14662 Scalar::Null(NullKind::NaN),
14663 Scalar::Float64(3.0),
14664 ])
14665 .expect("col");
14666 let c = col.cumsum().expect("cumsum");
14667 assert_eq!(c.dtype(), DType::Float64);
14668 assert_eq!(c.values()[0], Scalar::Float64(1.0));
14669 assert!(c.values()[1].is_missing());
14670 assert_eq!(c.values()[2], Scalar::Float64(4.0));
14671 }
14672
14673 #[test]
14674 fn cumprod_running_product() {
14675 let col = Column::from_values(vec![
14676 Scalar::Float64(2.0),
14677 Scalar::Float64(3.0),
14678 Scalar::Float64(4.0),
14679 ])
14680 .expect("col");
14681 let c = col.cumprod().expect("cumprod");
14682 assert_eq!(c.values()[2], Scalar::Float64(24.0));
14683 }
14684
14685 #[test]
14686 fn cummax_cummin_running_extrema() {
14687 let col = Column::from_values(vec![
14688 Scalar::Float64(3.0),
14689 Scalar::Float64(1.0),
14690 Scalar::Float64(4.0),
14691 Scalar::Float64(1.0),
14692 Scalar::Float64(5.0),
14693 ])
14694 .expect("col");
14695 let mx = col.cummax().expect("cummax");
14696 assert_eq!(mx.values()[4], Scalar::Float64(5.0));
14697 let mn = col.cummin().expect("cummin");
14698 assert_eq!(mn.values()[4], Scalar::Float64(1.0));
14699 }
14700
14701 #[test]
14702 fn unique_preserves_first_seen_order() {
14703 let col = Column::from_values(vec![
14704 Scalar::Int64(3),
14705 Scalar::Int64(1),
14706 Scalar::Int64(3),
14707 Scalar::Int64(2),
14708 Scalar::Int64(1),
14709 ])
14710 .expect("col");
14711 let u = col.unique().expect("unique");
14712 assert_eq!(u.len(), 3);
14713 assert_eq!(u.values()[0], Scalar::Int64(3));
14714 assert_eq!(u.values()[1], Scalar::Int64(1));
14715 assert_eq!(u.values()[2], Scalar::Int64(2));
14716 }
14717
14718 #[test]
14719 fn unique_drops_nulls() {
14720 let col = Column::from_values(vec![
14721 Scalar::Int64(1),
14722 Scalar::Null(NullKind::NaN),
14723 Scalar::Int64(1),
14724 Scalar::Null(NullKind::NaN),
14725 ])
14726 .expect("col");
14727 let u = col.unique().expect("unique");
14728 assert_eq!(u.len(), 1);
14729 assert_eq!(u.values()[0], Scalar::Int64(1));
14730 }
14731 }
14732
14733 mod abs_shift_clip_round_isin {
14734 use fp_types::NullKind;
14735
14736 use super::*;
14737
14738 #[test]
14739 fn abs_int_and_float() {
14740 let int_col =
14741 Column::from_values(vec![Scalar::Int64(-3), Scalar::Int64(0), Scalar::Int64(5)])
14742 .expect("int");
14743 let a = int_col.abs().expect("abs");
14744 assert_eq!(a.values()[0], Scalar::Int64(3));
14745 assert_eq!(a.values()[1], Scalar::Int64(0));
14746
14747 let float_col =
14748 Column::from_values(vec![Scalar::Float64(-1.5), Scalar::Null(NullKind::NaN)])
14749 .expect("float");
14750 let b = float_col.abs().expect("abs");
14751 assert_eq!(b.values()[0], Scalar::Float64(1.5));
14752 assert!(b.values()[1].is_missing());
14753 }
14754
14755 #[test]
14756 fn abs_bool_preserves_dtype() {
14757 let bool_col =
14758 Column::from_values(vec![Scalar::Bool(true), Scalar::Bool(false)]).expect("bool");
14759 let result = bool_col.abs().expect("abs");
14760 assert_eq!(result.dtype(), DType::Bool);
14761 assert_eq!(result.values(), &[Scalar::Bool(true), Scalar::Bool(false)]);
14762 }
14763
14764 #[test]
14765 fn abs_utf8_errors() {
14766 let col = Column::from_values(vec![Scalar::Utf8("x".into())]).expect("col");
14767 assert!(col.abs().is_err());
14768 }
14769
14770 #[test]
14771 fn shift_positive_pads_left_with_fill() {
14772 let col =
14773 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14774 .expect("col");
14775 let s = col.shift(1, Scalar::Null(NullKind::NaN)).expect("shift");
14776 assert!(s.values()[0].is_missing());
14777 assert_eq!(s.values()[1], Scalar::Int64(1));
14778 assert_eq!(s.values()[2], Scalar::Int64(2));
14779 }
14780
14781 #[test]
14782 fn shift_negative_pads_right() {
14783 let col =
14784 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
14785 .expect("col");
14786 let s = col.shift(-1, Scalar::Int64(0)).expect("shift");
14787 assert_eq!(s.values()[0], Scalar::Int64(2));
14788 assert_eq!(s.values()[1], Scalar::Int64(3));
14789 assert_eq!(s.values()[2], Scalar::Int64(0));
14790 }
14791
14792 #[test]
14793 fn shift_zero_is_clone() {
14794 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14795 let s = col.shift(0, Scalar::Int64(-1)).expect("shift");
14796 assert_eq!(s.values(), col.values());
14797 }
14798
14799 #[test]
14800 fn clip_both_bounds() {
14801 let col = Column::from_values(vec![
14802 Scalar::Float64(-5.0),
14803 Scalar::Float64(3.0),
14804 Scalar::Float64(10.0),
14805 ])
14806 .expect("col");
14807 let c = col.clip(Some(0.0), Some(5.0)).expect("clip");
14808 assert_eq!(c.values()[0], Scalar::Float64(0.0));
14809 assert_eq!(c.values()[1], Scalar::Float64(3.0));
14810 assert_eq!(c.values()[2], Scalar::Float64(5.0));
14811 }
14812
14813 #[test]
14814 fn clip_none_bounds_are_noop() {
14815 let col = Column::from_values(vec![Scalar::Float64(-5.0), Scalar::Float64(10.0)])
14816 .expect("col");
14817 let c = col.clip(None, None).expect("clip");
14818 assert_eq!(c.values()[0], Scalar::Float64(-5.0));
14819 assert_eq!(c.values()[1], Scalar::Float64(10.0));
14820 }
14821
14822 #[test]
14823 fn round_rounds_floats() {
14824 let col = Column::from_values(vec![
14825 Scalar::Float64(1.234),
14826 Scalar::Float64(5.678),
14827 Scalar::Null(NullKind::NaN),
14828 ])
14829 .expect("col");
14830 let r = col.round(1).expect("round");
14831 assert_eq!(r.values()[0], Scalar::Float64(1.2));
14832 assert_eq!(r.values()[1], Scalar::Float64(5.7));
14833 assert!(r.values()[2].is_missing());
14834 }
14835
14836 #[test]
14837 fn round_int_nonnegative_decimals_is_noop() {
14838 let col = Column::from_values(vec![Scalar::Int64(12), Scalar::Int64(34)]).expect("col");
14839 let r = col.round(2).expect("round");
14840 assert_eq!(r.values(), col.values());
14841 assert_eq!(r.dtype(), DType::Int64);
14842 }
14843
14844 #[test]
14845 fn round_int_negative_decimals_preserves_dtype() {
14846 let col = Column::from_values(vec![
14847 Scalar::Int64(15),
14848 Scalar::Int64(25),
14849 Scalar::Int64(35),
14850 Scalar::Int64(-15),
14851 ])
14852 .expect("col");
14853 let r = col.round(-1).expect("round");
14854 assert_eq!(r.dtype(), DType::Int64);
14855 assert_eq!(
14856 r.values(),
14857 &[
14858 Scalar::Int64(20),
14859 Scalar::Int64(20),
14860 Scalar::Int64(40),
14861 Scalar::Int64(-20)
14862 ]
14863 );
14864 }
14865
14866 #[test]
14867 fn round_bool_is_noop() {
14868 let col =
14869 Column::from_values(vec![Scalar::Bool(true), Scalar::Bool(false)]).expect("col");
14870 let r = col.round(-2).expect("round");
14871 assert_eq!(r.dtype(), DType::Bool);
14872 assert_eq!(r.values(), col.values());
14873 }
14874
14875 #[test]
14876 fn round_negative_decimals_rounds_left() {
14877 let col = Column::from_values(vec![Scalar::Float64(1234.0)]).expect("col");
14878 let r = col.round(-2).expect("round");
14879 assert_eq!(r.values()[0], Scalar::Float64(1200.0));
14880 }
14881
14882 #[test]
14883 fn round_uses_pandas_half_even_ties() {
14884 let col = Column::from_values(vec![
14885 Scalar::Float64(1.5),
14886 Scalar::Float64(2.5),
14887 Scalar::Float64(-1.5),
14888 Scalar::Float64(3.5),
14889 ])
14890 .expect("col");
14891 let r = col.round(0).expect("round");
14892 assert_eq!(
14893 r.values(),
14894 &[
14895 Scalar::Float64(2.0),
14896 Scalar::Float64(2.0),
14897 Scalar::Float64(-2.0),
14898 Scalar::Float64(4.0)
14899 ]
14900 );
14901 }
14902
14903 #[test]
14904 fn round_negative_decimals_uses_half_even_ties() {
14905 let col = Column::from_values(vec![
14906 Scalar::Float64(15.0),
14907 Scalar::Float64(25.0),
14908 Scalar::Float64(35.0),
14909 Scalar::Float64(-15.0),
14910 ])
14911 .expect("col");
14912 let r = col.round(-1).expect("round");
14913 assert_eq!(
14914 r.values(),
14915 &[
14916 Scalar::Float64(20.0),
14917 Scalar::Float64(20.0),
14918 Scalar::Float64(40.0),
14919 Scalar::Float64(-20.0)
14920 ]
14921 );
14922 }
14923
14924 #[test]
14925 fn isin_returns_bool_column() {
14926 let col = Column::from_values(vec![
14927 Scalar::Int64(1),
14928 Scalar::Int64(2),
14929 Scalar::Int64(3),
14930 Scalar::Null(NullKind::NaN),
14931 ])
14932 .expect("col");
14933 let needles = vec![Scalar::Int64(1), Scalar::Int64(3)];
14934 let r = col.isin(&needles).expect("isin");
14935 assert_eq!(r.dtype(), DType::Bool);
14936 assert_eq!(r.values()[0], Scalar::Bool(true));
14937 assert_eq!(r.values()[1], Scalar::Bool(false));
14938 assert_eq!(r.values()[2], Scalar::Bool(true));
14939 assert_eq!(r.values()[3], Scalar::Bool(false));
14940 }
14941
14942 #[test]
14943 fn isin_empty_needles_yields_all_false() {
14944 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
14945 let r = col.isin(&[]).expect("isin");
14946 assert_eq!(r.values()[0], Scalar::Bool(false));
14947 assert_eq!(r.values()[1], Scalar::Bool(false));
14948 }
14949 }
14950
14951 mod sort_diff_duplicated_between {
14952 use fp_types::NullKind;
14953
14954 use super::*;
14955
/// Ascending sort orders the present values and leaves the missing one in
/// the final slot.
#[test]
fn sort_values_ascending_puts_nulls_last() {
    let unsorted = vec![
        Scalar::Int64(3),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(1),
        Scalar::Int64(2),
    ];
    let column = Column::from_values(unsorted).expect("col");
    let sorted = column.sort_values(true).expect("sort");
    for (row, want) in [1, 2, 3].into_iter().enumerate() {
        assert_eq!(sorted.values()[row], Scalar::Int64(want));
    }
    assert!(sorted.values()[3].is_missing());
}
14971
/// Descending sort reverses the present values but still leaves the missing
/// one in the final slot.
#[test]
fn sort_values_descending_keeps_nulls_last() {
    let unsorted = vec![
        Scalar::Int64(1),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(3),
        Scalar::Int64(2),
    ];
    let column = Column::from_values(unsorted).expect("col");
    let sorted = column.sort_values(false).expect("sort");
    for (row, want) in [3, 2, 1].into_iter().enumerate() {
        assert_eq!(sorted.values()[row], Scalar::Int64(want));
    }
    assert!(sorted.values()[3].is_missing());
}
14987
/// `argsort` on `[3, 1, 2]` gives `[1, 2, 0]`, and taking rows in that order
/// reproduces the ascending `sort_values` result.
#[test]
fn argsort_matches_take_sort_values() {
    let column = Column::from_values([3, 1, 2].into_iter().map(Scalar::Int64).collect())
        .expect("col");
    let order = column.argsort();
    assert_eq!(order, vec![1, 2, 0]);

    let gathered = column.take(&order).expect("take");
    let sorted = column.sort_values(true).expect("sort");
    assert_eq!(gathered.values(), sorted.values());
}
14999
15000 fn scalar_sort_reference(values: &[Scalar], ascending: bool) -> Vec<Scalar> {
15003 let mut indexed: Vec<(usize, &Scalar)> = values.iter().enumerate().collect();
15004 indexed.sort_by(|a, b| crate::compare_scalars_na_last(a.1, b.1, ascending));
15005 indexed.into_iter().map(|(_, v)| v.clone()).collect()
15006 }
15007
/// Randomised cross-check of `sort_values` and `argsort` against
/// `scalar_sort_reference` for Int64 and Float64 columns.
///
/// Data comes from a fixed-seed linear congruential generator, so every run
/// sees the same 200 trials. Roughly one value in seven is the raw 64-bit
/// draw reinterpreted as `i64`; the rest fall in `-5..=5`, which produces
/// many duplicates. The float data is the integer data divided by four.
#[test]
fn radix_sort_matches_scalar_reference_i64_and_f64() {
    let mut lcg_state: u64 = 0x9E37_79B9_7F4A_7C15;
    let mut draw = || {
        lcg_state = lcg_state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        lcg_state
    };
    for trial in 0..200 {
        let len = (draw() % 400) as usize + 1;
        let mut ints: Vec<Scalar> = Vec::with_capacity(len);
        let mut floats: Vec<Scalar> = Vec::with_capacity(len);
        for _ in 0..len {
            let r = draw();
            let v = if r % 7 == 0 {
                r as i64
            } else {
                (r % 11) as i64 - 5
            };
            ints.push(Scalar::Int64(v));
            let f = (v as f64) / 4.0;
            // Any zero (of either sign) is stored as +0.0.
            floats.push(Scalar::Float64(if f == 0.0 { 0.0 } else { f }));
        }

        for (vals, label) in [(&ints, "i64"), (&floats, "f64")] {
            let col = Column::from_values(vals.clone()).expect("col");
            assert!(
                col.validity.all(),
                "{label} trial {trial}: unexpected missing"
            );

            for ascending in [true, false] {
                let sorted = col.sort_values(ascending).expect("sort");
                let got = sorted.values().to_vec();
                let want = scalar_sort_reference(vals, ascending);
                assert_eq!(
                    got, want,
                    "{label} trial {trial} asc={ascending} sort mismatch"
                );
            }

            // Applying the argsort permutation to the input must reproduce
            // the ascending reference order.
            let perm = col.argsort();
            let mut via_perm: Vec<Scalar> = Vec::with_capacity(perm.len());
            for &position in perm.iter() {
                via_perm.push(vals[position].clone());
            }
            assert_eq!(
                via_perm,
                scalar_sort_reference(vals, true),
                "{label} trial {trial} argsort mismatch"
            );
        }
    }
}
15071
/// A column built with `from_utf8_contiguous` must sort exactly like the
/// scalar-backed column holding the same strings, and `argsort_with` must
/// return the pinned permutations in both directions.
#[test]
fn contiguous_utf8_argsort_matches_scalar_reference() {
    let raw = ["bee", "alpha", "bee", "alphabet", "", "zulu"];

    // Build both representations in one pass: owned scalars for the
    // reference column, and a byte buffer plus end offsets for the
    // contiguous one.
    let mut scalars: Vec<Scalar> = Vec::with_capacity(raw.len());
    let mut bytes = Vec::new();
    let mut offsets = Vec::with_capacity(raw.len() + 1);
    offsets.push(0);
    for text in raw {
        scalars.push(Scalar::Utf8(text.to_owned()));
        bytes.extend_from_slice(text.as_bytes());
        offsets.push(bytes.len());
    }
    let scalar_col = Column::from_values(scalars).expect("scalar col");
    let contiguous_col = Column::from_utf8_contiguous(bytes, offsets);

    let pinned_orders = [
        (true, vec![4, 1, 3, 0, 2, 5]),
        (false, vec![5, 0, 2, 3, 1, 4]),
    ];
    for (ascending, order) in pinned_orders {
        assert_eq!(contiguous_col.argsort_with(ascending), order);
    }

    for ascending in [true, false] {
        let contiguous_sorted = contiguous_col
            .sort_values(ascending)
            .expect("contiguous sort");
        let scalar_sorted = scalar_col.sort_values(ascending).expect("scalar sort");
        let got = contiguous_sorted.values().to_vec();
        let want = scalar_sorted.values().to_vec();
        assert_eq!(got, want, "ascending={ascending}");
    }
}
15105
/// `as_strictly_increasing_utf8_contiguous` returns `Some` only for
/// contiguous-backed columns whose strings strictly increase. Empty and
/// single-element columns qualify; duplicates, descending input, and
/// scalar-backed columns do not.
#[test]
fn contiguous_utf8_strict_witness_matches_byte_order_483i5() {
    /// Packs `values` into one byte buffer with end offsets and builds a
    /// contiguous Utf8 column from it.
    fn contiguous(values: &[&str]) -> Column {
        let bytes = values.concat().into_bytes();
        let mut offsets = Vec::with_capacity(values.len() + 1);
        let mut end = 0;
        offsets.push(end);
        for value in values {
            end += value.len();
            offsets.push(end);
        }
        Column::from_utf8_contiguous(bytes, offsets)
    }

    let expectations: [(&[&str], bool); 5] = [
        (&["a", "b", "c"], true),
        (&[], true),
        (&["only"], true),
        (&["a", "a"], false),
        (&["b", "a"], false),
    ];
    for (values, has_witness) in expectations {
        let column = contiguous(values);
        assert_eq!(
            column.as_strictly_increasing_utf8_contiguous().is_some(),
            has_witness
        );
    }

    let scalar_backed = Column::from_values(vec![
        Scalar::Utf8("a".to_owned()),
        Scalar::Utf8("b".to_owned()),
    ])
    .expect("scalar-backed utf8");
    let witness = scalar_backed.as_strictly_increasing_utf8_contiguous();
    assert!(witness.is_none());
}
15156
/// Randomised cross-check of `Column::abs` against a per-scalar reference
/// for Int64 and Float64 data.
///
/// A fixed-seed linear congruential generator drives 150 trials. About one
/// integer in fifty is `i64::MIN`, whose reference result is
/// `wrapping_abs()` (i.e. `i64::MIN` itself). In the float data those same
/// slots hold `-0.0`, and floats are compared by bit pattern so that a
/// result of `-0.0` is not accepted where `0.0` is expected.
///
/// The result length is asserted before the element-wise comparison because
/// `zip` stops at the shorter side: without that check, a truncated `abs`
/// result would pass unnoticed.
#[test]
fn abs_typed_matches_scalar_reference() {
    let mut state: u64 = 0x2545_F491_4F6C_DD1D;
    let mut next = || {
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        state
    };
    for trial in 0..150 {
        let n = (next() % 300) as usize + 1;
        let i64_vals: Vec<Scalar> = (0..n)
            .map(|_| {
                let r = next();
                if r % 50 == 0 {
                    Scalar::Int64(i64::MIN)
                } else {
                    Scalar::Int64((r % 2000) as i64 - 1000)
                }
            })
            .collect();
        let f64_vals: Vec<Scalar> = i64_vals
            .iter()
            .map(|s| match s {
                Scalar::Int64(v) => {
                    // Reuse the i64::MIN slots to inject negative zero.
                    let f = if *v == i64::MIN {
                        -0.0
                    } else {
                        *v as f64 / 4.0
                    };
                    Scalar::Float64(f)
                }
                _ => unreachable!(),
            })
            .collect();
        for vals in [&i64_vals, &f64_vals] {
            let col = Column::from_values(vals.clone()).expect("col");
            let got = col.abs().expect("abs").values().to_vec();
            let want: Vec<Scalar> = vals
                .iter()
                .map(|v| match v {
                    Scalar::Int64(x) => Scalar::Int64(x.wrapping_abs()),
                    Scalar::Float64(x) => Scalar::Float64(x.abs()),
                    other => other.clone(),
                })
                .collect();
            // Pin the length first; `zip` below would silently ignore any
            // missing or extra trailing rows.
            assert_eq!(got.len(), want.len(), "trial {trial} abs length");
            for (g, w) in got.iter().zip(&want) {
                match (g, w) {
                    (Scalar::Float64(a), Scalar::Float64(b)) => {
                        assert_eq!(a.to_bits(), b.to_bits(), "trial {trial} float abs")
                    }
                    _ => assert_eq!(g, w, "trial {trial} abs"),
                }
            }
        }
    }
}
15218
/// Ignored timing benchmark: compares `Column::abs` on a 5M-row f64 column
/// with a hand-written per-scalar map that rebuilds the column via
/// `Column::new`.
///
/// Column construction is timed separately and subtracted from both
/// measurements to report an "op-only" ratio next to the full ratio. The
/// XOR-ed lengths (`chk`, `chk2`, `sink`) are printed so each loop's result
/// is used.
#[test]
#[ignore = "timing benchmark; run with --ignored --nocapture on the rch VM"]
fn abs_typed_timing_vs_scalar() {
    use std::time::Instant;

    let rows = 5_000_000usize;
    let iters = 10;
    let mut lcg_state: u64 = 0x9E37_79B9_7F4A_7C15;
    let mut draw = || {
        lcg_state = lcg_state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        lcg_state
    };
    let mut data: Vec<f64> = Vec::with_capacity(rows);
    for _ in 0..rows {
        data.push((draw() % 2_000_000) as f64 - 1_000_000.0);
    }
    let mk = || Column::from_f64_values(data.clone());

    // Typed path: build the column, then call `abs` on it.
    let typed_clock = Instant::now();
    let mut chk = 0usize;
    for _ in 0..iters {
        let result = mk().abs().unwrap();
        chk ^= result.len();
    }
    let typed = typed_clock.elapsed();

    // Scalar path: build the column, map every scalar by hand, rebuild.
    let scalar_clock = Instant::now();
    let mut chk2 = 0usize;
    for _ in 0..iters {
        let col = mk();
        let mapped: Vec<Scalar> = col
            .values()
            .iter()
            .map(|v| {
                if let Scalar::Float64(x) = v {
                    Scalar::Float64(x.abs())
                } else {
                    v.clone()
                }
            })
            .collect();
        chk2 ^= Column::new(DType::Float64, mapped).unwrap().len();
    }
    let scalar = scalar_clock.elapsed();

    // Baseline: column construction alone, subtracted from both paths.
    let build_clock = Instant::now();
    let mut sink = 0usize;
    for _ in 0..iters {
        sink ^= mk().len();
    }
    let build = build_clock.elapsed();

    let typed_op = typed.saturating_sub(build).as_secs_f64();
    let scalar_op = scalar.saturating_sub(build).as_secs_f64();
    eprintln!(
        "abs 5M f64 x{iters}: typed={typed:?} scalar={scalar:?} build={build:?} \
         op-only ratio={:.2}x (full {:.2}x, chk {chk}/{chk2}/{sink})",
        scalar_op / typed_op,
        scalar.as_secs_f64() / typed.as_secs_f64()
    );
}
15274
/// Randomised cross-check of `factorize_with_options(sort, true)` on Int64
/// data against a brute-force reference.
///
/// The reference assigns codes in first-seen order via a linear scan of the
/// uniques found so far. When `sort` is set, it reorders the uniques
/// ascending and remaps every code to the new position. Values are drawn
/// from `-6..=6` by a fixed-seed linear congruential generator, over 150
/// trials.
#[test]
fn factorize_direct_address_matches_reference() {
    let mut lcg_state: u64 = 0x51A4_3C29_7E10_BB67;
    let mut draw = || {
        lcg_state = lcg_state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        lcg_state
    };
    for trial in 0..150 {
        let len = (draw() % 300) as usize + 1;
        let mut data: Vec<i64> = Vec::with_capacity(len);
        for _ in 0..len {
            data.push((draw() % 13) as i64 - 6);
        }

        for sort in [false, true] {
            // Reference pass: first-seen codes.
            let mut uniques: Vec<i64> = Vec::new();
            let mut codes: Vec<i64> = Vec::with_capacity(len);
            for &value in &data {
                let known = uniques.iter().position(|&u| u == value);
                let slot = known.unwrap_or_else(|| {
                    uniques.push(value);
                    uniques.len() - 1
                });
                codes.push(slot as i64);
            }
            if sort {
                // Uniques are distinct, so ordering by value is unambiguous.
                let mut ranked: Vec<(usize, i64)> =
                    uniques.iter().copied().enumerate().collect();
                ranked.sort_by_key(|&(_, value)| value);
                let mut remap = vec![0i64; uniques.len()];
                for (new_pos, &(orig, _)) in ranked.iter().enumerate() {
                    remap[orig] = new_pos as i64;
                }
                for code in codes.iter_mut() {
                    *code = remap[*code as usize];
                }
                uniques = ranked.into_iter().map(|(_, value)| value).collect();
            }

            let col = Column::from_values(data.iter().copied().map(Scalar::Int64).collect())
                .expect("col");
            let (code_col, uniq_col) =
                col.factorize_with_options(sort, true).expect("factorize");

            // Keep only Int64 entries; the length checks below then fail if
            // either column contained anything else.
            let mut got_codes: Vec<i64> = Vec::new();
            for cell in code_col.values().iter() {
                if let Scalar::Int64(c) = cell {
                    got_codes.push(*c);
                }
            }
            let mut got_uniques: Vec<i64> = Vec::new();
            for cell in uniq_col.values().iter() {
                if let Scalar::Int64(c) = cell {
                    got_uniques.push(*c);
                }
            }
            assert_eq!(got_codes.len(), code_col.len(), "non-int code");
            assert_eq!(got_uniques.len(), uniq_col.len(), "non-int unique");
            assert_eq!(got_codes, codes, "trial {trial} sort={sort} codes");
            assert_eq!(got_uniques, uniques, "trial {trial} sort={sort} uniques");
        }
    }
}
15351
/// Ignored timing benchmark: times `factorize_with_options(false, true)` on
/// a 5M-row i64 column built with `from_i64_values` against a `HashMap`
/// loop over a scalar-backed column, at cardinalities of 1 000 and
/// 2 000 000.
///
/// The last code from each iteration is XOR-ed into `chk` / `chk2` and
/// printed so each loop's result is used.
#[test]
#[ignore = "timing benchmark; run with --ignored --nocapture on the rch VM"]
fn factorize_direct_address_timing_vs_hashmap() {
    use std::{collections::HashMap, time::Instant};

    let rows = 5_000_000usize;
    let iters = 10;
    for cardinality in [1_000u64, 2_000_000u64] {
        let mut lcg_state: u64 = 0x2468_ACE0_1357_9BDF ^ cardinality;
        let mut draw = || {
            lcg_state = lcg_state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            lcg_state
        };
        let mut data: Vec<i64> = Vec::with_capacity(rows);
        for _ in 0..rows {
            data.push((draw() % cardinality) as i64);
        }

        // Path under test: the column's own factorize.
        let col = Column::from_i64_values(data.clone());
        let direct_clock = Instant::now();
        let mut chk = 0i64;
        for _ in 0..iters {
            let (codes, _u) = col.factorize_with_options(false, true).expect("da");
            if let Scalar::Int64(c) = &codes.values()[rows - 1] {
                chk ^= *c;
            }
        }
        let direct = direct_clock.elapsed();

        // Baseline: HashMap-based first-seen coding over scalar values.
        let scalar_col =
            Column::from_values(data.iter().copied().map(Scalar::Int64).collect())
                .expect("col");
        let hashmap_clock = Instant::now();
        let mut chk2 = 0i64;
        for _ in 0..iters {
            let mut map: HashMap<i64, i64> = HashMap::new();
            let mut uniques = 0i64;
            let mut last = 0i64;
            for v in scalar_col.values() {
                let Scalar::Int64(i) = v else {
                    continue;
                };
                last = *map.entry(*i).or_insert_with(|| {
                    let c = uniques;
                    uniques += 1;
                    c
                });
            }
            chk2 ^= last;
        }
        let scalar = hashmap_clock.elapsed();

        eprintln!(
            "factorize 5M i64 card={cardinality} x{iters}: direct={direct:?} hashmap={scalar:?} ratio={:.2}x (chk {chk}/{chk2})",
            scalar.as_secs_f64() / direct.as_secs_f64()
        );
    }
}
15408
15409 #[test]
15410 fn duplicated_typed_matches_bruteforce_reference() {
15411 let mut state: u64 = 0xD1B5_4A32_D192_ED03;
15417 let mut next = || {
15418 state = state
15419 .wrapping_mul(6364136223846793005)
15420 .wrapping_add(1442695040888963407);
15421 state
15422 };
15423 let fbits = |f: f64| (if f == 0.0 { 0.0 } else { f }).to_bits();
15424 for trial in 0..150 {
15425 let n = (next() % 300) as usize + 1;
15426 let raw: Vec<i64> = (0..n).map(|_| (next() % 9) as i64 - 4).collect();
15428 let i64_vals: Vec<Scalar> = raw.iter().map(|&v| Scalar::Int64(v)).collect();
15429 let f64_vals: Vec<Scalar> = raw
15430 .iter()
15431 .map(|&v| Scalar::Float64(v as f64 / 2.0))
15432 .collect();
15433 let i64_keys: Vec<i64> = raw.clone();
15434 let f64_keys: Vec<u64> = raw.iter().map(|&v| fbits(v as f64 / 2.0)).collect();
15435
15436 for keep in ["first", "last", "false"] {
15437 let bf = |eq_keys: &dyn Fn(usize, usize) -> bool| -> Vec<bool> {
15439 (0..n)
15440 .map(|i| match keep {
15441 "first" => (0..i).any(|j| eq_keys(i, j)),
15442 "last" => (i + 1..n).any(|j| eq_keys(i, j)),
15443 _ => (0..n).any(|j| j != i && eq_keys(i, j)),
15444 })
15445 .collect()
15446 };
15447 let want_i = bf(&|a, b| i64_keys[a] == i64_keys[b]);
15448 let want_f = bf(&|a, b| f64_keys[a] == f64_keys[b]);
15449
15450 let col_i = Column::from_values(i64_vals.clone()).expect("i64 col");
15451 let got_i: Vec<bool> = col_i
15452 .duplicated_keep(keep)
15453 .expect("dup i64")
15454 .values()
15455 .iter()
15456 .map(|v| matches!(v, Scalar::Bool(true)))
15457 .collect();
15458 assert_eq!(got_i, want_i, "i64 trial {trial} keep={keep}");
15459
15460 let col_f = Column::from_values(f64_vals.clone()).expect("f64 col");
15461 let got_f: Vec<bool> = col_f
15462 .duplicated_keep(keep)
15463 .expect("dup f64")
15464 .values()
15465 .iter()
15466 .map(|v| matches!(v, Scalar::Bool(true)))
15467 .collect();
15468 assert_eq!(got_f, want_f, "f64 trial {trial} keep={keep}");
15469 }
15470 }
15471 }
15472
15473 #[test]
15474 #[ignore = "timing benchmark; run with --ignored --nocapture on the rch VM"]
15475 fn duplicated_typed_timing_vs_scalar() {
15476 use std::{collections::HashSet, time::Instant};
15477 let n = 5_000_000usize;
15478 let iters = 10;
15479 #[derive(Hash, PartialEq, Eq)]
15482 enum OldKey {
15483 Int64(i64),
15484 Null,
15485 }
15486 for cardinality in [1_000u64, 2_000_000u64] {
15487 let mut state: u64 = 0x0FED_CBA9_8765_4321 ^ cardinality;
15488 let mut next = || {
15489 state = state
15490 .wrapping_mul(6364136223846793005)
15491 .wrapping_add(1442695040888963407);
15492 state
15493 };
15494 let data: Vec<i64> = (0..n).map(|_| (next() % cardinality) as i64).collect();
15495
15496 let col = Column::from_i64_values(data.clone());
15497 let t0 = Instant::now();
15498 let mut chk = 0usize;
15499 for _ in 0..iters {
15500 let d = col.duplicated_keep("first").expect("typed");
15501 chk ^= d
15502 .values()
15503 .iter()
15504 .filter(|v| matches!(v, Scalar::Bool(true)))
15505 .count();
15506 }
15507 let typed = t0.elapsed();
15508
15509 let scalar_col =
15510 Column::from_values(data.iter().map(|&v| Scalar::Int64(v)).collect())
15511 .expect("col");
15512 let t1 = Instant::now();
15513 let mut chk2 = 0usize;
15514 for _ in 0..iters {
15515 let mut seen: HashSet<OldKey> = HashSet::new();
15516 let mut count = 0usize;
15517 for v in scalar_col.values() {
15518 let key = if v.is_missing() {
15519 OldKey::Null
15520 } else if let Scalar::Int64(i) = v {
15521 OldKey::Int64(*i)
15522 } else {
15523 OldKey::Null
15524 };
15525 if !seen.insert(key) {
15526 count += 1;
15527 }
15528 }
15529 chk2 ^= count;
15530 }
15531 let scalar = t1.elapsed();
15532 eprintln!(
15533 "duplicated 5M i64 card={cardinality} x{iters}: typed={typed:?} old_keyenum_siphash={scalar:?} ratio={:.2}x (chk {chk}/{chk2})",
15534 scalar.as_secs_f64() / typed.as_secs_f64()
15535 );
15536 }
15537 }
15538
15539 #[test]
15540 #[ignore = "timing benchmark; run with --ignored --nocapture on the rch VM"]
15541 fn radix_sort_timing_vs_scalar() {
15542 use std::time::Instant;
15543 let n = 5_000_000usize;
15544 let mut state: u64 = 0x1234_5678_9ABC_DEF0;
15545 let mut next = || {
15546 state = state
15547 .wrapping_mul(6364136223846793005)
15548 .wrapping_add(1442695040888963407);
15549 state
15550 };
15551 let data: Vec<i64> = (0..n).map(|_| next() as i64).collect();
15552 let col = Column::from_i64_values(data.clone());
15553
15554 let iters = 10;
15555 let t0 = Instant::now();
15556 let mut checksum = 0i64;
15557 for _ in 0..iters {
15558 let sorted = col.sort_values(true).expect("radix");
15559 checksum ^= match &sorted.values()[0] {
15560 Scalar::Int64(v) => *v,
15561 _ => 0,
15562 };
15563 }
15564 let radix = t0.elapsed();
15565
15566 let scalar_col = Column::from_values(data.iter().map(|&v| Scalar::Int64(v)).collect())
15568 .expect("scalar col");
15569 let t1 = Instant::now();
15570 let mut checksum2 = 0i64;
15571 for _ in 0..iters {
15572 let mut indexed: Vec<(usize, &Scalar)> =
15573 scalar_col.values().iter().enumerate().collect();
15574 indexed.sort_by(|a, b| crate::compare_scalars_na_last(a.1, b.1, true));
15575 if let Scalar::Int64(v) = indexed[0].1 {
15576 checksum2 ^= *v;
15577 }
15578 }
15579 let scalar = t1.elapsed();
15580 eprintln!(
15581 "sort_single 5M i64 x{iters}: radix={radix:?} scalar={scalar:?} ratio={:.2}x (chk {checksum}/{checksum2})",
15582 scalar.as_secs_f64() / radix.as_secs_f64()
15583 );
15584 }
15585
15586 #[test]
15587 fn diff_periods_one_subtracts_prev() {
15588 let col =
15589 Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(8), Scalar::Int64(10)])
15590 .expect("col");
15591 let d = col.diff(1).expect("diff");
15592 assert!(d.values()[0].is_missing());
15593 assert_eq!(d.values()[1], Scalar::Float64(3.0));
15594 assert_eq!(d.values()[2], Scalar::Float64(2.0));
15595 assert_eq!(d.dtype(), DType::Float64);
15596 }
15597
15598 #[test]
15599 fn diff_negative_period_looks_ahead() {
15600 let col =
15601 Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(8), Scalar::Int64(10)])
15602 .expect("col");
15603 let d = col.diff(-1).expect("diff");
15604 assert_eq!(d.values()[0], Scalar::Float64(-3.0));
15605 assert_eq!(d.values()[1], Scalar::Float64(-2.0));
15606 assert!(d.values()[2].is_missing());
15607 }
15608
15609 #[test]
15610 fn diff_timedelta64_returns_timedelta_e607u() {
15611 let one_hour = 3_600 * 1_000_000_000_i64;
15614 let col = Column::from_values(vec![
15615 Scalar::Timedelta64(one_hour),
15616 Scalar::Timedelta64(3 * one_hour),
15617 Scalar::Timedelta64(2 * one_hour),
15618 ])
15619 .expect("col");
15620 let d = col.diff(1).expect("diff");
15621 assert_eq!(d.dtype(), DType::Timedelta64);
15622 assert!(d.values()[0].is_missing()); assert_eq!(d.values()[1], Scalar::Timedelta64(2 * one_hour));
15624 assert_eq!(d.values()[2], Scalar::Timedelta64(-one_hour));
15625 }
15626
15627 #[test]
15628 fn diff_timedelta64_nat_propagates_e607u() {
15629 use fp_types::Timedelta;
15630 let one_hour = 3_600 * 1_000_000_000_i64;
15631 let col = Column::from_values(vec![
15632 Scalar::Timedelta64(one_hour),
15633 Scalar::Timedelta64(Timedelta::NAT),
15634 Scalar::Timedelta64(2 * one_hour),
15635 ])
15636 .expect("col");
15637 let d = col.diff(1).expect("diff");
15638 assert_eq!(d.dtype(), DType::Timedelta64);
15639 assert!(d.values()[0].is_missing());
15640 assert!(d.values()[1].is_missing()); assert!(d.values()[2].is_missing()); }
15643
15644 #[test]
15645 fn duplicated_keep_first() {
15646 let col = Column::from_values(vec![
15647 Scalar::Int64(1),
15648 Scalar::Int64(2),
15649 Scalar::Int64(1),
15650 Scalar::Int64(3),
15651 Scalar::Int64(2),
15652 ])
15653 .expect("col");
15654 let d = col.duplicated().expect("duplicated");
15655 assert_eq!(d.values()[0], Scalar::Bool(false));
15656 assert_eq!(d.values()[1], Scalar::Bool(false));
15657 assert_eq!(d.values()[2], Scalar::Bool(true));
15658 assert_eq!(d.values()[3], Scalar::Bool(false));
15659 assert_eq!(d.values()[4], Scalar::Bool(true));
15660 }
15661
15662 #[test]
15663 fn duplicated_treats_nulls_as_one_bucket() {
15664 let col = Column::from_values(vec![
15665 Scalar::Null(NullKind::NaN),
15666 Scalar::Null(NullKind::NaN),
15667 Scalar::Int64(1),
15668 ])
15669 .expect("col");
15670 let d = col.duplicated().expect("duplicated");
15671 assert_eq!(d.values()[0], Scalar::Bool(false));
15672 assert_eq!(d.values()[1], Scalar::Bool(true));
15673 assert_eq!(d.values()[2], Scalar::Bool(false));
15674 }
15675
15676 #[test]
15677 fn duplicated_keep_variants_match_pandas() {
15678 let col = Column::from_values(vec![
15679 Scalar::Int64(1),
15680 Scalar::Int64(2),
15681 Scalar::Int64(1),
15682 Scalar::Int64(1),
15683 Scalar::Int64(3),
15684 Scalar::Int64(2),
15685 Scalar::Null(NullKind::NaN),
15686 Scalar::Null(NullKind::NaN),
15687 ])
15688 .expect("col");
15689
15690 let last = col.duplicated_keep("last").expect("duplicated last");
15691 assert_eq!(
15692 last.values(),
15693 &[
15694 Scalar::Bool(true),
15695 Scalar::Bool(true),
15696 Scalar::Bool(true),
15697 Scalar::Bool(false),
15698 Scalar::Bool(false),
15699 Scalar::Bool(false),
15700 Scalar::Bool(true),
15701 Scalar::Bool(false),
15702 ]
15703 );
15704
15705 let none = col.duplicated_keep("false").expect("duplicated none");
15706 assert_eq!(
15707 none.values(),
15708 &[
15709 Scalar::Bool(true),
15710 Scalar::Bool(true),
15711 Scalar::Bool(true),
15712 Scalar::Bool(true),
15713 Scalar::Bool(false),
15714 Scalar::Bool(true),
15715 Scalar::Bool(true),
15716 Scalar::Bool(true),
15717 ]
15718 );
15719 }
15720
15721 #[test]
15722 fn between_inclusive_both() {
15723 let col = Column::from_values(vec![
15724 Scalar::Float64(0.5),
15725 Scalar::Float64(1.0),
15726 Scalar::Float64(5.0),
15727 Scalar::Float64(6.0),
15728 ])
15729 .expect("col");
15730 let b = col.between(1.0, 5.0, true).expect("between");
15731 assert_eq!(b.values()[0], Scalar::Bool(false));
15732 assert_eq!(b.values()[1], Scalar::Bool(true));
15733 assert_eq!(b.values()[2], Scalar::Bool(true));
15734 assert_eq!(b.values()[3], Scalar::Bool(false));
15735 }
15736
15737 #[test]
15738 fn between_exclusive() {
15739 let col = Column::from_values(vec![
15740 Scalar::Float64(1.0),
15741 Scalar::Float64(3.0),
15742 Scalar::Float64(5.0),
15743 ])
15744 .expect("col");
15745 let b = col.between(1.0, 5.0, false).expect("between");
15746 assert_eq!(b.values()[0], Scalar::Bool(false));
15747 assert_eq!(b.values()[1], Scalar::Bool(true));
15748 assert_eq!(b.values()[2], Scalar::Bool(false));
15749 }
15750
15751 #[test]
15752 fn between_left_and_right_inclusive_edges() {
15753 let col = Column::from_values(vec![
15754 Scalar::Float64(1.0),
15755 Scalar::Float64(3.0),
15756 Scalar::Float64(5.0),
15757 ])
15758 .expect("col");
15759
15760 let left = col
15761 .between_inclusive(1.0, 5.0, "left")
15762 .expect("between left");
15763 assert_eq!(
15764 left.values(),
15765 &[Scalar::Bool(true), Scalar::Bool(true), Scalar::Bool(false),]
15766 );
15767
15768 let right = col
15769 .between_inclusive(1.0, 5.0, "right")
15770 .expect("between right");
15771 assert_eq!(
15772 right.values(),
15773 &[Scalar::Bool(false), Scalar::Bool(true), Scalar::Bool(true),]
15774 );
15775 }
15776
15777 #[test]
15778 fn between_missing_maps_to_false() {
15779 let col = Column::from_values(vec![Scalar::Null(NullKind::NaN), Scalar::Float64(3.0)])
15780 .expect("col");
15781 let b = col.between(1.0, 5.0, true).expect("between");
15782 assert_eq!(b.values()[0], Scalar::Bool(false));
15783 assert_eq!(b.values()[1], Scalar::Bool(true));
15784 }
15785 }
15786
15787 mod factorize {
15788 use fp_types::NullKind;
15789
15790 use super::*;
15791
15792 #[test]
15793 fn factorize_preserves_first_seen_order() {
15794 let col = Column::from_values(vec![
15795 Scalar::Utf8("b".into()),
15796 Scalar::Utf8("a".into()),
15797 Scalar::Utf8("b".into()),
15798 Scalar::Utf8("c".into()),
15799 Scalar::Utf8("a".into()),
15800 ])
15801 .expect("col");
15802
15803 let (codes, uniques) = col.factorize().expect("factorize");
15804 assert_eq!(codes.dtype(), DType::Int64);
15805 assert_eq!(
15806 codes.values(),
15807 &[
15808 Scalar::Int64(0),
15809 Scalar::Int64(1),
15810 Scalar::Int64(0),
15811 Scalar::Int64(2),
15812 Scalar::Int64(1),
15813 ]
15814 );
15815 assert_eq!(
15816 uniques.values(),
15817 &[
15818 Scalar::Utf8("b".into()),
15819 Scalar::Utf8("a".into()),
15820 Scalar::Utf8("c".into()),
15821 ]
15822 );
15823 }
15824
15825 #[test]
15826 fn factorize_missing_values_map_to_negative_one() {
15827 let col = Column::from_values(vec![
15828 Scalar::Float64(1.5),
15829 Scalar::Null(NullKind::NaN),
15830 Scalar::Float64(2.5),
15831 Scalar::Null(NullKind::Null),
15832 Scalar::Float64(1.5),
15833 ])
15834 .expect("col");
15835
15836 let (codes, uniques) = col.factorize().expect("factorize");
15837 assert_eq!(
15838 codes.values(),
15839 &[
15840 Scalar::Int64(0),
15841 Scalar::Int64(-1),
15842 Scalar::Int64(1),
15843 Scalar::Int64(-1),
15844 Scalar::Int64(0),
15845 ]
15846 );
15847 assert_eq!(uniques.dtype(), DType::Float64);
15848 assert_eq!(
15849 uniques.values(),
15850 &[Scalar::Float64(1.5), Scalar::Float64(2.5)]
15851 );
15852 }
15853
15854 #[test]
15855 fn factorize_empty_column_returns_empty_outputs() {
15856 let col = Column::new(DType::Int64, Vec::new()).expect("col");
15857 let (codes, uniques) = col.factorize().expect("factorize");
15858 assert!(codes.is_empty());
15859 assert!(uniques.is_empty());
15860 assert_eq!(codes.dtype(), DType::Int64);
15861 assert_eq!(uniques.dtype(), DType::Int64);
15862 }
15863
15864 #[test]
15865 fn factorize_with_sort_sorts_uniques_and_relabels_codes() {
15866 let col = Column::from_values(vec![
15867 Scalar::Utf8("b".into()),
15868 Scalar::Utf8("a".into()),
15869 Scalar::Utf8("b".into()),
15870 Scalar::Utf8("c".into()),
15871 Scalar::Utf8("a".into()),
15872 ])
15873 .expect("col");
15874
15875 let (codes, uniques) = col.factorize_with_options(true, true).expect("factorize");
15876 assert_eq!(
15877 codes.values(),
15878 &[
15879 Scalar::Int64(1),
15880 Scalar::Int64(0),
15881 Scalar::Int64(1),
15882 Scalar::Int64(2),
15883 Scalar::Int64(0),
15884 ]
15885 );
15886 assert_eq!(
15887 uniques.values(),
15888 &[
15889 Scalar::Utf8("a".into()),
15890 Scalar::Utf8("b".into()),
15891 Scalar::Utf8("c".into()),
15892 ]
15893 );
15894 }
15895
15896 #[test]
15897 fn factorize_with_use_na_sentinel_false_keeps_missing_in_uniques() {
15898 let col = Column::from_values(vec![
15899 Scalar::Float64(1.5),
15900 Scalar::Null(NullKind::NaN),
15901 Scalar::Float64(2.5),
15902 Scalar::Null(NullKind::Null),
15903 Scalar::Float64(1.5),
15904 ])
15905 .expect("col");
15906
15907 let (codes, uniques) = col.factorize_with_options(false, false).expect("factorize");
15908 assert_eq!(
15909 codes.values(),
15910 &[
15911 Scalar::Int64(0),
15912 Scalar::Int64(1),
15913 Scalar::Int64(2),
15914 Scalar::Int64(1),
15915 Scalar::Int64(0),
15916 ]
15917 );
15918 assert_eq!(uniques.dtype(), DType::Float64);
15919 assert_eq!(
15920 uniques.values(),
15921 &[
15922 Scalar::Float64(1.5),
15923 Scalar::Null(NullKind::NaN),
15924 Scalar::Float64(2.5),
15925 ]
15926 );
15927 }
15928
15929 #[test]
15930 fn factorize_with_sort_and_use_na_sentinel_false_sorts_missing_last() {
15931 let col = Column::from_values(vec![
15932 Scalar::Utf8("b".into()),
15933 Scalar::Null(NullKind::Null),
15934 Scalar::Utf8("a".into()),
15935 Scalar::Utf8("b".into()),
15936 Scalar::Null(NullKind::NaN),
15937 ])
15938 .expect("col");
15939
15940 let (codes, uniques) = col.factorize_with_options(true, false).expect("factorize");
15941 assert_eq!(
15942 codes.values(),
15943 &[
15944 Scalar::Int64(1),
15945 Scalar::Int64(2),
15946 Scalar::Int64(0),
15947 Scalar::Int64(1),
15948 Scalar::Int64(2),
15949 ]
15950 );
15951 assert_eq!(
15952 uniques.values(),
15953 &[
15954 Scalar::Utf8("a".into()),
15955 Scalar::Utf8("b".into()),
15956 Scalar::Null(NullKind::Null),
15957 ]
15958 );
15959 }
15960 }
15961
15962 mod aggregation_helpers {
15963 use fp_types::{NullKind, Timedelta};
15964
15965 use super::*;
15966
15967 fn assert_float_nan(value: Scalar) {
15968 assert!(
15969 matches!(value, Scalar::Float64(v) if v.is_nan()),
15970 "expected Float64(NaN), got {value:?}"
15971 );
15972 }
15973
15974 #[test]
15975 fn sum_skips_nulls() {
15976 let col = Column::from_values(vec![
15977 Scalar::Float64(1.0),
15978 Scalar::Null(NullKind::NaN),
15979 Scalar::Float64(2.0),
15980 Scalar::Float64(3.0),
15981 ])
15982 .expect("col");
15983 let sum = col.sum();
15984 assert!(matches!(sum, Scalar::Float64(_)), "expected Float64 result");
15985 if let Scalar::Float64(v) = sum {
15986 assert!((v - 6.0).abs() < 1e-9);
15987 }
15988 }
15989
15990 #[test]
15991 fn sum_empty_is_zero() {
15992 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
15993 assert_eq!(col.sum(), Scalar::Float64(0.0));
15994 }
15995
15996 #[test]
15997 fn mean_matches_sum_over_count() {
15998 let col = Column::from_values(vec![
15999 Scalar::Float64(2.0),
16000 Scalar::Float64(4.0),
16001 Scalar::Float64(6.0),
16002 ])
16003 .expect("col");
16004 let mean = col.mean();
16005 assert!(
16006 matches!(mean, Scalar::Float64(_)),
16007 "expected Float64 result"
16008 );
16009 if let Scalar::Float64(v) = mean {
16010 assert!((v - 4.0).abs() < 1e-9);
16011 }
16012 }
16013
16014 #[test]
16015 fn mean_empty_is_null() {
16016 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16017 assert!(col.mean().is_missing());
16018 }
16019
16020 #[test]
16021 fn min_max_extrema_skip_nulls() {
16022 let col = Column::from_values(vec![
16023 Scalar::Int64(3),
16024 Scalar::Null(NullKind::NaN),
16025 Scalar::Int64(1),
16026 Scalar::Int64(5),
16027 Scalar::Int64(2),
16028 ])
16029 .expect("col");
16030 assert_eq!(col.min(), Scalar::Int64(1));
16031 assert_eq!(col.max(), Scalar::Int64(5));
16032 }
16033
16034 #[test]
16035 fn median_of_odd_count() {
16036 let col = Column::from_values(vec![
16037 Scalar::Float64(1.0),
16038 Scalar::Float64(5.0),
16039 Scalar::Float64(3.0),
16040 ])
16041 .expect("col");
16042 let median = col.median();
16043 assert!(
16044 matches!(median, Scalar::Float64(_)),
16045 "expected Float64 result"
16046 );
16047 if let Scalar::Float64(v) = median {
16048 assert!((v - 3.0).abs() < 1e-9);
16049 }
16050 }
16051
16052 #[test]
16053 fn prod_multiplies_non_nulls() {
16054 let col = Column::from_values(vec![
16055 Scalar::Float64(2.0),
16056 Scalar::Null(NullKind::NaN),
16057 Scalar::Float64(3.0),
16058 Scalar::Float64(4.0),
16059 ])
16060 .expect("col");
16061 let prod = col.prod();
16062 assert!(
16063 matches!(prod, Scalar::Float64(_)),
16064 "expected Float64 result"
16065 );
16066 if let Scalar::Float64(v) = prod {
16067 assert!((v - 24.0).abs() < 1e-9);
16068 }
16069 }
16070
16071 #[test]
16072 fn prod_empty_is_one() {
16073 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16074 assert_eq!(col.prod(), Scalar::Float64(1.0));
16075 }
16076
16077 #[test]
16078 fn product_alias_matches_prod() {
16079 let col = Column::from_values(vec![
16080 Scalar::Float64(2.0),
16081 Scalar::Null(NullKind::NaN),
16082 Scalar::Float64(3.0),
16083 ])
16084 .expect("col");
16085 assert_eq!(col.product(), col.prod());
16086 }
16087
16088 #[test]
16089 fn skipna_false_aggregate_variants_propagate_nan() {
16090 let col = Column::from_values(vec![
16091 Scalar::Float64(2.0),
16092 Scalar::Null(NullKind::NaN),
16093 Scalar::Float64(4.0),
16094 ])
16095 .expect("col");
16096
16097 assert_eq!(col.sum_skipna(true), col.sum());
16098 assert_float_nan(col.sum_skipna(false));
16099 assert_float_nan(col.mean_skipna(false));
16100 assert_float_nan(col.min_skipna(false));
16101 assert_float_nan(col.max_skipna(false));
16102 assert_float_nan(col.median_skipna(false));
16103 assert_float_nan(col.prod_skipna(false));
16104 assert_float_nan(col.var_skipna(1, false));
16105 assert_float_nan(col.std_skipna(1, false));
16106 assert_float_nan(col.sem_skipna(1, false));
16107 }
16108
16109 #[test]
16110 fn skipna_false_timedelta_aggregate_variants_propagate_nat() {
16111 let col = Column::from_values(vec![
16112 Scalar::Timedelta64(Timedelta::NANOS_PER_SEC),
16113 Scalar::Timedelta64(Timedelta::NAT),
16114 ])
16115 .expect("col");
16116
16117 assert_eq!(col.sum_skipna(false), Scalar::Timedelta64(Timedelta::NAT));
16118 assert_eq!(col.mean_skipna(false), Scalar::Timedelta64(Timedelta::NAT));
16119 assert_eq!(col.min_skipna(false), Scalar::Timedelta64(Timedelta::NAT));
16120 assert_eq!(col.max_skipna(false), Scalar::Timedelta64(Timedelta::NAT));
16121 }
16122
16123 #[test]
16124 fn quantile_median_of_sorted_values() {
16125 let col = Column::from_values(vec![
16126 Scalar::Float64(1.0),
16127 Scalar::Float64(2.0),
16128 Scalar::Float64(3.0),
16129 Scalar::Float64(4.0),
16130 Scalar::Float64(5.0),
16131 ])
16132 .expect("col");
16133 let quantile = col.quantile(0.5);
16134 assert!(
16135 matches!(quantile, Scalar::Float64(v) if (v - 3.0).abs() < 1e-9),
16136 "expected Float64 median, got {quantile:?}"
16137 );
16138 }
16139
16140 #[test]
16141 fn quantile_empty_is_null() {
16142 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16143 assert!(col.quantile(0.5).is_missing());
16144 }
16145
16146 #[test]
16147 fn quantile_out_of_range_is_null() {
16148 let col = Column::from_values(vec![Scalar::Float64(1.0)]).expect("col");
16149 assert!(col.quantile(1.5).is_missing());
16150 assert!(col.quantile(-0.1).is_missing());
16151 }
16152
16153 #[test]
16154 fn mode_returns_tied_max_frequency() {
16155 let col = Column::from_values(vec![
16156 Scalar::Int64(1),
16157 Scalar::Int64(2),
16158 Scalar::Int64(2),
16159 Scalar::Int64(3),
16160 Scalar::Int64(3),
16161 ])
16162 .expect("col");
16163 let m = col.mode().expect("mode");
16164 assert_eq!(m.values(), &[Scalar::Int64(2), Scalar::Int64(3)]);
16165 }
16166
16167 #[test]
16168 fn mode_ignores_missing_values() {
16169 let col = Column::from_values(vec![
16170 Scalar::Int64(1),
16171 Scalar::Null(NullKind::NaN),
16172 Scalar::Int64(1),
16173 Scalar::Null(NullKind::NaN),
16174 ])
16175 .expect("col");
16176 let m = col.mode().expect("mode");
16177 assert_eq!(m.values(), &[Scalar::Int64(1)]);
16178 }
16179
16180 #[test]
16181 fn mode_empty_is_empty_same_dtype() {
16182 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16183 let m = col.mode().expect("mode");
16184 assert!(m.is_empty());
16185 }
16186
16187 #[test]
16188 fn memory_usage_fixed_width_for_numeric() {
16189 let col =
16190 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
16191 .expect("col");
16192 let usage = col.memory_usage(false);
16193 assert_eq!(usage, 25);
16195 }
16196
16197 #[test]
16198 fn memory_usage_deep_counts_utf8_bytes() {
16199 let col = Column::from_values(vec![
16200 Scalar::Utf8("hi".into()),
16201 Scalar::Utf8("world".into()),
16202 ])
16203 .expect("col");
16204 let shallow = col.memory_usage(false);
16205 let deep = col.memory_usage(true);
16206 assert!(deep > shallow);
16207 assert_eq!(deep - shallow, 7);
16209 }
16210
16211 #[test]
16212 fn interpolate_fills_interior_gaps() {
16213 let col = Column::from_values(vec![
16214 Scalar::Float64(1.0),
16215 Scalar::Null(NullKind::NaN),
16216 Scalar::Null(NullKind::NaN),
16217 Scalar::Float64(4.0),
16218 ])
16219 .expect("col");
16220 let r = col.interpolate_linear().expect("interpolate");
16221 assert_eq!(r.values()[0], Scalar::Float64(1.0));
16222 assert!(
16223 matches!(&r.values()[1], Scalar::Float64(v) if (*v - 2.0).abs() < 1e-9),
16224 "expected Float64, got {:?}",
16225 r.values()[1]
16226 );
16227 assert!(
16228 matches!(&r.values()[2], Scalar::Float64(v) if (*v - 3.0).abs() < 1e-9),
16229 "expected Float64, got {:?}",
16230 r.values()[2]
16231 );
16232 assert_eq!(r.values()[3], Scalar::Float64(4.0));
16233 }
16234
16235 #[test]
16236 fn interpolate_leading_null_stays_null_trailing_forward_fills() {
16237 let col = Column::from_values(vec![
16243 Scalar::Null(NullKind::NaN),
16244 Scalar::Float64(2.0),
16245 Scalar::Null(NullKind::NaN),
16246 Scalar::Float64(4.0),
16247 Scalar::Null(NullKind::NaN),
16248 ])
16249 .expect("col");
16250 let r = col.interpolate_linear().expect("interpolate");
16251 assert!(r.values()[0].is_missing());
16252 assert_eq!(r.values()[1], Scalar::Float64(2.0));
16253 assert!(
16254 matches!(&r.values()[2], Scalar::Float64(v) if (*v - 3.0).abs() < 1e-9),
16255 "expected Float64, got {:?}",
16256 r.values()[2]
16257 );
16258 assert_eq!(r.values()[3], Scalar::Float64(4.0));
16259 assert_eq!(r.values()[4], Scalar::Float64(4.0));
16261 }
16262
16263 #[test]
16264 fn interpolate_trailing_run_forward_fills_without_extrapolating() {
16265 let col = Column::from_values(vec![
16267 Scalar::Float64(2.0),
16268 Scalar::Float64(4.0),
16269 Scalar::Null(NullKind::NaN),
16270 Scalar::Null(NullKind::NaN),
16271 ])
16272 .expect("col");
16273 let r = col.interpolate_linear().expect("interpolate");
16274 assert_eq!(
16275 r.values(),
16276 &[
16277 Scalar::Float64(2.0),
16278 Scalar::Float64(4.0),
16279 Scalar::Float64(4.0),
16280 Scalar::Float64(4.0),
16281 ]
16282 );
16283 }
16284
16285 #[test]
16286 fn interpolate_empty_is_empty_float64() {
16287 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16288 let r = col.interpolate_linear().expect("interpolate");
16289 assert!(r.is_empty());
16290 assert_eq!(r.dtype(), DType::Float64);
16291 }
16292
16293 #[test]
16294 fn interpolate_alias_matches_default_linear_interpolation() {
16295 let col = Column::from_values(vec![
16296 Scalar::Float64(1.0),
16297 Scalar::Null(NullKind::NaN),
16298 Scalar::Float64(3.0),
16299 ])
16300 .expect("col");
16301
16302 assert_eq!(
16303 col.interpolate().expect("interpolate"),
16304 col.interpolate_linear().expect("interpolate_linear")
16305 );
16306 }
16307
16308 #[test]
16309 fn drop_duplicates_keeps_first_occurrence() {
16310 let col = Column::from_values(vec![
16311 Scalar::Int64(1),
16312 Scalar::Int64(2),
16313 Scalar::Int64(1),
16314 Scalar::Int64(3),
16315 Scalar::Int64(2),
16316 ])
16317 .expect("col");
16318 let d = col.drop_duplicates().expect("drop_duplicates");
16319 assert_eq!(
16320 d.values(),
16321 &[Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)]
16322 );
16323 }
16324
16325 #[test]
16326 fn drop_duplicates_treats_nulls_as_one_bucket() {
16327 let col = Column::from_values(vec![
16328 Scalar::Null(NullKind::NaN),
16329 Scalar::Int64(1),
16330 Scalar::Null(NullKind::NaN),
16331 ])
16332 .expect("col");
16333 let d = col.drop_duplicates().expect("drop_duplicates");
16334 assert_eq!(d.len(), 2);
16336 assert!(d.values()[0].is_missing());
16337 assert_eq!(d.values()[1], Scalar::Int64(1));
16338 }
16339
16340 #[test]
16341 fn drop_duplicates_keep_variants_match_pandas() {
16342 let col = Column::from_values(vec![
16343 Scalar::Int64(1),
16344 Scalar::Int64(2),
16345 Scalar::Int64(1),
16346 Scalar::Int64(1),
16347 Scalar::Int64(3),
16348 Scalar::Int64(2),
16349 Scalar::Null(NullKind::NaN),
16350 Scalar::Null(NullKind::NaN),
16351 ])
16352 .expect("col");
16353
16354 let last = col.drop_duplicates_keep("last").expect("drop last");
16355 assert_eq!(last.len(), 4);
16356 assert_eq!(last.values()[0], Scalar::Int64(1));
16357 assert_eq!(last.values()[1], Scalar::Int64(3));
16358 assert_eq!(last.values()[2], Scalar::Int64(2));
16359 assert!(last.values()[3].is_missing());
16360
16361 let none = col.drop_duplicates_keep("false").expect("drop none");
16362 assert_eq!(none.values(), &[Scalar::Int64(3)]);
16363 }
16364
16365 #[test]
16366 fn compare_returns_only_differences() {
16367 let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
16368 .expect("a");
16369 let b =
16370 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(20), Scalar::Int64(3)])
16371 .expect("b");
16372 let (left, right) = a.compare(&b).expect("compare");
16373 assert_eq!(left.values(), &[Scalar::Int64(2)]);
16374 assert_eq!(right.values(), &[Scalar::Int64(20)]);
16375 }
16376
16377 #[test]
16378 fn compare_treats_matching_nulls_as_equal() {
16379 let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
16380 .expect("a");
16381 let b = Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
16382 .expect("b");
16383 let (left, right) = a.compare(&b).expect("compare");
16384 assert!(left.is_empty());
16385 assert!(right.is_empty());
16386 }
16387
16388 #[test]
16389 fn compare_length_mismatch_errors() {
16390 let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("a");
16391 let b = Column::from_values(vec![Scalar::Int64(1)]).expect("b");
16392 let err = a.compare(&b).unwrap_err();
16393 assert!(matches!(err, crate::ColumnError::LengthMismatch { .. }));
16394 }
16395
16396 #[test]
16397 fn map_applies_unary_function() {
16398 let col =
16399 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
16400 .expect("col");
16401 let doubled = col
16402 .map(|v| match v {
16403 Scalar::Int64(i) => Scalar::Int64(i * 2),
16404 other => other.clone(),
16405 })
16406 .expect("map");
16407 assert_eq!(doubled.values()[0], Scalar::Int64(2));
16408 assert_eq!(doubled.values()[1], Scalar::Int64(4));
16409 assert_eq!(doubled.values()[2], Scalar::Int64(6));
16410 }
16411
16412 #[test]
16413 fn map_can_change_dtype() {
16414 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
16415 let as_str = col
16416 .map(|v| match v {
16417 Scalar::Int64(i) => Scalar::Utf8(i.to_string()),
16418 other => other.clone(),
16419 })
16420 .expect("map");
16421 assert_eq!(as_str.dtype(), DType::Utf8);
16422 assert_eq!(as_str.values()[0], Scalar::Utf8("1".into()));
16423 }
16424
16425 #[test]
16426 fn argmin_argmax_skip_missing() {
16427 let col = Column::from_values(vec![
16428 Scalar::Int64(3),
16429 Scalar::Null(NullKind::NaN),
16430 Scalar::Int64(1),
16431 Scalar::Int64(5),
16432 Scalar::Int64(2),
16433 ])
16434 .expect("col");
16435 assert_eq!(col.argmin(), Some(2));
16436 assert_eq!(col.argmax(), Some(3));
16437 assert_eq!(col.idxmin(), Some(2));
16438 assert_eq!(col.idxmax(), Some(3));
16439 }
16440
16441 #[test]
16442 fn argmin_argmax_all_missing_returns_none() {
16443 let col = Column::from_values(vec![
16444 Scalar::Null(NullKind::NaN),
16445 Scalar::Null(NullKind::Null),
16446 ])
16447 .expect("col");
16448 assert!(col.argmin().is_none());
16449 assert!(col.argmax().is_none());
16450 assert!(col.idxmin().is_none());
16451 assert!(col.idxmax().is_none());
16452 }
16453
16454 #[test]
16455 fn is_monotonic_increasing_detects_ascending() {
16456 let col = Column::from_values(vec![
16457 Scalar::Int64(1),
16458 Scalar::Int64(2),
16459 Scalar::Int64(2),
16460 Scalar::Int64(5),
16461 ])
16462 .expect("col");
16463 assert!(col.is_monotonic_increasing());
16464 assert!(!col.is_monotonic_decreasing());
16465 }
16466
16467 #[test]
16468 fn is_monotonic_decreasing_detects_descending() {
16469 let col =
16470 Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(3), Scalar::Int64(1)])
16471 .expect("col");
16472 assert!(col.is_monotonic_decreasing());
16473 assert!(!col.is_monotonic_increasing());
16474 }
16475
16476 #[test]
16477 fn is_monotonic_skips_missing_values() {
16478 let col = Column::from_values(vec![
16479 Scalar::Int64(1),
16480 Scalar::Null(NullKind::NaN),
16481 Scalar::Int64(3),
16482 Scalar::Int64(5),
16483 ])
16484 .expect("col");
16485 assert!(col.is_monotonic_increasing());
16486 }
16487
16488 #[test]
16489 fn is_monotonic_empty_is_true() {
16490 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
16491 assert!(col.is_monotonic_increasing());
16492 assert!(col.is_monotonic_decreasing());
16493 }
16494
16495 #[test]
16496 fn combine_first_fills_missing_from_other() {
16497 let a = Column::from_values(vec![
16498 Scalar::Int64(1),
16499 Scalar::Null(NullKind::NaN),
16500 Scalar::Int64(3),
16501 ])
16502 .expect("a");
16503 let b = Column::from_values(vec![
16504 Scalar::Int64(10),
16505 Scalar::Int64(20),
16506 Scalar::Int64(30),
16507 ])
16508 .expect("b");
16509 let c = a.combine_first(&b).expect("combine_first");
16510 assert_eq!(c.values()[0], Scalar::Int64(1));
16511 assert_eq!(c.values()[1], Scalar::Int64(20));
16512 assert_eq!(c.values()[2], Scalar::Int64(3));
16513 }
16514
16515 #[test]
16516 fn combine_first_length_mismatch_errors() {
16517 let a = Column::from_values(vec![Scalar::Int64(1)]).expect("a");
16518 let b = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("b");
16519 let err = a.combine_first(&b).unwrap_err();
16520 assert!(matches!(err, crate::ColumnError::LengthMismatch { .. }));
16521 }
16522
16523 #[test]
16524 fn clip_lower_only() {
16525 let col = Column::from_values(vec![
16526 Scalar::Float64(-2.0),
16527 Scalar::Float64(0.0),
16528 Scalar::Float64(5.0),
16529 ])
16530 .expect("col");
16531 let c = col.clip_lower(0.0).expect("clip_lower");
16532 assert_eq!(c.values()[0], Scalar::Float64(0.0));
16533 assert_eq!(c.values()[1], Scalar::Float64(0.0));
16534 assert_eq!(c.values()[2], Scalar::Float64(5.0));
16535 }
16536
16537 #[test]
16538 fn clip_upper_only() {
16539 let col = Column::from_values(vec![
16540 Scalar::Float64(-2.0),
16541 Scalar::Float64(0.0),
16542 Scalar::Float64(5.0),
16543 ])
16544 .expect("col");
16545 let c = col.clip_upper(1.0).expect("clip_upper");
16546 assert_eq!(c.values()[0], Scalar::Float64(-2.0));
16547 assert_eq!(c.values()[1], Scalar::Float64(0.0));
16548 assert_eq!(c.values()[2], Scalar::Float64(1.0));
16549 }
16550
16551 #[test]
16552 fn describe_returns_pandas_order() {
16553 let col = Column::from_values(vec![
16554 Scalar::Float64(1.0),
16555 Scalar::Float64(2.0),
16556 Scalar::Float64(3.0),
16557 Scalar::Float64(4.0),
16558 Scalar::Float64(5.0),
16559 ])
16560 .expect("col");
16561 let stats = col.describe().expect("describe");
16562 let names: Vec<&str> = stats.iter().map(|(k, _)| *k).collect();
16563 assert_eq!(
16564 names,
16565 vec!["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
16566 );
16567 assert_eq!(stats[0].1, Scalar::Int64(5));
16568 assert!(
16569 matches!(&stats[1].1, Scalar::Float64(v) if (*v - 3.0).abs() < 1e-9),
16570 "expected Float64, got {:?}",
16571 stats[1].1
16572 );
16573 assert_eq!(stats[3].1, Scalar::Float64(1.0));
16574 assert_eq!(stats[7].1, Scalar::Float64(5.0));
16575 }
16576
16577 #[test]
16578 fn describe_rejects_utf8_column() {
16579 let col = Column::from_values(vec![Scalar::Utf8("a".into())]).expect("col");
16580 assert!(col.describe().is_err());
16581 }
16582
16583 #[test]
16584 fn combine_uses_fill_for_missing() {
16585 let a = Column::from_values(vec![
16586 Scalar::Int64(1),
16587 Scalar::Null(NullKind::NaN),
16588 Scalar::Int64(3),
16589 ])
16590 .expect("a");
16591 let b = Column::from_values(vec![
16592 Scalar::Int64(10),
16593 Scalar::Int64(20),
16594 Scalar::Null(NullKind::NaN),
16595 ])
16596 .expect("b");
16597 let out = a
16598 .combine(
16599 &b,
16600 |l, r| {
16601 if let (Ok(lf), Ok(rf)) = (l.to_f64(), r.to_f64()) {
16602 Scalar::Float64(lf + rf)
16603 } else {
16604 Scalar::Null(NullKind::NaN)
16605 }
16606 },
16607 Some(Scalar::Int64(0)),
16608 )
16609 .expect("combine");
16610 assert_eq!(out.values()[0], Scalar::Float64(11.0));
16611 assert_eq!(out.values()[1], Scalar::Float64(20.0));
16612 assert_eq!(out.values()[2], Scalar::Float64(3.0));
16613 }
16614
16615 #[test]
16616 fn combine_length_mismatch_errors() {
16617 let a = Column::from_values(vec![Scalar::Int64(1)]).expect("a");
16618 let b = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("b");
16619 let err = a
16620 .combine(&b, |l, _| l.clone(), Some(Scalar::Int64(0)))
16621 .unwrap_err();
16622 assert!(matches!(err, crate::ColumnError::LengthMismatch { .. }));
16623 }
16624
16625 #[test]
16626 fn combine_fill_none_propagates_nulls_without_invoking_func() {
16627 let a = Column::from_values(vec![
16628 Scalar::Float64(1.0),
16629 Scalar::Null(NullKind::NaN),
16630 Scalar::Float64(3.0),
16631 ])
16632 .expect("a");
16633 let b = Column::from_values(vec![
16634 Scalar::Float64(10.0),
16635 Scalar::Float64(20.0),
16636 Scalar::Null(NullKind::NaN),
16637 ])
16638 .expect("b");
16639 let mut calls = 0usize;
16640 let out = a
16641 .combine(
16642 &b,
16643 |l, r| {
16644 calls += 1;
16645 Scalar::Float64(l.to_f64().unwrap() + r.to_f64().unwrap())
16646 },
16647 None,
16648 )
16649 .expect("combine");
16650 assert_eq!(calls, 1);
16652 assert_eq!(out.values()[0], Scalar::Float64(11.0));
16653 assert!(out.values()[1].is_missing());
16654 assert!(out.values()[2].is_missing());
16655 }
16656
16657 #[test]
16658 fn combine_fill_none_all_present_matches_elementwise_apply() {
16659 let a = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("a");
16660 let b = Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).expect("b");
16661 let out = a
16662 .combine(
16663 &b,
16664 |l, r| Scalar::Int64(l.to_f64().unwrap() as i64 + r.to_f64().unwrap() as i64),
16665 None,
16666 )
16667 .expect("combine");
16668 assert_eq!(out.values()[0], Scalar::Int64(11));
16669 assert_eq!(out.values()[1], Scalar::Int64(22));
16670 }
16671
16672 #[test]
16673 fn apply_float_applies_numeric_fn() {
16674 let col = Column::from_values(vec![
16675 Scalar::Float64(1.0),
16676 Scalar::Null(NullKind::NaN),
16677 Scalar::Float64(4.0),
16678 ])
16679 .expect("col");
16680 let out = col.apply_float(|x| x.sqrt()).expect("apply_float");
16681 assert_eq!(out.values()[0], Scalar::Float64(1.0));
16682 assert!(out.values()[1].is_missing());
16683 assert_eq!(out.values()[2], Scalar::Float64(2.0));
16684 assert_eq!(out.dtype(), DType::Float64);
16685 }
16686
16687 #[test]
16688 fn apply_float_rejects_non_numeric() {
16689 let col = Column::from_values(vec![Scalar::Utf8("x".into())]).expect("col");
16690 assert!(col.apply_float(|x| x + 1.0).is_err());
16691 }
16692
16693 #[test]
16694 fn hist_counts_equal_width_bins() {
16695 let col = Column::from_values(vec![
16696 Scalar::Float64(0.0),
16697 Scalar::Float64(1.0),
16698 Scalar::Float64(2.0),
16699 Scalar::Float64(3.0),
16700 Scalar::Float64(9.0),
16701 ])
16702 .expect("col");
16703 let counts = col.hist_counts(3);
16704 assert_eq!(counts.len(), 3);
16706 assert_eq!(counts[0], 3); assert_eq!(counts[1], 1); assert_eq!(counts[2], 1); }
16710
16711 #[test]
16712 fn hist_counts_zero_bins_is_empty() {
16713 let col = Column::from_values(vec![Scalar::Float64(1.0)]).expect("col");
16714 assert!(col.hist_counts(0).is_empty());
16715 }
16716
16717 #[test]
16718 fn hist_counts_constant_column_puts_all_in_first_bin() {
16719 let col = Column::from_values(vec![
16720 Scalar::Float64(5.0),
16721 Scalar::Float64(5.0),
16722 Scalar::Float64(5.0),
16723 ])
16724 .expect("col");
16725 let counts = col.hist_counts(3);
16726 assert_eq!(counts[0], 3);
16727 assert_eq!(counts[1], 0);
16728 assert_eq!(counts[2], 0);
16729 }
16730
16731 #[test]
16732 fn nunique_drops_nulls() {
16733 let col = Column::from_values(vec![
16734 Scalar::Int64(1),
16735 Scalar::Int64(2),
16736 Scalar::Int64(1),
16737 Scalar::Null(NullKind::NaN),
16738 ])
16739 .expect("col");
16740 assert_eq!(col.nunique(), Scalar::Int64(2));
16741 }
16742
16743 #[test]
16744 fn nunique_with_dropna_false_counts_missing_once() {
16745 let col = Column::from_values(vec![
16746 Scalar::Int64(1),
16747 Scalar::Int64(2),
16748 Scalar::Int64(1),
16749 Scalar::Null(NullKind::NaN),
16750 Scalar::Null(NullKind::Null),
16751 ])
16752 .expect("col");
16753 assert_eq!(col.nunique_with_dropna(false), Scalar::Int64(3));
16754 }
16755
16756 #[test]
16757 fn nunique_with_dropna_false_all_missing_is_one() {
16758 let col = Column::from_values(vec![
16759 Scalar::Null(NullKind::NaN),
16760 Scalar::Null(NullKind::Null),
16761 Scalar::Null(NullKind::NaN),
16762 ])
16763 .expect("col");
16764 assert_eq!(col.nunique(), Scalar::Int64(0));
16765 assert_eq!(col.nunique_with_dropna(false), Scalar::Int64(1));
16766 }
16767
16768 #[test]
16769 fn any_all_reductions() {
16770 let col = Column::from_values(vec![Scalar::Int64(0), Scalar::Int64(0)]).expect("col");
16771 assert_eq!(col.any(), Scalar::Bool(false));
16772 assert_eq!(col.all(), Scalar::Bool(false));
16773
16774 let mixed = Column::from_values(vec![Scalar::Int64(0), Scalar::Int64(1)]).expect("col");
16775 assert_eq!(mixed.any(), Scalar::Bool(true));
16776 assert_eq!(mixed.all(), Scalar::Bool(false));
16777
16778 let all_true =
16779 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
16780 assert_eq!(all_true.all(), Scalar::Bool(true));
16781 }
16782
16783 #[test]
16784 fn is_unique_true_when_no_repeats() {
16785 let col =
16786 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
16787 .expect("col");
16788 assert!(col.is_unique());
16789 assert!(!col.has_duplicates());
16790 }
16791
16792 #[test]
16793 fn has_duplicates_true_when_repeats_present() {
16794 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(1)]).expect("col");
16795 assert!(col.has_duplicates());
16796 assert!(!col.is_unique());
16797 }
16798
16799 #[test]
16800 fn is_unique_ignores_nulls() {
16801 let col = Column::from_values(vec![
16802 Scalar::Int64(1),
16803 Scalar::Null(NullKind::NaN),
16804 Scalar::Null(NullKind::NaN),
16805 ])
16806 .expect("col");
16807 assert!(col.is_unique());
16808 }
16809
16810 #[test]
16811 fn pct_change_periods_one() {
16812 let col = Column::from_values(vec![
16813 Scalar::Float64(10.0),
16814 Scalar::Float64(12.0),
16815 Scalar::Float64(9.0),
16816 ])
16817 .expect("col");
16818 let r = col.pct_change(1).expect("pct_change");
16819 assert!(r.values()[0].is_missing());
16820 assert!(
16821 matches!(&r.values()[1], Scalar::Float64(v) if (*v - 0.2).abs() < 1e-9),
16822 "expected Float64, got {:?}",
16823 r.values()[1]
16824 );
16825 assert!(
16826 matches!(&r.values()[2], Scalar::Float64(v) if (*v + 0.25).abs() < 1e-9),
16827 "expected Float64, got {:?}",
16828 r.values()[2]
16829 );
16830 }
16831
16832 #[test]
16833 fn pct_change_zero_prev_yields_null() {
16834 let col =
16835 Column::from_values(vec![Scalar::Float64(0.0), Scalar::Float64(5.0)]).expect("col");
16836 let r = col.pct_change(1).expect("pct_change");
16837 assert!(r.values()[1].is_missing());
16838 }
16839
16840 #[test]
16841 fn pct_change_timedelta64_matches_pandas_mcu90() {
16842 let one_hour = 3_600 * 1_000_000_000_i64;
16846 let col = Column::from_values(vec![
16847 Scalar::Timedelta64(one_hour),
16848 Scalar::Timedelta64(2 * one_hour),
16849 Scalar::Timedelta64(4 * one_hour),
16850 ])
16851 .expect("col");
16852 let r = col.pct_change(1).expect("pct_change");
16853 assert!(r.values()[0].is_missing());
16854 assert!(
16855 matches!(&r.values()[1], Scalar::Float64(v) if (*v - 1.0).abs() < 1e-10),
16856 "expected Float64(1.0), got {:?}",
16857 r.values()[1]
16858 );
16859 assert!(
16860 matches!(&r.values()[2], Scalar::Float64(v) if (*v - 1.0).abs() < 1e-10),
16861 "expected Float64(1.0), got {:?}",
16862 r.values()[2]
16863 );
16864 }
16865
16866 #[test]
16867 fn pct_change_timedelta64_nat_propagates_mcu90() {
16868 use fp_types::Timedelta;
16869 let one_hour = 3_600 * 1_000_000_000_i64;
16870 let col = Column::from_values(vec![
16871 Scalar::Timedelta64(one_hour),
16872 Scalar::Timedelta64(Timedelta::NAT),
16873 Scalar::Timedelta64(2 * one_hour),
16874 ])
16875 .expect("col");
16876 let r = col.pct_change(1).expect("pct_change");
16877 assert!(r.values()[0].is_missing());
16878 assert!(r.values()[1].is_missing()); assert!(r.values()[2].is_missing()); }
16881
16882 #[test]
16883 fn pct_change_with_fill_ffill_uses_filled_previous_value() {
16884 let col = Column::from_values(vec![
16885 Scalar::Float64(10.0),
16886 Scalar::Null(NullKind::NaN),
16887 Scalar::Float64(12.0),
16888 ])
16889 .expect("col");
16890 let r = col
16891 .pct_change_with_fill(1, Some("ffill"), None)
16892 .expect("pct_change_with_fill");
16893 assert!(r.values()[0].is_missing());
16894 assert!(
16895 matches!(&r.values()[1], Scalar::Float64(v) if v.abs() < 1e-9),
16896 "expected Float64, got {:?}",
16897 r.values()[1]
16898 );
16899 assert!(
16900 matches!(&r.values()[2], Scalar::Float64(v) if (*v - 0.2).abs() < 1e-9),
16901 "expected Float64, got {:?}",
16902 r.values()[2]
16903 );
16904 }
16905
16906 #[test]
16907 fn pct_change_with_fill_limit_caps_forward_fill_runs() {
16908 let col = Column::from_values(vec![
16909 Scalar::Float64(10.0),
16910 Scalar::Null(NullKind::NaN),
16911 Scalar::Null(NullKind::NaN),
16912 Scalar::Float64(20.0),
16913 ])
16914 .expect("col");
16915 let r = col
16916 .pct_change_with_fill(1, Some("ffill"), Some(1))
16917 .expect("pct_change_with_fill");
16918 assert!(r.values()[0].is_missing());
16919 assert!(
16920 matches!(&r.values()[1], Scalar::Float64(v) if v.abs() < 1e-9),
16921 "expected Float64, got {:?}",
16922 r.values()[1]
16923 );
16924 assert!(r.values()[2].is_missing());
16925 assert!(r.values()[3].is_missing());
16926 }
16927
16928 #[test]
16929 fn pct_change_with_fill_bfill_aliases_backward_fill() {
16930 let col = Column::from_values(vec![
16931 Scalar::Float64(10.0),
16932 Scalar::Null(NullKind::NaN),
16933 Scalar::Float64(20.0),
16934 ])
16935 .expect("col");
16936 let r = col
16937 .pct_change_with_fill(1, Some("backfill"), None)
16938 .expect("pct_change_with_fill");
16939 assert!(r.values()[0].is_missing());
16940 assert!(
16941 matches!(&r.values()[1], Scalar::Float64(v) if (*v - 1.0).abs() < 1e-9),
16942 "expected Float64, got {:?}",
16943 r.values()[1]
16944 );
16945 assert!(
16946 matches!(&r.values()[2], Scalar::Float64(v) if v.abs() < 1e-9),
16947 "expected Float64, got {:?}",
16948 r.values()[2]
16949 );
16950 }
16951
16952 #[test]
16953 fn pct_change_with_fill_rejects_invalid_method() {
16954 let col =
16955 Column::from_values(vec![Scalar::Float64(1.0), Scalar::Float64(2.0)]).expect("col");
16956 let err = col
16957 .pct_change_with_fill(1, Some("nearest"), None)
16958 .expect_err("invalid fill_method should error");
16959 assert!(matches!(
16960 err,
16961 crate::ColumnError::Type(fp_types::TypeError::NonNumericValue { .. })
16962 ));
16963 }
16964
16965 #[test]
16966 fn ffill_fills_trailing_missing_runs() {
16967 let col = Column::from_values(vec![
16968 Scalar::Null(NullKind::NaN),
16969 Scalar::Int64(1),
16970 Scalar::Null(NullKind::NaN),
16971 Scalar::Null(NullKind::NaN),
16972 Scalar::Int64(5),
16973 ])
16974 .expect("col");
16975 let r = col.ffill(None).expect("ffill");
16976 assert!(r.values()[0].is_missing());
16977 assert_eq!(r.values()[1], Scalar::Int64(1));
16978 assert_eq!(r.values()[2], Scalar::Int64(1));
16979 assert_eq!(r.values()[3], Scalar::Int64(1));
16980 assert_eq!(r.values()[4], Scalar::Int64(5));
16981 }
16982
16983 #[test]
16984 fn ffill_respects_limit_per_run() {
16985 let col = Column::from_values(vec![
16986 Scalar::Int64(1),
16987 Scalar::Null(NullKind::NaN),
16988 Scalar::Null(NullKind::NaN),
16989 Scalar::Null(NullKind::NaN),
16990 Scalar::Int64(9),
16991 ])
16992 .expect("col");
16993 let r = col.ffill(Some(2)).expect("ffill");
16994 assert_eq!(r.values()[0], Scalar::Int64(1));
16995 assert_eq!(r.values()[1], Scalar::Int64(1));
16996 assert_eq!(r.values()[2], Scalar::Int64(1));
16997 assert!(r.values()[3].is_missing());
16998 assert_eq!(r.values()[4], Scalar::Int64(9));
16999 assert_eq!(col.pad(Some(2)), col.ffill(Some(2)));
17000 }
17001
17002 #[test]
17003 fn bfill_fills_leading_missing_runs() {
17004 let col = Column::from_values(vec![
17005 Scalar::Null(NullKind::NaN),
17006 Scalar::Null(NullKind::NaN),
17007 Scalar::Int64(3),
17008 Scalar::Null(NullKind::NaN),
17009 ])
17010 .expect("col");
17011 let r = col.bfill(None).expect("bfill");
17012 assert_eq!(r.values()[0], Scalar::Int64(3));
17013 assert_eq!(r.values()[1], Scalar::Int64(3));
17014 assert_eq!(r.values()[2], Scalar::Int64(3));
17015 assert!(r.values()[3].is_missing());
17017 }
17018
17019 #[test]
17020 fn bfill_respects_limit_per_run() {
17021 let col = Column::from_values(vec![
17022 Scalar::Null(NullKind::NaN),
17023 Scalar::Null(NullKind::NaN),
17024 Scalar::Null(NullKind::NaN),
17025 Scalar::Int64(7),
17026 ])
17027 .expect("col");
17028 let r = col.bfill(Some(1)).expect("bfill");
17029 assert!(r.values()[0].is_missing());
17030 assert!(r.values()[1].is_missing());
17031 assert_eq!(r.values()[2], Scalar::Int64(7));
17032 assert_eq!(r.values()[3], Scalar::Int64(7));
17033 assert_eq!(col.backfill(Some(1)), col.bfill(Some(1)));
17034 }
17035
17036 #[test]
17037 fn ffill_empty_is_empty_same_dtype() {
17038 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
17039 let r = col.ffill(None).expect("ffill");
17040 assert!(r.is_empty());
17041 assert_eq!(r.dtype(), DType::Null);
17042 }
17043
17044 #[test]
17045 fn pandas_metadata_and_materialization_aliases_match_core_methods()
17046 -> Result<(), crate::ColumnError> {
17047 let col = Column::from_values(vec![
17048 Scalar::Int64(1),
17049 Scalar::Null(NullKind::NaN),
17050 Scalar::Int64(2),
17051 ])?;
17052
17053 assert_eq!(col.size(), col.len());
17054 assert_eq!(col.shape(), (col.len(),));
17055 assert_eq!(col.ndim(), 1);
17056 assert_eq!(col.empty(), col.is_empty());
17057 assert_eq!(col.to_list(), col.to_vec());
17058 assert_eq!(col.tolist(), col.to_vec());
17059 assert_eq!(col.to_numpy(), col.to_vec());
17060 assert_eq!(col.ravel(), col.to_numpy());
17061 assert_eq!(col.array(), col.to_vec());
17062 Ok(())
17063 }
17064
17065 #[test]
17066 fn isnull_notnull_flag_missing_positions() {
17067 let col = Column::from_values(vec![
17068 Scalar::Int64(1),
17069 Scalar::Null(NullKind::NaN),
17070 Scalar::Int64(2),
17071 ])
17072 .expect("col");
17073 let is_null = col.isnull().expect("isnull");
17074 let not_null = col.notnull().expect("notnull");
17075 assert_eq!(is_null.dtype(), DType::Bool);
17076 assert_eq!(
17077 is_null.values(),
17078 &[Scalar::Bool(false), Scalar::Bool(true), Scalar::Bool(false),]
17079 );
17080 assert_eq!(col.isna(), col.isnull());
17081 assert_eq!(
17082 not_null.values(),
17083 &[Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true),]
17084 );
17085 assert_eq!(col.notna(), col.notnull());
17086 }
17087
17088 #[test]
17089 fn var_std_sem_ddof_one() {
17090 let col = Column::from_values(vec![
17091 Scalar::Float64(2.0),
17092 Scalar::Float64(4.0),
17093 Scalar::Float64(4.0),
17094 Scalar::Float64(4.0),
17095 Scalar::Float64(5.0),
17096 Scalar::Float64(5.0),
17097 Scalar::Float64(7.0),
17098 Scalar::Float64(9.0),
17099 ])
17100 .expect("col");
17101 match col.var(1) {
17102 Scalar::Float64(v) => assert!((v - 4.571428571428571).abs() < 1e-9),
17103 other => unreachable!("expected Float64, got {other:?}"),
17104 }
17105 match col.std(1) {
17106 Scalar::Float64(v) => assert!((v - 2.138089935299395).abs() < 1e-9),
17107 other => unreachable!("expected Float64, got {other:?}"),
17108 }
17109 match col.sem(1) {
17110 Scalar::Float64(v) => assert!((v - 0.7559289460184544).abs() < 1e-9),
17111 other => unreachable!("expected Float64, got {other:?}"),
17112 }
17113 }
17114
17115 #[test]
17116 fn skew_symmetric_is_zero() {
17117 let col = Column::from_values(vec![
17118 Scalar::Float64(1.0),
17119 Scalar::Float64(2.0),
17120 Scalar::Float64(3.0),
17121 Scalar::Float64(4.0),
17122 Scalar::Float64(5.0),
17123 ])
17124 .expect("col");
17125 match col.skew() {
17126 Scalar::Float64(v) => assert!(v.abs() < 1e-9),
17127 other => unreachable!("expected Float64, got {other:?}"),
17128 }
17129 }
17130
17131 #[test]
17132 fn kurt_uniform_five_values_is_minus_one_point_two() {
17133 let col = Column::from_values(vec![
17134 Scalar::Float64(1.0),
17135 Scalar::Float64(2.0),
17136 Scalar::Float64(3.0),
17137 Scalar::Float64(4.0),
17138 Scalar::Float64(5.0),
17139 ])
17140 .expect("col");
17141 match col.kurt() {
17142 Scalar::Float64(v) => assert!((v + 1.2).abs() < 1e-9),
17143 other => unreachable!("expected Float64, got {other:?}"),
17144 }
17145 }
17146
17147 #[test]
17148 fn kurtosis_alias_matches_kurt() {
17149 let col = Column::from_values(vec![
17150 Scalar::Float64(1.0),
17151 Scalar::Float64(2.0),
17152 Scalar::Float64(3.0),
17153 Scalar::Float64(4.0),
17154 Scalar::Float64(5.0),
17155 ])
17156 .expect("col");
17157 assert_eq!(col.kurtosis(), col.kurt());
17158 }
17159
17160 #[test]
17161 fn ptp_returns_max_minus_min() {
17162 let col = Column::from_values(vec![
17163 Scalar::Float64(3.0),
17164 Scalar::Null(NullKind::NaN),
17165 Scalar::Float64(7.0),
17166 Scalar::Float64(1.0),
17167 ])
17168 .expect("col");
17169 assert_eq!(col.ptp(), Scalar::Float64(6.0));
17170 }
17171
17172 #[test]
17173 fn ptp_empty_is_null() {
17174 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
17175 assert!(col.ptp().is_missing());
17176 }
17177
17178 #[test]
17179 fn skew_too_few_values_returns_null() {
17180 let col =
17181 Column::from_values(vec![Scalar::Float64(1.0), Scalar::Float64(2.0)]).expect("col");
17182 assert!(col.skew().is_missing());
17183 }
17184
17185 #[test]
17186 fn rolling_window_sum_full_window() {
17187 let col = Column::from_values(vec![
17188 Scalar::Float64(1.0),
17189 Scalar::Float64(2.0),
17190 Scalar::Float64(3.0),
17191 Scalar::Float64(4.0),
17192 Scalar::Float64(5.0),
17193 ])
17194 .expect("col");
17195 let r = col.rolling_window_sum(3, 3).expect("rolling");
17197 assert!(r.values()[0].is_missing());
17198 assert!(r.values()[1].is_missing());
17199 assert_eq!(r.values()[2], Scalar::Float64(6.0));
17200 assert_eq!(r.values()[3], Scalar::Float64(9.0));
17201 assert_eq!(r.values()[4], Scalar::Float64(12.0));
17202 }
17203
17204 #[test]
17205 fn rolling_window_sum_min_periods_relaxed() {
17206 let col = Column::from_values(vec![
17207 Scalar::Float64(1.0),
17208 Scalar::Float64(2.0),
17209 Scalar::Float64(3.0),
17210 ])
17211 .expect("col");
17212 let r = col.rolling_window_sum(3, 1).expect("rolling");
17214 assert_eq!(r.values()[0], Scalar::Float64(1.0));
17215 assert_eq!(r.values()[1], Scalar::Float64(3.0));
17216 assert_eq!(r.values()[2], Scalar::Float64(6.0));
17217 }
17218
17219 #[test]
17220 fn rolling_window_sum_skips_missing() {
17221 let col = Column::from_values(vec![
17222 Scalar::Float64(1.0),
17223 Scalar::Null(NullKind::NaN),
17224 Scalar::Float64(3.0),
17225 Scalar::Float64(4.0),
17226 ])
17227 .expect("col");
17228 let r = col.rolling_window_sum(3, 2).expect("rolling");
17234 assert!(r.values()[0].is_missing());
17235 assert!(r.values()[1].is_missing());
17236 assert_eq!(r.values()[2], Scalar::Float64(4.0));
17237 assert_eq!(r.values()[3], Scalar::Float64(7.0));
17238 }
17239
17240 #[test]
17241 fn rolling_window_sum_window_zero_is_all_null() {
17242 let col =
17243 Column::from_values(vec![Scalar::Float64(1.0), Scalar::Float64(2.0)]).expect("col");
17244 let r = col.rolling_window_sum(0, 0).expect("rolling");
17245 assert!(r.values()[0].is_missing());
17246 assert!(r.values()[1].is_missing());
17247 assert_eq!(r.dtype(), DType::Float64);
17248 }
17249
17250 #[test]
17251 fn diff_valid_skips_missing_predecessors() {
17252 let col = Column::from_values(vec![
17253 Scalar::Null(NullKind::NaN),
17254 Scalar::Float64(1.0),
17255 Scalar::Null(NullKind::NaN),
17256 Scalar::Float64(4.0),
17257 Scalar::Float64(7.0),
17258 ])
17259 .expect("col");
17260 let r = col.diff_valid().expect("diff_valid");
17261 assert!(r.values()[0].is_missing()); assert!(r.values()[1].is_missing()); assert!(r.values()[2].is_missing()); assert_eq!(r.values()[3], Scalar::Float64(3.0)); assert_eq!(r.values()[4], Scalar::Float64(3.0)); }
17267
17268 #[test]
17269 fn diff_valid_empty_column() {
17270 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
17271 let r = col.diff_valid().expect("diff_valid");
17272 assert!(r.is_empty());
17273 assert_eq!(r.dtype(), DType::Float64);
17274 }
17275
17276 #[test]
17277 fn sample_without_replacement_deterministic_by_seed() {
17278 let col =
17279 Column::from_values((0..10).map(Scalar::Int64).collect::<Vec<_>>()).expect("col");
17280 let a = col.sample(3, 42).expect("sample");
17281 let b = col.sample(3, 42).expect("sample");
17282 assert_eq!(a.values(), b.values());
17284 assert_eq!(a.len(), 3);
17285 for v in a.values() {
17287 match v {
17288 Scalar::Int64(x) => assert!((0..10).contains(x)),
17289 other => unreachable!("unexpected value {other:?}"),
17290 }
17291 }
17292 }
17293
17294 #[test]
17295 fn sample_different_seeds_likely_differ() {
17296 let col =
17297 Column::from_values((0..100).map(Scalar::Int64).collect::<Vec<_>>()).expect("col");
17298 let a = col.sample(5, 1).expect("sample");
17299 let b = col.sample(5, 2).expect("sample");
17300 assert_ne!(a.values(), b.values());
17303 }
17304
17305 #[test]
17306 fn sample_n_at_or_above_len_returns_clone() {
17307 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17308 let r = col.sample(10, 42).expect("sample");
17309 assert_eq!(r.values(), col.values());
17310 }
17311
17312 #[test]
17313 fn first_valid_last_valid_skip_nulls() {
17314 let col = Column::from_values(vec![
17315 Scalar::Null(NullKind::NaN),
17316 Scalar::Null(NullKind::NaN),
17317 Scalar::Int64(5),
17318 Scalar::Int64(7),
17319 Scalar::Null(NullKind::NaN),
17320 ])
17321 .expect("col");
17322 assert_eq!(col.first_valid(), Some(2));
17323 assert_eq!(col.last_valid(), Some(3));
17324 assert_eq!(col.first_valid_index(), Some(2));
17325 assert_eq!(col.last_valid_index(), Some(3));
17326 }
17327
17328 #[test]
17329 fn nsmallest_keep_first_breaks_ties_by_earlier_position() {
17330 let col = Column::from_values(vec![
17331 Scalar::Int64(2), Scalar::Int64(1), Scalar::Int64(1), Scalar::Int64(3), Scalar::Int64(1), ])
17337 .expect("col");
17338 let r = col.nsmallest_keep(2, "first").expect("nsmallest_keep");
17339 assert_eq!(r.len(), 2);
17341 assert_eq!(r.values()[0], Scalar::Int64(1));
17342 assert_eq!(r.values()[1], Scalar::Int64(1));
17343 }
17344
17345 #[test]
17346 fn nsmallest_keep_last_breaks_ties_by_later_position() {
17347 let col = Column::from_values(vec![
17348 Scalar::Int64(1), Scalar::Int64(2),
17350 Scalar::Int64(1), Scalar::Int64(3),
17352 Scalar::Int64(1), ])
17354 .expect("col");
17355 let r = col.nsmallest_keep(2, "last").expect("nsmallest_keep");
17357 assert_eq!(r.len(), 2);
17358 assert_eq!(r.values()[0], Scalar::Int64(1));
17359 assert_eq!(r.values()[1], Scalar::Int64(1));
17360 }
17361
17362 #[test]
17363 fn nsmallest_keep_all_expands_beyond_n_on_ties() {
17364 let col = Column::from_values(vec![
17365 Scalar::Int64(1),
17366 Scalar::Int64(1),
17367 Scalar::Int64(1),
17368 Scalar::Int64(2),
17369 ])
17370 .expect("col");
17371 let r = col.nsmallest_keep(1, "all").expect("nsmallest_keep");
17373 assert_eq!(r.len(), 3);
17374 assert_eq!(r.values()[0], Scalar::Int64(1));
17375 assert_eq!(r.values()[1], Scalar::Int64(1));
17376 assert_eq!(r.values()[2], Scalar::Int64(1));
17377 }
17378
17379 #[test]
17380 fn nlargest_keep_mirror_symmetry() {
17381 let col = Column::from_values(vec![
17382 Scalar::Int64(1),
17383 Scalar::Int64(3),
17384 Scalar::Int64(3),
17385 Scalar::Int64(2),
17386 ])
17387 .expect("col");
17388 let r = col.nlargest_keep(1, "all").expect("nlargest_keep");
17389 assert_eq!(r.len(), 2);
17390 assert_eq!(r.values()[0], Scalar::Int64(3));
17391 assert_eq!(r.values()[1], Scalar::Int64(3));
17392 }
17393
17394 #[test]
17395 fn nkeep_invalid_keep_errors() {
17396 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
17397 assert!(col.nsmallest_keep(1, "middle").is_err());
17398 assert!(col.nlargest_keep(1, "middle").is_err());
17399 }
17400
17401 #[test]
17402 fn nkeep_zero_is_empty_same_dtype() {
17403 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17404 let r = col.nsmallest_keep(0, "first").expect("nsmallest_keep");
17405 assert!(r.is_empty());
17406 assert_eq!(r.dtype(), DType::Int64);
17407 }
17408
17409 #[test]
17410 fn first_valid_last_valid_all_missing_is_none() {
17411 let col = Column::from_values(vec![
17412 Scalar::Null(NullKind::NaN),
17413 Scalar::Null(NullKind::Null),
17414 ])
17415 .expect("col");
17416 assert_eq!(col.first_valid(), None);
17417 assert_eq!(col.last_valid(), None);
17418 assert_eq!(col.first_valid_index(), None);
17419 assert_eq!(col.last_valid_index(), None);
17420 }
17421
17422 #[test]
17423 fn rolling_window_sum_empty_column() {
17424 let col = Column::from_values(Vec::<Scalar>::new()).expect("col");
17425 let r = col.rolling_window_sum(3, 1).expect("rolling");
17426 assert!(r.is_empty());
17427 assert_eq!(r.dtype(), DType::Float64);
17428 }
17429
17430 #[test]
17431 fn pct_change_negative_periods() {
17432 let col = Column::from_values(vec![Scalar::Float64(10.0), Scalar::Float64(15.0)])
17433 .expect("col");
17434 let r = col.pct_change(-1).expect("pct_change");
17435 assert!(
17437 matches!(&r.values()[0], Scalar::Float64(v) if (*v + 1.0 / 3.0).abs() < 1e-9),
17438 "expected Float64, got {:?}",
17439 r.values()[0]
17440 );
17441 assert!(r.values()[1].is_missing());
17442 }
17443
17444 #[test]
17445 fn count_excludes_nulls() {
17446 let col = Column::from_values(vec![
17447 Scalar::Int64(1),
17448 Scalar::Null(NullKind::NaN),
17449 Scalar::Int64(2),
17450 Scalar::Null(NullKind::Null),
17451 ])
17452 .expect("col");
17453 assert_eq!(col.count(), 2);
17454 }
17455 }
17456
17457 mod where_mask {
17458 use fp_types::NullKind;
17459
17460 use super::*;
17461
17462 #[test]
17463 fn where_cond_keeps_true_positions() {
17464 let col =
17465 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
17466 .expect("col");
17467 let cond = Column::from_values(vec![
17468 Scalar::Bool(true),
17469 Scalar::Bool(false),
17470 Scalar::Bool(true),
17471 ])
17472 .expect("cond");
17473 let fill = Scalar::Int64(-1);
17474 let out = col.where_cond(&cond, &fill).expect("where");
17475 assert_eq!(col.r#where(&cond, &fill).expect("where"), out);
17476 assert_eq!(out.values()[0], Scalar::Int64(1));
17477 assert_eq!(out.values()[1], Scalar::Int64(-1));
17478 assert_eq!(out.values()[2], Scalar::Int64(3));
17479 }
17480
17481 #[test]
17482 fn mask_inverts_where_cond() {
17483 let col =
17484 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
17485 .expect("col");
17486 let cond = Column::from_values(vec![
17487 Scalar::Bool(true),
17488 Scalar::Bool(false),
17489 Scalar::Bool(true),
17490 ])
17491 .expect("cond");
17492 let fill = Scalar::Int64(0);
17493 let out = col.mask(&cond, &fill).expect("mask");
17494 assert_eq!(out.values()[0], Scalar::Int64(0));
17495 assert_eq!(out.values()[1], Scalar::Int64(2));
17496 assert_eq!(out.values()[2], Scalar::Int64(0));
17497 }
17498
17499 #[test]
17500 fn where_missing_cond_propagates_null() {
17501 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17502 let cond = Column::from_values(vec![Scalar::Bool(true), Scalar::Null(NullKind::NaN)])
17503 .expect("cond");
17504 let fill = Scalar::Int64(-1);
17505 let out = col.where_cond(&cond, &fill).expect("where");
17506 assert_eq!(out.values()[0], Scalar::Int64(1));
17507 assert!(out.values()[1].is_missing());
17508 }
17509
/// An integer condition column is refused with `InvalidMaskType`.
#[test]
fn where_rejects_non_bool_cond() {
    let source = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
    let not_bool = Column::from_values(vec![Scalar::Int64(1)]).expect("cond");

    let outcome = source.where_cond(&not_bool, &Scalar::Int64(0));

    assert!(matches!(
        outcome.unwrap_err(),
        crate::ColumnError::InvalidMaskType { .. }
    ));
}
17517
/// `equals` yields a Bool column: equal values give `true`, differing values
/// give `false`, and a missing value paired with a missing value gives `false`.
#[test]
fn equals_elementwise_matches_semantic_eq() {
    let lhs = Column::from_values(vec![
        Scalar::Int64(1),
        Scalar::Int64(2),
        Scalar::Null(NullKind::NaN),
    ])
    .expect("a");
    let rhs = Column::from_values(vec![
        Scalar::Int64(1),
        Scalar::Int64(3),
        Scalar::Null(NullKind::NaN),
    ])
    .expect("b");

    let verdict = lhs.equals(&rhs).expect("equals");

    assert_eq!(verdict.dtype(), DType::Bool);
    for (pos, flag) in [true, false, false].into_iter().enumerate() {
        assert_eq!(verdict.values()[pos], Scalar::Bool(flag));
    }
}
17539
/// Comparing columns of different lengths is an error.
#[test]
fn equals_length_mismatch_errors() {
    let shorter = Column::from_values(vec![Scalar::Int64(1)]).expect("a");
    let longer = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("b");

    let outcome = shorter.equals(&longer);
    assert!(outcome.is_err());
}
17546
/// A pair containing a missing value contributes nothing to the dot product:
/// 1*2 + (skipped) + 3*5 = 17.
#[test]
fn dot_ignores_missing() {
    let lhs = Column::from_values(vec![
        Scalar::Float64(1.0),
        Scalar::Null(NullKind::NaN),
        Scalar::Float64(3.0),
    ])
    .expect("a");
    let rhs = Column::from_values(
        [2.0, 4.0, 5.0]
            .into_iter()
            .map(Scalar::Float64)
            .collect::<Vec<_>>(),
    )
    .expect("b");

    let product = lhs.dot(&rhs).expect("dot");

    let deviation = (product - 17.0).abs();
    assert!(deviation < 1e-9);
}
17565
/// A string column cannot take part in a dot product.
#[test]
fn dot_non_numeric_errors() {
    let text = Column::from_values(vec![Scalar::Utf8("x".into())]).expect("a");
    let numeric = Column::from_values(vec![Scalar::Float64(1.0)]).expect("b");

    let outcome = text.dot(&numeric);
    assert!(outcome.is_err());
}
17572
/// Missing positions are filled from the same position of the other column;
/// present values are left untouched.
#[test]
fn fillna_with_column_fills_missing_positions() {
    let sparse = Column::from_values(vec![
        Scalar::Int64(1),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(3),
    ])
    .expect("a");
    let donor = Column::from_values(
        [10, 20, 30]
            .into_iter()
            .map(Scalar::Int64)
            .collect::<Vec<_>>(),
    )
    .expect("b");

    let filled = sparse.fillna_with_column(&donor).expect("fillna_with_column");

    let expected = [Scalar::Int64(1), Scalar::Int64(20), Scalar::Int64(3)];
    for (pos, want) in expected.iter().enumerate() {
        assert_eq!(&filled.values()[pos], want);
    }
}
17592
/// `divmod` returns a floored quotient and a matching remainder:
/// 10/3 -> (3, 1), 7/2 -> (3, 1), and -5/3 -> (-2, 1).
#[test]
fn divmod_returns_quotient_and_remainder() {
    let dividend = Column::from_values(
        [10.0, 7.0, -5.0]
            .into_iter()
            .map(Scalar::Float64)
            .collect::<Vec<_>>(),
    )
    .expect("a");
    let divisor = Column::from_values(
        [3.0, 2.0, 3.0]
            .into_iter()
            .map(Scalar::Float64)
            .collect::<Vec<_>>(),
    )
    .expect("b");

    let (quot, rem) = dividend.divmod(&divisor).expect("divmod");

    // (expected quotient, expected remainder) per row.
    let expected: [(f64, f64); 3] = [(3.0, 1.0), (3.0, 1.0), (-2.0, 1.0)];
    for (pos, (want_q, want_r)) in expected.into_iter().enumerate() {
        match (&quot.values()[pos], &rem.values()[pos]) {
            (Scalar::Float64(qv), Scalar::Float64(rv)) => {
                assert!((qv - want_q).abs() < 1e-9);
                assert!((rv - want_r).abs() < 1e-9);
            }
            other => unreachable!("unexpected {other:?}"),
        }
    }
}
17633
/// Dividing by zero yields a missing quotient and a missing remainder.
#[test]
fn divmod_zero_divisor_yields_null() {
    let dividend = Column::from_values(vec![Scalar::Float64(10.0)]).expect("a");
    let zero = Column::from_values(vec![Scalar::Float64(0.0)]).expect("b");

    let (quot, rem) = dividend.divmod(&zero).expect("divmod");

    let first_quot = &quot.values()[0];
    assert!(first_quot.is_missing());
    let first_rem = &rem.values()[0];
    assert!(first_rem.is_missing());
}
17642
/// Infinite operands: an infinite dividend gives NaN for both outputs, while a
/// finite dividend over +inf gives (0, x) for positive x and (-1, +inf) for
/// negative x.
#[test]
fn divmod_infinite_operands_match_pandas_float_semantics() {
    let dividend = Column::from_values(
        [f64::INFINITY, f64::NEG_INFINITY, 5.0, -5.0, f64::INFINITY]
            .into_iter()
            .map(Scalar::Float64)
            .collect::<Vec<_>>(),
    )
    .expect("a");
    let divisor = Column::from_values(
        [2.0, -2.0, f64::INFINITY, f64::INFINITY, f64::INFINITY]
            .into_iter()
            .map(Scalar::Float64)
            .collect::<Vec<_>>(),
    )
    .expect("b");

    let (quot, rem) = dividend.divmod(&divisor).expect("divmod");

    let is_nan_float = |cell: &Scalar| matches!(cell, Scalar::Float64(v) if v.is_nan());

    // Rows 0 and 1: infinite dividend with a finite divisor.
    for pos in [0, 1] {
        assert!(is_nan_float(&quot.values()[pos]));
        assert!(is_nan_float(&rem.values()[pos]));
    }

    // Rows 2 and 3: finite dividend with an infinite divisor.
    assert_eq!(quot.values()[2], Scalar::Float64(0.0));
    assert_eq!(rem.values()[2], Scalar::Float64(5.0));
    assert_eq!(quot.values()[3], Scalar::Float64(-1.0));
    assert_eq!(rem.values()[3], Scalar::Float64(f64::INFINITY));

    // Row 4: inf / inf.
    assert!(is_nan_float(&quot.values()[4]));
    assert!(is_nan_float(&rem.values()[4]));
}
17674
/// Where the condition is `false`, the value is taken from the same position
/// of the `other` column instead of from a scalar fill.
#[test]
fn where_cond_series_fills_from_other_column() {
    let source = Column::from_values(
        [1, 2, 3]
            .into_iter()
            .map(Scalar::Int64)
            .collect::<Vec<_>>(),
    )
    .expect("col");
    let flags = Column::from_values(
        [true, false, true]
            .into_iter()
            .map(Scalar::Bool)
            .collect::<Vec<_>>(),
    )
    .expect("cond");
    let fallback = Column::from_values(
        [10, 20, 30]
            .into_iter()
            .map(Scalar::Int64)
            .collect::<Vec<_>>(),
    )
    .expect("other");

    let picked = source
        .where_cond_series(&flags, &fallback)
        .expect("where_series");

    let expected = [Scalar::Int64(1), Scalar::Int64(20), Scalar::Int64(3)];
    for (pos, want) in expected.iter().enumerate() {
        assert_eq!(&picked.values()[pos], want);
    }
}
17697
/// Where the condition is `true`, `mask_series` substitutes the value from the
/// `other` column; rows with a `false` condition keep the source value.
#[test]
fn mask_series_fills_from_other_column() {
    let source = Column::from_values(
        [1, 2, 3]
            .into_iter()
            .map(Scalar::Int64)
            .collect::<Vec<_>>(),
    )
    .expect("col");
    let flags = Column::from_values(
        [true, false, true]
            .into_iter()
            .map(Scalar::Bool)
            .collect::<Vec<_>>(),
    )
    .expect("cond");
    let zeros = Column::from_values(
        [0, 0, 0]
            .into_iter()
            .map(Scalar::Int64)
            .collect::<Vec<_>>(),
    )
    .expect("other");

    let masked = source.mask_series(&flags, &zeros).expect("mask_series");

    let expected = [Scalar::Int64(0), Scalar::Int64(2), Scalar::Int64(0)];
    for (pos, want) in expected.iter().enumerate() {
        assert_eq!(&masked.values()[pos], want);
    }
}
17717
/// The column-fill variant also refuses an integer condition with
/// `InvalidMaskType`.
#[test]
fn where_cond_series_rejects_non_bool_cond() {
    let source = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
    let not_bool = Column::from_values(vec![Scalar::Int64(1)]).expect("cond");
    let fallback = Column::from_values(vec![Scalar::Int64(0)]).expect("other");

    let outcome = source.where_cond_series(&not_bool, &fallback);

    assert!(matches!(
        outcome.unwrap_err(),
        crate::ColumnError::InvalidMaskType { .. }
    ));
}
17726
/// Each value found in `to_replace` is swapped for the replacement at the same
/// position; unmatched values pass through. `replace` must return the same
/// result as `replace_values`.
#[test]
fn replace_values_applies_first_match() {
    let source = Column::from_values(
        [1, 2, 3, 2]
            .into_iter()
            .map(Scalar::Int64)
            .collect::<Vec<_>>(),
    )
    .expect("col");
    let to_replace = vec![Scalar::Int64(2), Scalar::Int64(3)];
    let replacement = vec![Scalar::Int64(20), Scalar::Int64(30)];

    let replaced = source
        .replace_values(&to_replace, &replacement)
        .expect("replace");
    let via_alias = source
        .replace(&to_replace, &replacement)
        .expect("replace alias");
    assert_eq!(via_alias, replaced);

    let expected = [
        Scalar::Int64(1),
        Scalar::Int64(20),
        Scalar::Int64(30),
        Scalar::Int64(20),
    ];
    for (pos, want) in expected.iter().enumerate() {
        assert_eq!(&replaced.values()[pos], want);
    }
}
17750
/// A missing value listed in `to_replace` matches missing cells in the column.
#[test]
fn replace_values_can_replace_nulls() {
    let source = Column::from_values(vec![
        Scalar::Int64(1),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(2),
    ])
    .expect("col");
    let to_replace = vec![Scalar::Null(NullKind::NaN)];
    let replacement = vec![Scalar::Int64(-1)];

    let replaced = source
        .replace_values(&to_replace, &replacement)
        .expect("replace");

    let expected = [Scalar::Int64(1), Scalar::Int64(-1), Scalar::Int64(2)];
    for (pos, want) in expected.iter().enumerate() {
        assert_eq!(&replaced.values()[pos], want);
    }
}
17768
/// One search value with two replacements is reported as `LengthMismatch`.
#[test]
fn replace_values_length_mismatch_errors() {
    let source = Column::from_values(vec![Scalar::Int64(1)]).expect("col");

    let outcome =
        source.replace_values(&[Scalar::Int64(1)], &[Scalar::Int64(2), Scalar::Int64(3)]);

    assert!(matches!(
        outcome.unwrap_err(),
        crate::ColumnError::LengthMismatch { .. }
    ));
}
17777
/// `nonzero` reports the positions of non-zero values; zeros and missing
/// entries are left out.
#[test]
fn nonzero_returns_truthy_positions() {
    let cells = vec![
        Scalar::Int64(0),
        Scalar::Int64(5),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(-3),
        Scalar::Int64(0),
    ];
    let source = Column::from_values(cells).expect("col");

    let positions = source.nonzero();
    assert_eq!(positions, vec![1, 3]);
}
17790
/// An empty column has no non-zero positions.
#[test]
fn nonzero_empty_column_is_empty() {
    let empty = Column::from_values(Vec::<Scalar>::new()).expect("col");

    let positions = empty.nonzero();
    assert!(positions.is_empty());
}
17796
/// A condition shorter than the column is reported as `LengthMismatch`.
#[test]
fn where_rejects_length_mismatch() {
    let source = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
    let too_short = Column::from_values(vec![Scalar::Bool(true)]).expect("cond");

    let outcome = source.where_cond(&too_short, &Scalar::Int64(0));

    assert!(matches!(
        outcome.unwrap_err(),
        crate::ColumnError::LengthMismatch { .. }
    ));
}
17804 }
17805
17806 mod nlargest_nsmallest {
17807 use fp_types::NullKind;
17808
17809 use super::*;
17810
17811 #[test]
17812 fn nlargest_returns_top_n_descending() {
17813 let col = Column::from_values(vec![
17814 Scalar::Int64(3),
17815 Scalar::Int64(1),
17816 Scalar::Int64(5),
17817 Scalar::Int64(2),
17818 Scalar::Int64(4),
17819 ])
17820 .expect("col");
17821 let top = col.nlargest(3).expect("nlargest");
17822 assert_eq!(top.len(), 3);
17823 assert_eq!(top.values()[0], Scalar::Int64(5));
17824 assert_eq!(top.values()[1], Scalar::Int64(4));
17825 assert_eq!(top.values()[2], Scalar::Int64(3));
17826 }
17827
17828 #[test]
17829 fn nsmallest_returns_bottom_n_ascending() {
17830 let col = Column::from_values(vec![
17831 Scalar::Int64(3),
17832 Scalar::Int64(1),
17833 Scalar::Int64(5),
17834 Scalar::Int64(2),
17835 Scalar::Int64(4),
17836 ])
17837 .expect("col");
17838 let bot = col.nsmallest(2).expect("nsmallest");
17839 assert_eq!(bot.len(), 2);
17840 assert_eq!(bot.values()[0], Scalar::Int64(1));
17841 assert_eq!(bot.values()[1], Scalar::Int64(2));
17842 }
17843
17844 #[test]
17845 fn nlargest_excludes_missing_when_n_fits() {
17846 let col = Column::from_values(vec![
17847 Scalar::Int64(5),
17848 Scalar::Null(NullKind::NaN),
17849 Scalar::Int64(3),
17850 Scalar::Int64(7),
17851 ])
17852 .expect("col");
17853 let top = col.nlargest(2).expect("nlargest");
17854 assert_eq!(top.len(), 2);
17855 assert_eq!(top.values()[0], Scalar::Int64(7));
17856 assert_eq!(top.values()[1], Scalar::Int64(5));
17857 }
17858
17859 #[test]
17860 fn nlargest_n_larger_than_length_clamps() {
17861 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17862 let top = col.nlargest(100).expect("nlargest");
17863 assert_eq!(top.len(), 2);
17864 }
17865
17866 #[test]
17867 fn nlargest_zero_is_empty_same_dtype() {
17868 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
17869 let top = col.nlargest(0).expect("nlargest");
17870 assert!(top.is_empty());
17871 assert_eq!(top.dtype(), DType::Int64);
17872 }
17873 }
17874
17875 mod astype {
17876 use fp_types::NullKind;
17877
17878 use super::*;
17879
17880 #[test]
17881 fn astype_int_to_float_preserves_values() {
17882 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
17883 let out = col.astype(DType::Float64).expect("astype");
17884 assert_eq!(out.dtype(), DType::Float64);
17885 assert_eq!(out.values()[0], Scalar::Float64(1.0));
17886 assert_eq!(out.values()[1], Scalar::Float64(2.0));
17887 }
17888
17889 #[test]
17890 fn astype_same_dtype_is_noop_clone() {
17891 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
17892 let out = col.astype(DType::Int64).expect("astype");
17893 assert_eq!(out.values(), col.values());
17894 }
17895
17896 #[test]
17897 fn astype_bool_to_int() {
17898 let col =
17899 Column::from_values(vec![Scalar::Bool(true), Scalar::Bool(false)]).expect("col");
17900 let out = col.astype(DType::Int64).expect("astype");
17901 assert_eq!(out.dtype(), DType::Int64);
17902 assert_eq!(out.values()[0], Scalar::Int64(1));
17903 assert_eq!(out.values()[1], Scalar::Int64(0));
17904 }
17905
17906 #[test]
17907 fn astype_to_utf8_uses_pandas_string_spellings() {
17908 let bool_col = Column::new(DType::Bool, vec![Scalar::Bool(true), Scalar::Bool(false)])
17909 .expect("bool col");
17910 let int_col = Column::new(DType::Int64, vec![Scalar::Int64(-7)]).expect("int col");
17911 let float_col = Column::new(
17912 DType::Float64,
17913 vec![Scalar::Float64(1.0), Scalar::Null(NullKind::NaN)],
17914 )
17915 .expect("float col");
17916
17917 let bool_out = bool_col.astype(DType::Utf8).expect("astype bool");
17918 let int_out = int_col.astype(DType::Utf8).expect("astype int");
17919 let float_out = float_col.astype(DType::Utf8).expect("astype float");
17920
17921 assert_eq!(bool_out.dtype(), DType::Utf8);
17922 assert_eq!(
17923 bool_out.values(),
17924 &[
17925 Scalar::Utf8("True".to_owned()),
17926 Scalar::Utf8("False".to_owned()),
17927 ]
17928 );
17929 assert_eq!(int_out.values(), &[Scalar::Utf8("-7".to_owned())]);
17930 assert_eq!(
17931 float_out.values(),
17932 &[
17933 Scalar::Utf8("1.0".to_owned()),
17934 Scalar::Utf8("nan".to_owned()),
17935 ]
17936 );
17937 }
17938
17939 #[test]
17940 fn astype_propagates_missing() {
17941 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Null(NullKind::NaN)])
17942 .expect("col");
17943 let out = col.astype(DType::Float64).expect("astype");
17944 assert_eq!(out.values()[0], Scalar::Float64(1.0));
17945 assert!(out.values()[1].is_missing());
17946 }
17947
17948 #[test]
17949 fn astype_finite_float_to_int_truncates_toward_zero() {
17950 let col = Column::from_values(vec![
17953 Scalar::Float64(1.5),
17954 Scalar::Float64(2.9),
17955 Scalar::Float64(-1.5),
17956 Scalar::Float64(-2.9),
17957 Scalar::Float64(0.4),
17958 ])
17959 .expect("col");
17960 let out = col.astype(DType::Int64).expect("truncating cast");
17961 assert_eq!(
17962 out.values(),
17963 &[
17964 Scalar::Int64(1),
17965 Scalar::Int64(2),
17966 Scalar::Int64(-1),
17967 Scalar::Int64(-2),
17968 Scalar::Int64(0),
17969 ]
17970 );
17971 let inf = Column::from_values(vec![Scalar::Float64(f64::INFINITY)]).expect("col");
17973 assert!(matches!(
17974 inf.astype(DType::Int64).unwrap_err(),
17975 crate::ColumnError::Type(_)
17976 ));
17977 }
17978
17979 #[test]
17980 fn new_int64_from_lossy_float_errors_unlike_astype() {
17981 let err = Column::new(DType::Int64, vec![Scalar::Float64(1.5)]).unwrap_err();
17985 assert!(matches!(
17986 err,
17987 crate::ColumnError::Type(fp_types::TypeError::LossyFloatToInt { .. })
17988 ));
17989 let ok = Column::new(DType::Int64, vec![Scalar::Float64(2.0)]).expect("integer float");
17991 assert_eq!(ok.values(), &[Scalar::Int64(2)]);
17992 }
17993 }
17994
17995 mod rank_searchsorted {
17996 use fp_types::NullKind;
17997
17998 use super::*;
17999
18000 #[test]
18001 fn rank_average_ties_get_midpoint() {
18002 let col = Column::from_values(vec![
18003 Scalar::Float64(10.0),
18004 Scalar::Float64(20.0),
18005 Scalar::Float64(20.0),
18006 Scalar::Float64(30.0),
18007 ])
18008 .expect("col");
18009 let r = col.rank("average", true).expect("rank");
18010 assert_eq!(r.values()[0], Scalar::Float64(1.0));
18011 assert_eq!(r.values()[1], Scalar::Float64(2.5));
18013 assert_eq!(r.values()[2], Scalar::Float64(2.5));
18014 assert_eq!(r.values()[3], Scalar::Float64(4.0));
18015 }
18016
18017 #[test]
18018 fn rank_min_assigns_lowest_tied_rank() {
18019 let col = Column::from_values(vec![
18020 Scalar::Int64(1),
18021 Scalar::Int64(2),
18022 Scalar::Int64(2),
18023 Scalar::Int64(3),
18024 ])
18025 .expect("col");
18026 let r = col.rank("min", true).expect("rank");
18027 assert_eq!(r.values()[1], Scalar::Float64(2.0));
18028 assert_eq!(r.values()[2], Scalar::Float64(2.0));
18029 assert_eq!(r.values()[3], Scalar::Float64(4.0));
18030 }
18031
18032 #[test]
18033 fn rank_max_assigns_highest_tied_rank() {
18034 let col = Column::from_values(vec![
18035 Scalar::Int64(1),
18036 Scalar::Int64(2),
18037 Scalar::Int64(2),
18038 Scalar::Int64(3),
18039 ])
18040 .expect("col");
18041 let r = col.rank("max", true).expect("rank");
18042 assert_eq!(r.values()[1], Scalar::Float64(3.0));
18043 assert_eq!(r.values()[2], Scalar::Float64(3.0));
18044 assert_eq!(r.values()[3], Scalar::Float64(4.0));
18045 }
18046
18047 #[test]
18048 fn rank_first_breaks_ties_by_appearance_order() {
18049 let col =
18050 Column::from_values(vec![Scalar::Int64(5), Scalar::Int64(3), Scalar::Int64(3)])
18051 .expect("col");
18052 let r = col.rank("first", true).expect("rank");
18053 assert_eq!(r.values()[0], Scalar::Float64(3.0));
18055 assert_eq!(r.values()[1], Scalar::Float64(1.0));
18056 assert_eq!(r.values()[2], Scalar::Float64(2.0));
18057 }
18058
18059 #[test]
18060 fn rank_dense_has_no_gaps() {
18061 let col = Column::from_values(vec![
18062 Scalar::Int64(1),
18063 Scalar::Int64(2),
18064 Scalar::Int64(2),
18065 Scalar::Int64(3),
18066 ])
18067 .expect("col");
18068 let r = col.rank("dense", true).expect("rank");
18069 assert_eq!(r.values()[0], Scalar::Float64(1.0));
18070 assert_eq!(r.values()[1], Scalar::Float64(2.0));
18071 assert_eq!(r.values()[2], Scalar::Float64(2.0));
18072 assert_eq!(r.values()[3], Scalar::Float64(3.0));
18073 }
18074
18075 #[test]
18076 fn rank_null_inputs_stay_null() {
18077 let col = Column::from_values(vec![
18078 Scalar::Float64(1.0),
18079 Scalar::Null(NullKind::NaN),
18080 Scalar::Float64(2.0),
18081 ])
18082 .expect("col");
18083 let r = col.rank("average", true).expect("rank");
18084 assert_eq!(r.values()[0], Scalar::Float64(1.0));
18085 assert!(r.values()[1].is_missing());
18086 assert_eq!(r.values()[2], Scalar::Float64(2.0));
18087 }
18088
18089 #[test]
18090 fn rank_descending_reverses_assignment() {
18091 let col =
18092 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
18093 .expect("col");
18094 let r = col.rank("min", false).expect("rank");
18095 assert_eq!(r.values()[0], Scalar::Float64(3.0));
18096 assert_eq!(r.values()[1], Scalar::Float64(2.0));
18097 assert_eq!(r.values()[2], Scalar::Float64(1.0));
18098 }
18099
18100 #[test]
18101 fn rank_invalid_method_errors() {
18102 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
18103 let err = col.rank("bogus", true).unwrap_err();
18104 assert!(matches!(err, crate::ColumnError::Type(_)));
18105 }
18106
18107 #[test]
18108 fn searchsorted_left_finds_first_insertion() {
18109 let col = Column::from_values(vec![
18110 Scalar::Int64(1),
18111 Scalar::Int64(2),
18112 Scalar::Int64(2),
18113 Scalar::Int64(5),
18114 ])
18115 .expect("col");
18116 assert_eq!(col.searchsorted(&Scalar::Int64(2), "left").unwrap(), 1);
18117 assert_eq!(col.searchsorted(&Scalar::Int64(0), "left").unwrap(), 0);
18118 assert_eq!(col.searchsorted(&Scalar::Int64(6), "left").unwrap(), 4);
18119 }
18120
18121 #[test]
18122 fn searchsorted_right_finds_last_insertion() {
18123 let col = Column::from_values(vec![
18124 Scalar::Int64(1),
18125 Scalar::Int64(2),
18126 Scalar::Int64(2),
18127 Scalar::Int64(5),
18128 ])
18129 .expect("col");
18130 assert_eq!(col.searchsorted(&Scalar::Int64(2), "right").unwrap(), 3);
18131 }
18132
18133 #[test]
18134 fn searchsorted_rejects_invalid_side() {
18135 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
18136 let err = col.searchsorted(&Scalar::Int64(0), "middle").unwrap_err();
18137 assert!(matches!(err, crate::ColumnError::Type(_)));
18138 }
18139
18140 #[test]
18141 fn searchsorted_rejects_missing_needle() {
18142 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
18143 let err = col
18144 .searchsorted(&Scalar::Null(NullKind::NaN), "left")
18145 .unwrap_err();
18146 assert!(matches!(err, crate::ColumnError::Type(_)));
18147 }
18148
18149 #[test]
18150 fn searchsorted_treats_trailing_nulls_as_greater() {
18151 let col = Column::from_values(vec![
18152 Scalar::Int64(1),
18153 Scalar::Int64(2),
18154 Scalar::Null(NullKind::NaN),
18155 ])
18156 .expect("col");
18157 assert_eq!(col.searchsorted(&Scalar::Int64(3), "left").unwrap(), 2);
18159 }
18160
18161 #[test]
18162 fn searchsorted_values_left_returns_positions_column() {
18163 let col = Column::from_values(vec![
18164 Scalar::Int64(1),
18165 Scalar::Int64(2),
18166 Scalar::Int64(2),
18167 Scalar::Int64(5),
18168 ])
18169 .expect("col");
18170 let positions = col
18171 .searchsorted_values(
18172 &[Scalar::Int64(0), Scalar::Int64(2), Scalar::Int64(6)],
18173 "left",
18174 )
18175 .expect("searchsorted");
18176 assert_eq!(positions.dtype(), DType::Int64);
18177 assert_eq!(
18178 positions.values(),
18179 &[Scalar::Int64(0), Scalar::Int64(1), Scalar::Int64(4)]
18180 );
18181 }
18182
18183 #[test]
18184 fn searchsorted_values_right_returns_positions_column() {
18185 let col = Column::from_values(vec![
18186 Scalar::Int64(1),
18187 Scalar::Int64(2),
18188 Scalar::Int64(2),
18189 Scalar::Int64(5),
18190 ])
18191 .expect("col");
18192 let positions = col
18193 .searchsorted_values(
18194 &[Scalar::Int64(0), Scalar::Int64(2), Scalar::Int64(6)],
18195 "right",
18196 )
18197 .expect("searchsorted");
18198 assert_eq!(
18199 positions.values(),
18200 &[Scalar::Int64(0), Scalar::Int64(3), Scalar::Int64(4)]
18201 );
18202 }
18203
18204 #[test]
18205 fn searchsorted_values_rejects_invalid_side() {
18206 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
18207 let err = col
18208 .searchsorted_values(&[Scalar::Int64(0)], "middle")
18209 .unwrap_err();
18210 assert!(matches!(err, crate::ColumnError::Type(_)));
18211 }
18212
18213 #[test]
18214 fn searchsorted_values_rejects_missing_needles() {
18215 let col = Column::from_values(vec![Scalar::Int64(1)]).expect("col");
18216 let err = col
18217 .searchsorted_values(&[Scalar::Null(NullKind::NaN)], "left")
18218 .unwrap_err();
18219 assert!(matches!(err, crate::ColumnError::Type(_)));
18220 }
18221
18222 #[test]
18223 fn searchsorted_with_sorter_uses_argsort_permutation() {
18224 let col = Column::from_values(vec![
18225 Scalar::Int64(5),
18226 Scalar::Int64(1),
18227 Scalar::Int64(2),
18228 Scalar::Int64(2),
18229 ])
18230 .expect("col");
18231 let sorter = col.argsort();
18232 assert_eq!(
18233 col.searchsorted_with_sorter(&Scalar::Int64(2), "left", &sorter)
18234 .unwrap(),
18235 1
18236 );
18237 assert_eq!(
18238 col.searchsorted_with_sorter(&Scalar::Int64(2), "right", &sorter)
18239 .unwrap(),
18240 3
18241 );
18242 assert_eq!(
18243 col.searchsorted_with_sorter(&Scalar::Int64(6), "left", &sorter)
18244 .unwrap(),
18245 4
18246 );
18247 }
18248
18249 #[test]
18250 fn searchsorted_values_with_sorter_returns_positions_column() {
18251 let col = Column::from_values(vec![
18252 Scalar::Int64(5),
18253 Scalar::Int64(1),
18254 Scalar::Int64(2),
18255 Scalar::Int64(2),
18256 ])
18257 .expect("col");
18258 let sorter = col.argsort();
18259 let positions = col
18260 .searchsorted_values_with_sorter(
18261 &[Scalar::Int64(0), Scalar::Int64(2), Scalar::Int64(6)],
18262 "left",
18263 &sorter,
18264 )
18265 .expect("searchsorted");
18266 assert_eq!(
18267 positions.values(),
18268 &[Scalar::Int64(0), Scalar::Int64(1), Scalar::Int64(4)]
18269 );
18270 }
18271
18272 #[test]
18273 fn searchsorted_with_sorter_rejects_length_mismatch() {
18274 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
18275 let err = col
18276 .searchsorted_with_sorter(&Scalar::Int64(1), "left", &[0])
18277 .unwrap_err();
18278 assert!(matches!(
18279 err,
18280 crate::ColumnError::LengthMismatch { left: 2, right: 1 }
18281 ));
18282 }
18283
18284 #[test]
18285 fn searchsorted_with_sorter_rejects_duplicate_or_oob_indices() {
18286 let col = Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2)]).expect("col");
18287 let duplicate = col
18288 .searchsorted_with_sorter(&Scalar::Int64(1), "left", &[0, 0])
18289 .unwrap_err();
18290 assert!(matches!(
18291 duplicate,
18292 crate::ColumnError::InvalidSorter { .. }
18293 ));
18294
18295 let out_of_bounds = col
18296 .searchsorted_with_sorter(&Scalar::Int64(1), "left", &[0, 2])
18297 .unwrap_err();
18298 assert!(matches!(
18299 out_of_bounds,
18300 crate::ColumnError::InvalidSorter { .. }
18301 ));
18302 }
18303 }
18304
18305 mod value_counts {
18306 use fp_types::NullKind;
18307
18308 use super::*;
18309
/// The default `value_counts` leaves out the missing entry and orders the
/// distinct values by count, largest first.
#[test]
fn value_counts_default_drops_missing_and_sorts_descending() {
    let cells = vec![
        Scalar::Int64(3),
        Scalar::Int64(1),
        Scalar::Null(NullKind::NaN),
        Scalar::Int64(3),
        Scalar::Int64(2),
        Scalar::Int64(1),
        Scalar::Int64(3),
    ];
    let source = Column::from_values(cells).expect("col");

    let (distinct, tallies) = source.value_counts().expect("value_counts");

    let want_distinct = [Scalar::Int64(3), Scalar::Int64(1), Scalar::Int64(2)];
    assert_eq!(distinct.values(), &want_distinct);

    let want_tallies = [Scalar::Int64(3), Scalar::Int64(2), Scalar::Int64(1)];
    assert_eq!(tallies.values(), &want_tallies);
}
18333
/// With the flags `(false, false, false, true)` the distinct values come back
/// in the order they were first seen.
// NOTE(review): the meaning of each positional flag is not visible here;
// confirm against the `value_counts_with_options` signature.
#[test]
fn value_counts_sort_false_preserves_first_seen_order() {
    let source = Column::from_values(
        [2, 1, 2, 3, 1]
            .into_iter()
            .map(Scalar::Int64)
            .collect::<Vec<_>>(),
    )
    .expect("col");

    let (distinct, tallies) = source
        .value_counts_with_options(false, false, false, true)
        .expect("value_counts");

    let want_distinct = [Scalar::Int64(2), Scalar::Int64(1), Scalar::Int64(3)];
    assert_eq!(distinct.values(), &want_distinct);

    let want_tallies = [Scalar::Int64(2), Scalar::Int64(2), Scalar::Int64(1)];
    assert_eq!(tallies.values(), &want_tallies);
}
18357
/// With the flags `(false, true, false, false)` missing entries are counted:
/// the NaN-kind and Null-kind entries land in one shared bucket of size 2.
// NOTE(review): the meaning of each positional flag is not visible here;
// confirm against the `value_counts_with_options` signature.
#[test]
fn value_counts_dropna_false_includes_missing_bucket() {
    let cells = vec![
        Scalar::Utf8("a".into()),
        Scalar::Null(NullKind::NaN),
        Scalar::Utf8("a".into()),
        Scalar::Null(NullKind::Null),
    ];
    let source = Column::from_values(cells).expect("col");

    let (distinct, tallies) = source
        .value_counts_with_options(false, true, false, false)
        .expect("value_counts");

    let keys = distinct.values();
    assert_eq!(keys[0], Scalar::Utf8("a".into()));
    assert!(keys[1].is_missing());

    let want_tallies = [Scalar::Int64(2), Scalar::Int64(2)];
    assert_eq!(tallies.values(), &want_tallies);
}
18375
/// With the flags `(true, true, false, true)` the counts become Float64
/// fractions of the three non-missing rows; the missing row is not part of
/// the total.
// NOTE(review): the meaning of each positional flag is not visible here;
// confirm against the `value_counts_with_options` signature.
#[test]
fn value_counts_normalize_uses_returned_total() {
    let cells = vec![
        Scalar::Float64(1.0),
        Scalar::Float64(2.0),
        Scalar::Float64(1.0),
        Scalar::Null(NullKind::NaN),
    ];
    let source = Column::from_values(cells).expect("col");

    let (distinct, shares) = source
        .value_counts_with_options(true, true, false, true)
        .expect("value_counts");

    let want_distinct = [Scalar::Float64(1.0), Scalar::Float64(2.0)];
    assert_eq!(distinct.values(), &want_distinct);

    assert_eq!(shares.dtype(), DType::Float64);
    let want_shares = [Scalar::Float64(2.0 / 3.0), Scalar::Float64(1.0 / 3.0)];
    assert_eq!(shares.values(), &want_shares);
}
18399
/// Pins `python_mod_f64` for infinite divisors: the result is the dividend
/// when the signs agree, the divisor when they differ, and NaN when the
/// dividend is NaN or infinite.
#[test]
fn python_mod_f64_handles_infinity_divisor() {
    use crate::python_mod_f64 as py_mod;

    const POS_INF: f64 = f64::INFINITY;
    const NEG_INF: f64 = f64::NEG_INFINITY;

    // Finite non-zero dividends.
    assert_eq!(py_mod(5.0, POS_INF), 5.0);
    assert_eq!(py_mod(-5.0, POS_INF), POS_INF);
    assert_eq!(py_mod(5.0, NEG_INF), NEG_INF);
    assert_eq!(py_mod(-5.0, NEG_INF), -5.0);

    // Zero dividends. `assert_eq!` on f64 treats 0.0 and -0.0 as equal, so
    // only the `is_sign_negative` check below pins the sign of a zero result.
    assert_eq!(py_mod(0.0, POS_INF), 0.0);
    assert!(py_mod(0.0, NEG_INF).is_sign_negative());
    assert_eq!(py_mod(-0.0, POS_INF), 0.0);
    assert_eq!(py_mod(-0.0, NEG_INF), -0.0);

    // NaN or infinite dividends.
    for (dividend, divisor) in [
        (f64::NAN, POS_INF),
        (f64::NAN, NEG_INF),
        (POS_INF, POS_INF),
        (NEG_INF, NEG_INF),
    ] {
        assert!(py_mod(dividend, divisor).is_nan());
    }
}
18417
/// Pins `python_floor_div_f64` for infinite operands: a finite dividend over
/// an infinite divisor floors to 0 or -1 depending on the signs, and an
/// infinite dividend gives NaN.
#[test]
fn python_floor_div_f64_handles_infinite_operands() {
    use crate::python_floor_div_f64 as floor_div;

    const POS_INF: f64 = f64::INFINITY;
    const NEG_INF: f64 = f64::NEG_INFINITY;

    // Finite non-zero dividends.
    assert_eq!(floor_div(5.0, POS_INF), 0.0);
    assert_eq!(floor_div(-5.0, POS_INF), -1.0);
    assert_eq!(floor_div(5.0, NEG_INF), -1.0);
    assert_eq!(floor_div(-5.0, NEG_INF), 0.0);

    // Zero dividends. `assert_eq!` on f64 treats 0.0 and -0.0 as equal, so
    // only the `is_sign_negative` checks pin the sign of a zero result.
    assert_eq!(floor_div(0.0, POS_INF), 0.0);
    assert!(floor_div(-0.0, POS_INF).is_sign_negative());
    assert!(floor_div(0.0, NEG_INF).is_sign_negative());
    assert_eq!(floor_div(-0.0, NEG_INF), 0.0);

    // Infinite dividends.
    for (dividend, divisor) in [(POS_INF, 2.0), (NEG_INF, -2.0), (POS_INF, POS_INF)] {
        assert!(floor_div(dividend, divisor).is_nan());
    }
}
18434
/// With edges 0, 1, 2, 3 the five samples fall 1 / 2 / 2 into the three bins.
#[test]
fn histogram_counts_values_in_bins() {
    let samples = Column::from_values(
        [0.5, 1.5, 2.5, 1.2, 2.8]
            .into_iter()
            .map(Scalar::Float64)
            .collect::<Vec<_>>(),
    )
    .unwrap();
    let edges = vec![0.0, 1.0, 2.0, 3.0];

    let tallies = samples.histogram(&edges).unwrap();

    let want = [Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(2)];
    assert_eq!(tallies.values(), &want);
}
18456
/// Requesting 3 bins yields 3 counts and 4 edges whose outer edges sit on the
/// minimum (1.0) and maximum (4.0) of the data.
#[test]
fn histogram_auto_creates_bins() {
    let samples = Column::from_values(
        [1.0, 2.0, 3.0, 4.0]
            .into_iter()
            .map(Scalar::Float64)
            .collect::<Vec<_>>(),
    )
    .unwrap();

    let (tallies, edges) = samples.histogram_auto(3).unwrap();

    assert_eq!(tallies.len(), 3);
    assert_eq!(edges.len(), 4);

    let low_gap = (edges[0] - 1.0).abs();
    assert!(low_gap < 1e-10);
    let high_gap = (edges[3] - 4.0).abs();
    assert!(high_gap < 1e-10);
}
18472
/// When every sample is 5.0 the outer edges are pushed out so that they
/// strictly bracket the value.
#[test]
fn histogram_auto_constant_values_extends_range() {
    let samples = Column::from_values(
        [5.0, 5.0, 5.0]
            .into_iter()
            .map(Scalar::Float64)
            .collect::<Vec<_>>(),
    )
    .unwrap();

    let (tallies, edges) = samples.histogram_auto(2).unwrap();

    assert_eq!(tallies.len(), 2);
    let lowest = edges[0];
    assert!(lowest < 5.0);
    let highest = edges[2];
    assert!(highest > 5.0);
}
18486
/// A 5-point Hanning window is ~0 at both ends and ~1 at the centre sample.
#[test]
fn hanning_window_shape() {
    let window = Column::hanning(5).unwrap();
    assert_eq!(window.len(), 5);

    let sample = |idx: usize| window.values()[idx].to_f64().unwrap();
    assert!(sample(0).abs() < 1e-10);
    assert!(sample(4).abs() < 1e-10);
    assert!((sample(2) - 1.0).abs() < 1e-10);
}
18497
/// The first sample of a 5-point Hamming window lies strictly between 0.07
/// and 0.09.
#[test]
fn hamming_window_shape() {
    let window = Column::hamming(5).unwrap();
    assert_eq!(window.len(), 5);

    let first = window.values()[0].to_f64().unwrap();
    assert!(first > 0.07 && first < 0.09);
}
18506
/// A 5-point Bartlett window is ~0 at both ends and ~1 at the centre sample.
#[test]
fn bartlett_window_triangular() {
    let window = Column::bartlett(5).unwrap();
    assert_eq!(window.len(), 5);

    let sample = |idx: usize| window.values()[idx].to_f64().unwrap();
    assert!(sample(0).abs() < 1e-10);
    assert!(sample(4).abs() < 1e-10);
    assert!((sample(2) - 1.0).abs() < 1e-10);
}
18517
/// "full" convolution of [1, 2, 3] with [1, 1] has length 3 + 2 - 1 = 4 and
/// equals [1, 3, 5, 3].
#[test]
fn convolve_full_mode() {
    let signal = Column::from_values(
        [1.0, 2.0, 3.0]
            .into_iter()
            .map(Scalar::Float64)
            .collect::<Vec<_>>(),
    )
    .unwrap();
    let kernel = Column::from_values(vec![Scalar::Float64(1.0), Scalar::Float64(1.0)]).unwrap();

    let convolved = signal.convolve(&kernel, "full").unwrap();

    assert_eq!(convolved.len(), 4);
    for (pos, want) in [1.0, 3.0, 5.0, 3.0].into_iter().enumerate() {
        let got = convolved.values()[pos].to_f64().unwrap();
        assert!((got - want).abs() < 1e-10);
    }
}
18535
/// `geomspace(1, 1000, 4)` produces the four values 1, 10, 100 and 1000.
#[test]
fn geomspace_creates_geometric_progression() {
    let progression = Column::geomspace(1.0, 1000.0, 4).unwrap();
    assert_eq!(progression.len(), 4);

    for (idx, want) in [1.0, 10.0, 100.0, 1000.0].into_iter().enumerate() {
        let got = progression.values()[idx].to_f64().unwrap();
        assert!((got - want).abs() < 1e-10);
    }
}
18545
/// `nan_to_num` keeps finite values, maps NaN to 0.0, `+inf` to `f64::MAX`
/// and `-inf` to `f64::MIN`.
#[test]
fn nan_to_num_replaces_special_values() {
    let inputs: Vec<Scalar> = [1.0, f64::NAN, f64::INFINITY, f64::NEG_INFINITY]
        .into_iter()
        .map(Scalar::Float64)
        .collect();
    let col = Column::from_values(inputs).unwrap();

    let cleaned = col.nan_to_num().unwrap();
    let at = |idx: usize| cleaned.values()[idx].to_f64().unwrap();

    // Finite input and NaN are checked with a tolerance ...
    assert!((at(0) - 1.0).abs() < 1e-10);
    assert!((at(1) - 0.0).abs() < 1e-10);
    // ... while the infinities must map to the exact extreme finite values.
    assert_eq!(at(2), f64::MAX);
    assert_eq!(at(3), f64::MIN);
}
18561
/// `rint` resolves exact halves towards the even neighbour:
/// 0.5 -> 0, 1.5 -> 2, 2.5 -> 2, 3.5 -> 4.
#[test]
fn rint_rounds_to_nearest_even() {
    let halves: Vec<Scalar> = [0.5, 1.5, 2.5, 3.5].into_iter().map(Scalar::Float64).collect();
    let col = Column::from_values(halves).unwrap();

    let rounded = col.rint().unwrap();

    for (idx, want) in [0.0, 2.0, 2.0, 4.0].into_iter().enumerate() {
        let got = rounded.values()[idx].to_f64().unwrap();
        assert!((got - want).abs() < 1e-10);
    }
}
18578
/// `ldexp(3)` scales every element by 2^3: 1 -> 8, 2 -> 16, 0.5 -> 4.
#[test]
fn ldexp_multiplies_by_power_of_two() {
    let inputs: Vec<Scalar> = [1.0, 2.0, 0.5].into_iter().map(Scalar::Float64).collect();
    let col = Column::from_values(inputs).unwrap();

    let scaled = col.ldexp(3).unwrap();

    for (idx, want) in [8.0, 16.0, 4.0].into_iter().enumerate() {
        let got = scaled.values()[idx].to_f64().unwrap();
        assert!((got - want).abs() < 1e-10);
    }
}
18592
/// `modf` returns `(fractional, integral)` columns; both parts carry the sign
/// of the input (e.g. -2.25 -> (-0.25, -2.0)).
#[test]
fn modf_splits_integer_and_fraction() {
    let inputs: Vec<Scalar> = [3.5, -2.25, 1.0].into_iter().map(Scalar::Float64).collect();
    let col = Column::from_values(inputs).unwrap();

    let (frac, int) = col.modf().unwrap();

    // One `(fraction, integer)` expectation per input row.
    let expected = [(0.5, 3.0), (-0.25, -2.0), (0.0, 1.0)];
    for (row, (want_frac, want_int)) in expected.into_iter().enumerate() {
        let got_frac = frac.values()[row].to_f64().unwrap();
        assert!((got_frac - want_frac).abs() < 1e-10);
        let got_int = int.values()[row].to_f64().unwrap();
        assert!((got_int - want_int).abs() < 1e-10);
    }
}
18609
/// `spacing` is tiny and positive at 1.0, the same at -1.0, and equal to the
/// smallest positive subnormal (`f64::from_bits(1)`) at 0.0.
#[test]
fn spacing_returns_ulp() {
    let inputs: Vec<Scalar> = [1.0, -1.0, 0.0].into_iter().map(Scalar::Float64).collect();
    let col = Column::from_values(inputs).unwrap();

    let gaps = col.spacing().unwrap();
    let gap_at = |idx: usize| gaps.values()[idx].to_f64().unwrap();

    let at_one = gap_at(0);
    assert!(0.0 < at_one && at_one < 1e-15);

    // The gap at -1.0 must match the gap at 1.0.
    let at_minus_one = gap_at(1);
    assert!((at_one - at_minus_one).abs() < 1e-20);

    assert_eq!(gap_at(2), f64::from_bits(1));
}
18628
/// `frexp` splits each value into a `(mantissa, exponent)` pair, e.g.
/// 4.0 -> (0.5, 3), 0.5 -> (0.5, 0), -8.0 -> (-0.5, 4) and 0.0 -> (0.0, 0).
#[test]
fn frexp_decomposes_floats() {
    let inputs: Vec<Scalar> = [4.0, 0.5, -8.0, 0.0].into_iter().map(Scalar::Float64).collect();
    let col = Column::from_values(inputs).unwrap();

    let (mant, exp) = col.frexp().unwrap();

    // One `(mantissa, exponent)` expectation per input row.
    let expected = [(0.5, 3), (0.5, 0), (-0.5, 4), (0.0, 0)];
    for (row, (want_mant, want_exp)) in expected.into_iter().enumerate() {
        let got_mant = mant.values()[row].to_f64().unwrap();
        assert!((got_mant - want_mant).abs() < 1e-10);
        assert_eq!(exp.values()[row].to_i64().unwrap(), want_exp);
    }
}
18652
/// `nextafter` moves each element one representable step towards its target:
/// 0 -> smallest subnormal, 1 -> just above 1 (towards 2), 1 -> just below 1
/// (towards 0).
#[test]
fn nextafter_returns_adjacent_floats() {
    let starts: Vec<Scalar> = [0.0, 1.0, 1.0].into_iter().map(Scalar::Float64).collect();
    let targets: Vec<Scalar> = [1.0, 2.0, 0.0].into_iter().map(Scalar::Float64).collect();
    let col = Column::from_values(starts).unwrap();
    let toward = Column::from_values(targets).unwrap();

    let stepped = col.nextafter(&toward).unwrap();
    let at = |idx: usize| stepped.values()[idx].to_f64().unwrap();

    assert_eq!(at(0), f64::from_bits(1));

    let just_above = at(1);
    assert!(1.0 < just_above && just_above < 1.0 + 1e-15);

    let just_below = at(2);
    assert!(just_below < 1.0 && 1.0 - 1e-15 < just_below);
}
18677
/// `isneginf` flags only `-inf` and `isposinf` flags only `+inf`; finite
/// values and NaN are `false` for both.
#[test]
fn isneginf_isposinf_detect_infinities() {
    let inputs: Vec<Scalar> = [f64::NEG_INFINITY, f64::INFINITY, 1.0, f64::NAN]
        .into_iter()
        .map(Scalar::Float64)
        .collect();
    let col = Column::from_values(inputs).unwrap();

    let neginf = col.isneginf().unwrap();
    let posinf = col.isposinf().unwrap();

    for (idx, want) in [true, false, false, false].into_iter().enumerate() {
        assert_eq!(neginf.values()[idx], Scalar::Bool(want));
    }
    for (idx, want) in [false, true, false, false].into_iter().enumerate() {
        assert_eq!(posinf.values()[idx], Scalar::Bool(want));
    }
}
18698
/// `exp2` raises two to each element: 0 -> 1, 1 -> 2, 3 -> 8, -1 -> 0.5.
#[test]
fn exp2_computes_power_of_two() {
    let exponents: Vec<Scalar> = [0.0, 1.0, 3.0, -1.0].into_iter().map(Scalar::Float64).collect();
    let col = Column::from_values(exponents).unwrap();

    let powers = col.exp2().unwrap();

    for (idx, want) in [1.0, 2.0, 8.0, 0.5].into_iter().enumerate() {
        let got = powers.values()[idx].to_f64().unwrap();
        assert!((got - want).abs() < 1e-10);
    }
}
18714
/// `sinc` evaluates to 1 at 0, to 0 at 1, and to 2/pi at 0.5.
#[test]
fn sinc_computes_sinc_function() {
    use std::f64::consts::PI;

    let inputs: Vec<Scalar> = [0.0, 1.0, 0.5].into_iter().map(Scalar::Float64).collect();
    let col = Column::from_values(inputs).unwrap();

    let evaluated = col.sinc().unwrap();
    let at = |idx: usize| evaluated.values()[idx].to_f64().unwrap();

    assert!((at(0) - 1.0).abs() < 1e-10);
    assert!(at(1).abs() < 1e-10);

    let half_point = 2.0 / PI;
    assert!((at(2) - half_point).abs() < 1e-10);
}
18732
/// `logaddexp` matches `ln(exp(x) + exp(y))`, including for a pair of -1000
/// inputs where the expected result is `-1000 + ln 2`.
#[test]
fn logaddexp_computes_stable_log_sum() {
    use std::f64::consts::LN_2;

    let lhs: Vec<Scalar> = [0.0, 1.0, -1000.0].into_iter().map(Scalar::Float64).collect();
    let rhs: Vec<Scalar> = [0.0, 2.0, -1000.0].into_iter().map(Scalar::Float64).collect();
    let x = Column::from_values(lhs).unwrap();
    let y = Column::from_values(rhs).unwrap();

    let combined = x.logaddexp(&y).unwrap();
    let at = |idx: usize| combined.values()[idx].to_f64().unwrap();

    // ln(e^0 + e^0) = ln 2.
    assert!((at(0) - LN_2).abs() < 1e-10);

    let direct = (1.0_f64.exp() + 2.0_f64.exp()).ln();
    assert!((at(1) - direct).abs() < 1e-10);

    // Very negative inputs are checked with the looser 1e-8 tolerance.
    let shifted = -1000.0 + LN_2;
    assert!((at(2) - shifted).abs() < 1e-8);
}
18757
/// `logaddexp2` on equal inputs adds one: (0, 0) -> 1 and (1, 1) -> 2.
#[test]
fn logaddexp2_computes_stable_log2_sum() {
    let lhs: Vec<Scalar> = [0.0, 1.0].into_iter().map(Scalar::Float64).collect();
    let rhs: Vec<Scalar> = [0.0, 1.0].into_iter().map(Scalar::Float64).collect();
    let x = Column::from_values(lhs).unwrap();
    let y = Column::from_values(rhs).unwrap();

    let combined = x.logaddexp2(&y).unwrap();

    for (idx, want) in [1.0, 2.0].into_iter().enumerate() {
        let got = combined.values()[idx].to_f64().unwrap();
        assert!((got - want).abs() < 1e-10);
    }
}
18768
/// `roll` rotates `[1, 2, 3, 4, 5]`: +2 starts with `4, 5, 1`, -2 starts with
/// `3, 4, 5`, and shifts of 0 or the full length leave the first element at 1.
#[test]
fn roll_shifts_elements_circularly() {
    let inputs: Vec<Scalar> = [1, 2, 3, 4, 5].into_iter().map(Scalar::Int64).collect();
    let col = Column::from_values(inputs).unwrap();

    let forward = col.roll(2).unwrap();
    for (idx, want) in [4, 5, 1].into_iter().enumerate() {
        assert_eq!(forward.values()[idx].to_i64().unwrap(), want);
    }

    let backward = col.roll(-2).unwrap();
    for (idx, want) in [3, 4, 5].into_iter().enumerate() {
        assert_eq!(backward.values()[idx].to_i64().unwrap(), want);
    }

    // Neither a zero shift nor a full-length shift moves the first element.
    let unmoved = col.roll(0).unwrap();
    assert_eq!(unmoved.values()[0].to_i64().unwrap(), 1);
    let full_turn = col.roll(5).unwrap();
    assert_eq!(full_turn.values()[0].to_i64().unwrap(), 1);
}
18795
/// `trim_zeros` on `[0, 0, 1, 2, 0]`: `"fb"` keeps `[1, 2]`, `"f"` keeps three
/// elements starting at 1, and `"b"` keeps four elements ending at 2.
#[test]
fn trim_zeros_removes_leading_trailing() {
    let inputs: Vec<Scalar> = [0, 0, 1, 2, 0].into_iter().map(Scalar::Int64).collect();
    let col = Column::from_values(inputs).unwrap();

    let both_ends = col.trim_zeros("fb").unwrap();
    assert_eq!(both_ends.len(), 2);
    for (idx, want) in [1, 2].into_iter().enumerate() {
        assert_eq!(both_ends.values()[idx].to_i64().unwrap(), want);
    }

    let front_only = col.trim_zeros("f").unwrap();
    assert_eq!(front_only.len(), 3);
    assert_eq!(front_only.values()[0].to_i64().unwrap(), 1);

    let back_only = col.trim_zeros("b").unwrap();
    assert_eq!(back_only.len(), 4);
    assert_eq!(back_only.values()[3].to_i64().unwrap(), 2);
}
18820
/// `around` honours positive, zero and negative `decimals`:
/// two places, whole numbers, and rounding to the nearest ten.
#[test]
fn around_rounds_to_decimals() {
    let fractional: Vec<Scalar> = [1.234, 5.678, 3.5].into_iter().map(Scalar::Float64).collect();
    let col = Column::from_values(fractional).unwrap();

    let two_places = col.around(2).unwrap();
    for (idx, want) in [1.23, 5.68, 3.5].into_iter().enumerate() {
        let got = two_places.values()[idx].to_f64().unwrap();
        assert!((got - want).abs() < 1e-10);
    }

    // Only the first two elements are checked for `decimals = 0`.
    let whole = col.around(0).unwrap();
    for (idx, want) in [1.0, 6.0].into_iter().enumerate() {
        let got = whole.values()[idx].to_f64().unwrap();
        assert!((got - want).abs() < 1e-10);
    }

    let larger: Vec<Scalar> = [15.0, 24.0, 35.0].into_iter().map(Scalar::Float64).collect();
    let col2 = Column::from_values(larger).unwrap();
    let to_tens = col2.around(-1).unwrap();
    for (idx, want) in [20.0, 20.0, 40.0].into_iter().enumerate() {
        let got = to_tens.values()[idx].to_f64().unwrap();
        assert!((got - want).abs() < 1e-10);
    }
}
18850
/// Exact ties in `around` go to the even neighbour, both at `decimals = 0`
/// and at `decimals = -1`, and `around(0)` agrees with `round(0)`.
#[test]
fn around_uses_numpy_half_even_ties() {
    let halves: Vec<Scalar> = [0.5, 1.5, 2.5, 3.5, -2.5]
        .into_iter()
        .map(Scalar::Float64)
        .collect();
    let col = Column::from_values(halves).unwrap();

    let rounded = col.around(0).unwrap();
    let rounded_values: Vec<f64> = rounded
        .values()
        .iter()
        .map(|cell| cell.to_f64().unwrap())
        .collect();
    assert_eq!(rounded_values, [0.0, 2.0, 2.0, 4.0, -2.0]);

    let fives: Vec<Scalar> = [15.0, 25.0, 35.0].into_iter().map(Scalar::Float64).collect();
    let tens = Column::from_values(fives).unwrap();
    let rounded_tens = tens.around(-1).unwrap();
    let tens_values: Vec<f64> = rounded_tens
        .values()
        .iter()
        .map(|cell| cell.to_f64().unwrap())
        .collect();
    assert_eq!(tens_values, [20.0, 20.0, 40.0]);

    // `around` and `round` must produce identical scalars for `decimals = 0`.
    let via_around = col.around(0).unwrap();
    let via_round = col.round(0).unwrap();
    assert_eq!(via_around.values(), via_round.values());
}
18885
/// After `unwrap(None)`, the first two samples are unchanged and the step
/// between the second and third samples is smaller than pi.
#[test]
fn unwrap_removes_phase_discontinuities() {
    use std::f64::consts::PI;

    // The input jumps from +0.9*pi to -0.9*pi between samples 1 and 2.
    let col = Column::from_values(vec![
        Scalar::Float64(0.0),
        Scalar::Float64(PI * 0.9),
        Scalar::Float64(-PI * 0.9),
        Scalar::Float64(0.0),
    ])
    .unwrap();

    let unwrapped = col.unwrap(None).unwrap();
    let phase_at = |idx: usize| unwrapped.values()[idx].to_f64().unwrap();

    assert!((phase_at(0) - 0.0).abs() < 1e-10);
    assert!((phase_at(1) - PI * 0.9).abs() < 1e-10);

    let third = phase_at(2);
    let second = phase_at(1);
    assert!((third - second).abs() < PI);
}
18906 }
18907
/// `has_nulls` is `true` for a column containing `Scalar::Null` and `false`
/// for a column of plain integers.
#[test]
fn column_has_nulls_detects_missing_values() {
    let with_gap = vec![
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
    ];
    let col_with_null = Column::from_values(with_gap).unwrap();
    assert!(col_with_null.has_nulls());

    let dense: Vec<Scalar> = [1, 2, 3].into_iter().map(Scalar::Int64).collect();
    let col_no_null = Column::from_values(dense).unwrap();
    assert!(!col_no_null.has_nulls());
}
18925
/// An `Int64` column that holds a null is promoted to `Int64Nullable` with
/// its length unchanged.
#[test]
fn column_promote_to_nullable_upgrades_dtype() {
    let cells = vec![
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
    ];
    let original = Column::new(DType::Int64, cells).unwrap();

    // Preconditions: declared as plain Int64, yet a null is present.
    assert_eq!(original.dtype(), DType::Int64);
    assert!(original.has_nulls());

    let widened = original.promote_to_nullable();
    assert_eq!(widened.dtype(), DType::Int64Nullable);
    assert_eq!(widened.len(), 3);
}
18944
/// Without any nulls, `promote_to_nullable` leaves the dtype at `Int64`.
#[test]
fn column_promote_to_nullable_noop_without_nulls() {
    let dense: Vec<Scalar> = [1, 2, 3].into_iter().map(Scalar::Int64).collect();
    let original = Column::from_values(dense).unwrap();

    let untouched = original.promote_to_nullable();

    assert_eq!(untouched.dtype(), DType::Int64);
}
18953
/// `with_dtype` swaps the reported dtype while the stored scalar is unchanged.
#[test]
fn column_with_dtype_changes_metadata() {
    let single = Column::new(DType::Int64, vec![Scalar::Int64(42)]).unwrap();

    let relabelled = single.with_dtype(DType::Int64Nullable);

    assert_eq!(relabelled.dtype(), DType::Int64Nullable);
    assert_eq!(relabelled.values()[0], Scalar::Int64(42));
}
18961
/// Building `ColumnData` from all-valid integers with the `Int64Nullable`
/// dtype must yield the `Int64` variant holding the same numbers.
#[test]
fn nullable_int64_from_scalars_preserves_storage() {
    use super::ColumnData;

    let scalars: Vec<Scalar> = [1, 2, 3].into_iter().map(Scalar::Int64).collect();

    match ColumnData::from_scalars(&scalars, DType::Int64Nullable) {
        ColumnData::Int64(stored) => assert_eq!(stored, vec![1, 2, 3]),
        // Any other variant fails the test, as the original `matches!` assertion did.
        _ => panic!("assertion failed: matches!(&data, ColumnData::Int64(_))"),
    }
}
18972
/// The typed constructors `from_i64_values` / `from_f64_values` report the
/// matching dtype, an all-valid mask and a `data` field that is `None`, while
/// still exposing both the typed slice and the scalar view. Floats are compared
/// bit-for-bit so that `-0.0` and infinity are checked exactly.
#[test]
fn typed_all_valid_constructors_keep_single_typed_backing() {
    let ints = Column::from_i64_values(vec![1, 2, 3]);
    assert_eq!(ints.dtype(), DType::Int64);
    assert!(ints.validity.all());
    assert!(ints.data.is_none());
    let want_ints = [1, 2, 3];
    assert_eq!(ints.as_i64_slice(), Some(want_ints.as_slice()));
    let want_int_scalars = [Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)];
    assert_eq!(ints.values(), &want_int_scalars);

    let floats = Column::from_f64_values(vec![1.5, -0.0, f64::INFINITY]);
    assert_eq!(floats.dtype(), DType::Float64);
    assert!(floats.validity.all());
    assert!(floats.data.is_none());

    // Compare raw bit patterns: `-0.0 == 0.0` would hide a lost sign bit.
    let got_bits: Option<Vec<u64>> = floats
        .as_f64_slice()
        .map(|typed| typed.iter().copied().map(f64::to_bits).collect());
    let want_bits = [1.5_f64, -0.0, f64::INFINITY].map(f64::to_bits).to_vec();
    assert_eq!(got_bits, Some(want_bits));

    let want_float_scalars = [
        Scalar::Float64(1.5),
        Scalar::Float64(-0.0),
        Scalar::Float64(f64::INFINITY),
    ];
    assert_eq!(floats.values(), &want_float_scalars);
}
19011
/// A column built from `(start, len)` slices over a shared buffer must be
/// indistinguishable from the same values passed to `from_i64_values`.
#[test]
fn repeated_slice_int64_column_matches_eager_materialization() {
    let backing = vec![10, 11, 12, 20, 21];
    // Each pair is passed through as-is: (0, 3), (3, 2), then (0, 3) again.
    let slices = vec![(0, 3), (3, 2), (0, 3)];
    let lazy = Column::from_i64_repeated_slices(backing, slices);

    let flattened = vec![10, 11, 12, 20, 21, 10, 11, 12];
    let eager = Column::from_i64_values(flattened);

    assert_eq!(lazy.dtype(), DType::Int64);
    assert!(lazy.validity.all());
    assert_eq!(lazy.len(), eager.len());
    assert_eq!(lazy.as_i64_slice(), eager.as_i64_slice());
    assert_eq!(lazy.values(), eager.values());
    assert_eq!(lazy, eager);
}
19027}