1#![allow(unsafe_op_in_unsafe_fn)]
2use std::borrow::Cow;
3
4use arrow::types::PrimitiveType;
5use polars_compute::cast::SerPrimitive;
6use polars_error::feature_gated;
7use polars_utils::total_ord::ToTotalOrd;
8
9use super::*;
10use crate::CHEAP_SERIES_HASH_LIMIT;
11#[cfg(feature = "dtype-struct")]
12use crate::prelude::any_value::arr_to_any_value;
13
/// Owning wrapper around a boxed [`PolarsObjectSafe`] trait object.
///
/// This is the payload type of [`AnyValue::ObjectOwned`].
#[cfg(feature = "object")]
#[derive(Debug)]
pub struct OwnedObject(pub Box<dyn PolarsObjectSafe>);
17
#[cfg(feature = "object")]
impl Clone for OwnedObject {
    /// Clones the wrapped object by asking it for a freshly boxed copy of itself.
    fn clone(&self) -> Self {
        let boxed = self.0.to_boxed();
        OwnedObject(boxed)
    }
}
24
/// A single dynamically typed value.
///
/// Variants that carry the `'a` lifetime borrow their payload from elsewhere;
/// the `*Owned` variants hold the payload themselves.
#[derive(Debug, Clone, Default)]
pub enum AnyValue<'a> {
    /// Missing value. This is the `Default` variant.
    #[default]
    Null,
    /// A boolean value.
    Boolean(bool),
    /// A borrowed string slice.
    String(&'a str),
    /// An unsigned 8-bit integer.
    UInt8(u8),
    /// An unsigned 16-bit integer.
    UInt16(u16),
    /// An unsigned 32-bit integer.
    UInt32(u32),
    /// An unsigned 64-bit integer.
    UInt64(u64),
    /// A signed 8-bit integer.
    Int8(i8),
    /// A signed 16-bit integer.
    Int16(i16),
    /// A signed 32-bit integer.
    Int32(i32),
    /// A signed 64-bit integer.
    Int64(i64),
    /// A signed 128-bit integer.
    Int128(i128),
    /// A 32-bit float.
    Float32(f32),
    /// A 64-bit float.
    Float64(f64),
    /// A date stored as an `i32`.
    // NOTE(review): the casts in this file scale it by `*_IN_DAY`, so it is a day count;
    // presumably days since the UNIX epoch — confirm.
    #[cfg(feature = "dtype-date")]
    Date(i32),
    /// A timestamp in the given [`TimeUnit`] with an optional borrowed time zone.
    #[cfg(feature = "dtype-datetime")]
    Datetime(i64, TimeUnit, Option<&'a TimeZone>),
    /// Same as `Datetime`, but the time zone is owned through an `Arc`.
    #[cfg(feature = "dtype-datetime")]
    DatetimeOwned(i64, TimeUnit, Option<Arc<TimeZone>>),
    /// A duration expressed in the given [`TimeUnit`].
    #[cfg(feature = "dtype-duration")]
    Duration(i64, TimeUnit),
    /// A time of day stored as an `i64`.
    // NOTE(review): the datetime -> time cast in this file produces nanoseconds; confirm the unit.
    #[cfg(feature = "dtype-time")]
    Time(i64),
    /// A category id together with a borrowed mapping used to resolve it to a string.
    #[cfg(feature = "dtype-categorical")]
    Categorical(CatSize, &'a Arc<CategoricalMapping>),
    /// Same as `Categorical`, but the mapping is owned.
    #[cfg(feature = "dtype-categorical")]
    CategoricalOwned(CatSize, Arc<CategoricalMapping>),
    /// An enum category id together with a borrowed mapping used to resolve it to a string.
    #[cfg(feature = "dtype-categorical")]
    Enum(CatSize, &'a Arc<CategoricalMapping>),
    /// Same as `Enum`, but the mapping is owned.
    #[cfg(feature = "dtype-categorical")]
    EnumOwned(CatSize, Arc<CategoricalMapping>),
    /// A list value; the elements are stored in a [`Series`].
    List(Series),
    /// A fixed-size array value: the elements and the array width.
    #[cfg(feature = "dtype-array")]
    Array(Series, usize),
    /// A borrowed object value.
    #[cfg(feature = "object")]
    Object(&'a dyn PolarsObjectSafe),
    /// An owned object value.
    #[cfg(feature = "object")]
    ObjectOwned(OwnedObject),
    /// A struct value referenced as a row index into a struct array, plus its fields.
    #[cfg(feature = "dtype-struct")]
    Struct(usize, &'a StructArray, &'a [Field]),
    /// A struct value holding one `AnyValue` per field together with the fields.
    #[cfg(feature = "dtype-struct")]
    StructOwned(Box<(Vec<AnyValue<'a>>, Vec<Field>)>),
    /// An owned string.
    StringOwned(PlSmallStr),
    /// A borrowed byte slice.
    Binary(&'a [u8]),
    /// An owned byte buffer.
    BinaryOwned(Vec<u8>),
    /// A decimal: the unscaled `i128` value and its scale (number of fractional digits).
    #[cfg(feature = "dtype-decimal")]
    Decimal(i128, usize),
}
106
107impl AnyValue<'static> {
108 pub fn zero_sum(dtype: &DataType) -> Self {
109 match dtype {
110 DataType::String => AnyValue::StringOwned(PlSmallStr::EMPTY),
111 DataType::Binary => AnyValue::BinaryOwned(Vec::new()),
112 DataType::Boolean => (0 as IdxSize).into(),
113 d if d.is_primitive_numeric() => unsafe {
115 std::mem::transmute::<AnyValue<'_>, AnyValue<'static>>(
116 AnyValue::UInt8(0).cast(dtype),
117 )
118 },
119 #[cfg(feature = "dtype-duration")]
120 DataType::Duration(unit) => AnyValue::Duration(0, *unit),
121 #[cfg(feature = "dtype-decimal")]
122 DataType::Decimal(_p, s) => {
123 AnyValue::Decimal(0, s.expect("unknown scale during execution"))
124 },
125 _ => AnyValue::Null,
126 }
127 }
128
129 pub fn can_have_dtype(&self, dtype: &DataType) -> bool {
131 matches!(self, AnyValue::Null) || dtype == &self.dtype()
132 }
133
134 pub fn default_value(
136 dtype: &DataType,
137 numeric_to_one: bool,
138 num_list_values: usize,
139 ) -> AnyValue<'static> {
140 use {AnyValue as AV, DataType as DT};
141 match dtype {
142 DT::Boolean => AV::Boolean(false),
143 DT::UInt8 => AV::UInt8(numeric_to_one.into()),
144 DT::UInt16 => AV::UInt16(numeric_to_one.into()),
145 DT::UInt32 => AV::UInt32(numeric_to_one.into()),
146 DT::UInt64 => AV::UInt64(numeric_to_one.into()),
147 DT::Int8 => AV::Int8(numeric_to_one.into()),
148 DT::Int16 => AV::Int16(numeric_to_one.into()),
149 DT::Int32 => AV::Int32(numeric_to_one.into()),
150 DT::Int64 => AV::Int64(numeric_to_one.into()),
151 DT::Int128 => AV::Int128(numeric_to_one.into()),
152 DT::Float32 => AV::Float32(numeric_to_one.into()),
153 DT::Float64 => AV::Float64(numeric_to_one.into()),
154 #[cfg(feature = "dtype-decimal")]
155 DT::Decimal(_, scale) => AV::Decimal(0, scale.unwrap()),
156 DT::String => AV::String(""),
157 DT::Binary => AV::Binary(&[]),
158 DT::BinaryOffset => AV::Binary(&[]),
159 DT::Date => feature_gated!("dtype-date", AV::Date(0)),
160 DT::Datetime(time_unit, time_zone) => feature_gated!(
161 "dtype-datetime",
162 AV::DatetimeOwned(0, *time_unit, time_zone.clone().map(Arc::new))
163 ),
164 DT::Duration(time_unit) => {
165 feature_gated!("dtype-duration", AV::Duration(0, *time_unit))
166 },
167 DT::Time => feature_gated!("dtype-time", AV::Time(0)),
168 #[cfg(feature = "dtype-array")]
169 DT::Array(inner_dtype, width) => {
170 let inner_value =
171 AnyValue::default_value(inner_dtype, numeric_to_one, num_list_values);
172 AV::Array(
173 Scalar::new(inner_dtype.as_ref().clone(), inner_value)
174 .into_series(PlSmallStr::EMPTY)
175 .new_from_index(0, *width),
176 *width,
177 )
178 },
179 DT::List(inner_dtype) => AV::List(if num_list_values == 0 {
180 Series::new_empty(PlSmallStr::EMPTY, inner_dtype.as_ref())
181 } else {
182 let inner_value =
183 AnyValue::default_value(inner_dtype, numeric_to_one, num_list_values);
184
185 Scalar::new(inner_dtype.as_ref().clone(), inner_value)
186 .into_series(PlSmallStr::EMPTY)
187 .new_from_index(0, num_list_values)
188 }),
189 #[cfg(feature = "object")]
190 DT::Object(_) => AV::Null,
191 DT::Null => AV::Null,
192 #[cfg(feature = "dtype-categorical")]
193 DT::Categorical(_, _) => AV::Null,
194 #[cfg(feature = "dtype-categorical")]
195 DT::Enum(categories, mapping) => match categories.categories().is_empty() {
196 true => AV::Null,
197 false => AV::EnumOwned(0, mapping.clone()),
198 },
199 #[cfg(feature = "dtype-struct")]
200 DT::Struct(fields) => AV::StructOwned(Box::new((
201 fields
202 .iter()
203 .map(|f| AnyValue::default_value(f.dtype(), numeric_to_one, num_list_values))
204 .collect(),
205 fields.clone(),
206 ))),
207 DT::Unknown(_) => unreachable!(),
208 }
209 }
210}
211
212impl<'a> AnyValue<'a> {
213 pub fn dtype(&self) -> DataType {
218 use AnyValue::*;
219 match self {
220 Null => DataType::Null,
221 Boolean(_) => DataType::Boolean,
222 Int8(_) => DataType::Int8,
223 Int16(_) => DataType::Int16,
224 Int32(_) => DataType::Int32,
225 Int64(_) => DataType::Int64,
226 Int128(_) => DataType::Int128,
227 UInt8(_) => DataType::UInt8,
228 UInt16(_) => DataType::UInt16,
229 UInt32(_) => DataType::UInt32,
230 UInt64(_) => DataType::UInt64,
231 Float32(_) => DataType::Float32,
232 Float64(_) => DataType::Float64,
233 String(_) | StringOwned(_) => DataType::String,
234 Binary(_) | BinaryOwned(_) => DataType::Binary,
235 #[cfg(feature = "dtype-date")]
236 Date(_) => DataType::Date,
237 #[cfg(feature = "dtype-time")]
238 Time(_) => DataType::Time,
239 #[cfg(feature = "dtype-datetime")]
240 Datetime(_, tu, tz) => DataType::Datetime(*tu, (*tz).cloned()),
241 #[cfg(feature = "dtype-datetime")]
242 DatetimeOwned(_, tu, tz) => {
243 DataType::Datetime(*tu, tz.as_ref().map(|v| v.as_ref().clone()))
244 },
245 #[cfg(feature = "dtype-duration")]
246 Duration(_, tu) => DataType::Duration(*tu),
247 #[cfg(feature = "dtype-categorical")]
248 Categorical(_, _) | CategoricalOwned(_, _) => {
249 unimplemented!("can not get dtype of Categorical AnyValue")
250 },
251 #[cfg(feature = "dtype-categorical")]
252 Enum(_, _) | EnumOwned(_, _) => unimplemented!("can not get dtype of Enum AnyValue"),
253 List(s) => DataType::List(Box::new(s.dtype().clone())),
254 #[cfg(feature = "dtype-array")]
255 Array(s, size) => DataType::Array(Box::new(s.dtype().clone()), *size),
256 #[cfg(feature = "dtype-struct")]
257 Struct(_, _, fields) => DataType::Struct(fields.to_vec()),
258 #[cfg(feature = "dtype-struct")]
259 StructOwned(payload) => DataType::Struct(payload.1.clone()),
260 #[cfg(feature = "dtype-decimal")]
261 Decimal(_, scale) => DataType::Decimal(None, Some(*scale)),
262 #[cfg(feature = "object")]
263 Object(o) => DataType::Object(o.type_name()),
264 #[cfg(feature = "object")]
265 ObjectOwned(o) => DataType::Object(o.0.type_name()),
266 }
267 }
268
269 #[doc(hidden)]
271 #[inline]
272 pub fn extract<T: NumCast>(&self) -> Option<T> {
273 use AnyValue::*;
274 match self {
275 Int8(v) => NumCast::from(*v),
276 Int16(v) => NumCast::from(*v),
277 Int32(v) => NumCast::from(*v),
278 Int64(v) => NumCast::from(*v),
279 Int128(v) => NumCast::from(*v),
280 UInt8(v) => NumCast::from(*v),
281 UInt16(v) => NumCast::from(*v),
282 UInt32(v) => NumCast::from(*v),
283 UInt64(v) => NumCast::from(*v),
284 Float32(v) => NumCast::from(*v),
285 Float64(v) => NumCast::from(*v),
286 #[cfg(feature = "dtype-date")]
287 Date(v) => NumCast::from(*v),
288 #[cfg(feature = "dtype-datetime")]
289 Datetime(v, _, _) | DatetimeOwned(v, _, _) => NumCast::from(*v),
290 #[cfg(feature = "dtype-time")]
291 Time(v) => NumCast::from(*v),
292 #[cfg(feature = "dtype-duration")]
293 Duration(v, _) => NumCast::from(*v),
294 #[cfg(feature = "dtype-decimal")]
295 Decimal(v, scale) => {
296 if *scale == 0 {
297 NumCast::from(*v)
298 } else {
299 let f: Option<f64> = NumCast::from(*v);
300 NumCast::from(f? / 10f64.powi(*scale as _))
301 }
302 },
303 Boolean(v) => NumCast::from(if *v { 1 } else { 0 }),
304 String(v) => {
305 if let Ok(val) = (*v).parse::<i128>() {
306 NumCast::from(val)
307 } else {
308 NumCast::from((*v).parse::<f64>().ok()?)
309 }
310 },
311 StringOwned(v) => String(v.as_str()).extract(),
312 _ => None,
313 }
314 }
315
316 #[inline]
317 pub fn try_extract<T: NumCast>(&self) -> PolarsResult<T> {
318 self.extract().ok_or_else(|| {
319 polars_err!(
320 ComputeError: "could not extract number from any-value of dtype: '{:?}'",
321 self.dtype(),
322 )
323 })
324 }
325
326 pub fn is_boolean(&self) -> bool {
327 matches!(self, AnyValue::Boolean(_))
328 }
329
330 pub fn is_primitive_numeric(&self) -> bool {
331 self.is_integer() || self.is_float()
332 }
333
334 pub fn is_float(&self) -> bool {
335 matches!(self, AnyValue::Float32(_) | AnyValue::Float64(_))
336 }
337
338 pub fn is_integer(&self) -> bool {
339 self.is_signed_integer() || self.is_unsigned_integer()
340 }
341
342 pub fn is_signed_integer(&self) -> bool {
343 matches!(
344 self,
345 AnyValue::Int8(_)
346 | AnyValue::Int16(_)
347 | AnyValue::Int32(_)
348 | AnyValue::Int64(_)
349 | AnyValue::Int128(_)
350 )
351 }
352
353 pub fn is_unsigned_integer(&self) -> bool {
354 matches!(
355 self,
356 AnyValue::UInt8(_) | AnyValue::UInt16(_) | AnyValue::UInt32(_) | AnyValue::UInt64(_)
357 )
358 }
359
360 pub fn is_nan(&self) -> bool {
361 match self {
362 AnyValue::Float32(f) => f.is_nan(),
363 AnyValue::Float64(f) => f.is_nan(),
364 _ => false,
365 }
366 }
367
368 pub fn is_null(&self) -> bool {
369 matches!(self, AnyValue::Null)
370 }
371
372 pub fn is_nested_null(&self) -> bool {
373 match self {
374 AnyValue::Null => true,
375 AnyValue::List(s) => s.null_count() == s.len(),
376 #[cfg(feature = "dtype-array")]
377 AnyValue::Array(s, _) => s.null_count() == s.len(),
378 #[cfg(feature = "dtype-struct")]
379 AnyValue::Struct(_, _, _) => self._iter_struct_av().all(|av| av.is_nested_null()),
380 _ => false,
381 }
382 }
383
384 pub fn strict_cast(&self, dtype: &'a DataType) -> Option<AnyValue<'a>> {
387 let new_av = match (self, dtype) {
388 (av, DataType::UInt8) => AnyValue::UInt8(av.extract::<u8>()?),
390 (av, DataType::UInt16) => AnyValue::UInt16(av.extract::<u16>()?),
391 (av, DataType::UInt32) => AnyValue::UInt32(av.extract::<u32>()?),
392 (av, DataType::UInt64) => AnyValue::UInt64(av.extract::<u64>()?),
393 (av, DataType::Int8) => AnyValue::Int8(av.extract::<i8>()?),
394 (av, DataType::Int16) => AnyValue::Int16(av.extract::<i16>()?),
395 (av, DataType::Int32) => AnyValue::Int32(av.extract::<i32>()?),
396 (av, DataType::Int64) => AnyValue::Int64(av.extract::<i64>()?),
397 (av, DataType::Int128) => AnyValue::Int128(av.extract::<i128>()?),
398 (av, DataType::Float32) => AnyValue::Float32(av.extract::<f32>()?),
399 (av, DataType::Float64) => AnyValue::Float64(av.extract::<f64>()?),
400
401 (AnyValue::UInt8(v), DataType::Boolean) => AnyValue::Boolean(*v != u8::default()),
403 (AnyValue::UInt16(v), DataType::Boolean) => AnyValue::Boolean(*v != u16::default()),
404 (AnyValue::UInt32(v), DataType::Boolean) => AnyValue::Boolean(*v != u32::default()),
405 (AnyValue::UInt64(v), DataType::Boolean) => AnyValue::Boolean(*v != u64::default()),
406 (AnyValue::Int8(v), DataType::Boolean) => AnyValue::Boolean(*v != i8::default()),
407 (AnyValue::Int16(v), DataType::Boolean) => AnyValue::Boolean(*v != i16::default()),
408 (AnyValue::Int32(v), DataType::Boolean) => AnyValue::Boolean(*v != i32::default()),
409 (AnyValue::Int64(v), DataType::Boolean) => AnyValue::Boolean(*v != i64::default()),
410 (AnyValue::Int128(v), DataType::Boolean) => AnyValue::Boolean(*v != i128::default()),
411 (AnyValue::Float32(v), DataType::Boolean) => AnyValue::Boolean(*v != f32::default()),
412 (AnyValue::Float64(v), DataType::Boolean) => AnyValue::Boolean(*v != f64::default()),
413
414 #[cfg(feature = "dtype-categorical")]
416 (
417 &AnyValue::Categorical(cat, &ref lmap) | &AnyValue::CategoricalOwned(cat, ref lmap),
418 DataType::Categorical(_, rmap),
419 ) => {
420 if Arc::ptr_eq(lmap, rmap) {
421 self.clone()
422 } else {
423 let s = unsafe { lmap.cat_to_str_unchecked(cat) };
424 let new_cat = rmap.insert_cat(s).unwrap();
425 AnyValue::CategoricalOwned(new_cat, rmap.clone())
426 }
427 },
428
429 #[cfg(feature = "dtype-categorical")]
430 (
431 &AnyValue::Enum(cat, &ref lmap) | &AnyValue::EnumOwned(cat, ref lmap),
432 DataType::Enum(_, rmap),
433 ) => {
434 if Arc::ptr_eq(lmap, rmap) {
435 self.clone()
436 } else {
437 let s = unsafe { lmap.cat_to_str_unchecked(cat) };
438 let new_cat = rmap.get_cat(s)?;
439 AnyValue::EnumOwned(new_cat, rmap.clone())
440 }
441 },
442
443 #[cfg(feature = "dtype-categorical")]
444 (
445 &AnyValue::Categorical(cat, &ref map)
446 | &AnyValue::CategoricalOwned(cat, ref map)
447 | &AnyValue::Enum(cat, &ref map)
448 | &AnyValue::EnumOwned(cat, ref map),
449 DataType::String,
450 ) => {
451 let s = unsafe { map.cat_to_str_unchecked(cat) };
452 AnyValue::StringOwned(PlSmallStr::from(s))
453 },
454
455 #[cfg(feature = "dtype-categorical")]
456 (AnyValue::String(s), DataType::Categorical(_, map)) => {
457 AnyValue::CategoricalOwned(map.insert_cat(s).unwrap(), map.clone())
458 },
459
460 #[cfg(feature = "dtype-categorical")]
461 (AnyValue::StringOwned(s), DataType::Categorical(_, map)) => {
462 AnyValue::CategoricalOwned(map.insert_cat(s).unwrap(), map.clone())
463 },
464
465 #[cfg(feature = "dtype-categorical")]
466 (AnyValue::String(s), DataType::Enum(_, map)) => {
467 AnyValue::CategoricalOwned(map.get_cat(s)?, map.clone())
468 },
469
470 #[cfg(feature = "dtype-categorical")]
471 (AnyValue::StringOwned(s), DataType::Enum(_, map)) => {
472 AnyValue::CategoricalOwned(map.get_cat(s)?, map.clone())
473 },
474
475 (AnyValue::String(v), DataType::String) => AnyValue::String(v),
477 (AnyValue::StringOwned(v), DataType::String) => AnyValue::StringOwned(v.clone()),
478
479 (av, DataType::String) => {
480 let mut tmp = vec![];
481 if av.is_unsigned_integer() {
482 let val = av.extract::<u64>()?;
483 SerPrimitive::write(&mut tmp, val);
484 } else if av.is_float() {
485 let val = av.extract::<f64>()?;
486 SerPrimitive::write(&mut tmp, val);
487 } else {
488 let val = av.extract::<i64>()?;
489 SerPrimitive::write(&mut tmp, val);
490 }
491 AnyValue::StringOwned(PlSmallStr::from_str(std::str::from_utf8(&tmp).unwrap()))
492 },
493
494 (AnyValue::String(v), DataType::Binary) => AnyValue::Binary(v.as_bytes()),
496
497 #[cfg(feature = "dtype-datetime")]
499 (av, DataType::Datetime(tu, tz)) if av.is_primitive_numeric() => {
500 AnyValue::Datetime(av.extract::<i64>()?, *tu, tz.as_ref())
501 },
502 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
503 (AnyValue::Date(v), DataType::Datetime(tu, _)) => AnyValue::Datetime(
504 match tu {
505 TimeUnit::Nanoseconds => (*v as i64) * NS_IN_DAY,
506 TimeUnit::Microseconds => (*v as i64) * US_IN_DAY,
507 TimeUnit::Milliseconds => (*v as i64) * MS_IN_DAY,
508 },
509 *tu,
510 None,
511 ),
512 #[cfg(feature = "dtype-datetime")]
513 (
514 AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _),
515 DataType::Datetime(tu_r, tz_r),
516 ) => AnyValue::Datetime(
517 match (tu, tu_r) {
518 (TimeUnit::Nanoseconds, TimeUnit::Microseconds) => *v / 1_000i64,
519 (TimeUnit::Nanoseconds, TimeUnit::Milliseconds) => *v / 1_000_000i64,
520 (TimeUnit::Microseconds, TimeUnit::Nanoseconds) => *v * 1_000i64,
521 (TimeUnit::Microseconds, TimeUnit::Milliseconds) => *v / 1_000i64,
522 (TimeUnit::Milliseconds, TimeUnit::Microseconds) => *v * 1_000i64,
523 (TimeUnit::Milliseconds, TimeUnit::Nanoseconds) => *v * 1_000_000i64,
524 _ => *v,
525 },
526 *tu_r,
527 tz_r.as_ref(),
528 ),
529
530 #[cfg(feature = "dtype-date")]
532 (av, DataType::Date) if av.is_primitive_numeric() => {
533 AnyValue::Date(av.extract::<i32>()?)
534 },
535 #[cfg(all(feature = "dtype-date", feature = "dtype-datetime"))]
536 (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Date) => {
537 AnyValue::Date(match tu {
538 TimeUnit::Nanoseconds => *v / NS_IN_DAY,
539 TimeUnit::Microseconds => *v / US_IN_DAY,
540 TimeUnit::Milliseconds => *v / MS_IN_DAY,
541 } as i32)
542 },
543
544 #[cfg(feature = "dtype-time")]
546 (av, DataType::Time) if av.is_primitive_numeric() => {
547 AnyValue::Time(av.extract::<i64>()?)
548 },
549 #[cfg(all(feature = "dtype-time", feature = "dtype-datetime"))]
550 (AnyValue::Datetime(v, tu, _) | AnyValue::DatetimeOwned(v, tu, _), DataType::Time) => {
551 AnyValue::Time(match tu {
552 TimeUnit::Nanoseconds => *v % NS_IN_DAY,
553 TimeUnit::Microseconds => (*v % US_IN_DAY) * 1_000i64,
554 TimeUnit::Milliseconds => (*v % MS_IN_DAY) * 1_000_000i64,
555 })
556 },
557
558 #[cfg(feature = "dtype-duration")]
560 (av, DataType::Duration(tu)) if av.is_primitive_numeric() => {
561 AnyValue::Duration(av.extract::<i64>()?, *tu)
562 },
563 #[cfg(all(feature = "dtype-duration", feature = "dtype-time"))]
564 (AnyValue::Time(v), DataType::Duration(tu)) => AnyValue::Duration(
565 match *tu {
566 TimeUnit::Nanoseconds => *v,
567 TimeUnit::Microseconds => *v / 1_000i64,
568 TimeUnit::Milliseconds => *v / 1_000_000i64,
569 },
570 *tu,
571 ),
572 #[cfg(feature = "dtype-duration")]
573 (AnyValue::Duration(v, tu), DataType::Duration(tu_r)) => AnyValue::Duration(
574 match (tu, tu_r) {
575 (_, _) if tu == tu_r => *v,
576 (TimeUnit::Nanoseconds, TimeUnit::Microseconds) => *v / 1_000i64,
577 (TimeUnit::Nanoseconds, TimeUnit::Milliseconds) => *v / 1_000_000i64,
578 (TimeUnit::Microseconds, TimeUnit::Nanoseconds) => *v * 1_000i64,
579 (TimeUnit::Microseconds, TimeUnit::Milliseconds) => *v / 1_000i64,
580 (TimeUnit::Milliseconds, TimeUnit::Microseconds) => *v * 1_000i64,
581 (TimeUnit::Milliseconds, TimeUnit::Nanoseconds) => *v * 1_000_000i64,
582 _ => *v,
583 },
584 *tu_r,
585 ),
586
587 #[cfg(feature = "dtype-decimal")]
589 (av, DataType::Decimal(prec, scale)) if av.is_integer() => {
590 let value = av.try_extract::<i128>().unwrap();
591 let scale = scale.unwrap_or(0);
592 let factor = 10_i128.pow(scale as _); let converted = value.checked_mul(factor)?;
594
595 let prec = prec.unwrap_or(38) as u32;
597 let num_digits = (converted.abs() as f64).log10().ceil() as u32;
598 if num_digits > prec {
599 return None;
600 }
601
602 AnyValue::Decimal(converted, scale)
603 },
604 #[cfg(feature = "dtype-decimal")]
605 (AnyValue::Decimal(value, scale_av), DataType::Decimal(_, scale)) => {
606 let Some(scale) = scale else {
607 return Some(self.clone());
608 };
609 let scale_diff = scale.checked_sub(*scale_av)?;
611 let factor = 10_i128.pow(scale_diff as _); let converted = value.checked_mul(factor)?;
613 AnyValue::Decimal(converted, *scale)
614 },
615
616 (av, dtype) if av.dtype() == *dtype => self.clone(),
618
619 _ => return None,
620 };
621 Some(new_av)
622 }
623
624 pub fn try_strict_cast(&self, dtype: &'a DataType) -> PolarsResult<AnyValue<'a>> {
627 self.strict_cast(dtype).ok_or_else(
628 || polars_err!(ComputeError: "cannot cast any-value {:?} to dtype '{}'", self, dtype),
629 )
630 }
631
632 pub fn cast(&self, dtype: &'a DataType) -> AnyValue<'a> {
633 match self.strict_cast(dtype) {
634 Some(av) => av,
635 None => AnyValue::Null,
636 }
637 }
638
639 pub fn idx(&self) -> IdxSize {
640 match self {
641 #[cfg(not(feature = "bigidx"))]
642 Self::UInt32(v) => *v,
643 #[cfg(feature = "bigidx")]
644 Self::UInt64(v) => *v,
645 _ => panic!("expected index type found {self:?}"),
646 }
647 }
648
649 pub fn str_value(&self) -> Cow<'a, str> {
650 match self {
651 Self::String(s) => Cow::Borrowed(s),
652 Self::StringOwned(s) => Cow::Owned(s.to_string()),
653 Self::Null => Cow::Borrowed("null"),
654 #[cfg(feature = "dtype-categorical")]
655 Self::Categorical(cat, map) | Self::Enum(cat, map) => {
656 Cow::Borrowed(unsafe { map.cat_to_str_unchecked(*cat) })
657 },
658 #[cfg(feature = "dtype-categorical")]
659 Self::CategoricalOwned(cat, map) | Self::EnumOwned(cat, map) => {
660 Cow::Owned(unsafe { map.cat_to_str_unchecked(*cat) }.to_owned())
661 },
662 av => Cow::Owned(av.to_string()),
663 }
664 }
665
666 pub fn to_physical(self) -> Self {
667 match self {
668 Self::Null
669 | Self::Boolean(_)
670 | Self::String(_)
671 | Self::StringOwned(_)
672 | Self::Binary(_)
673 | Self::BinaryOwned(_)
674 | Self::UInt8(_)
675 | Self::UInt16(_)
676 | Self::UInt32(_)
677 | Self::UInt64(_)
678 | Self::Int8(_)
679 | Self::Int16(_)
680 | Self::Int32(_)
681 | Self::Int64(_)
682 | Self::Int128(_)
683 | Self::Float32(_)
684 | Self::Float64(_) => self,
685
686 #[cfg(feature = "object")]
687 Self::Object(_) | Self::ObjectOwned(_) => self,
688
689 #[cfg(feature = "dtype-date")]
690 Self::Date(v) => Self::Int32(v),
691 #[cfg(feature = "dtype-datetime")]
692 Self::Datetime(v, _, _) | Self::DatetimeOwned(v, _, _) => Self::Int64(v),
693
694 #[cfg(feature = "dtype-duration")]
695 Self::Duration(v, _) => Self::Int64(v),
696 #[cfg(feature = "dtype-time")]
697 Self::Time(v) => Self::Int64(v),
698
699 #[cfg(feature = "dtype-categorical")]
700 Self::Categorical(v, _)
701 | Self::CategoricalOwned(v, _)
702 | Self::Enum(v, _)
703 | Self::EnumOwned(v, _) => Self::UInt32(v),
704 Self::List(series) => Self::List(series.to_physical_repr().into_owned()),
705
706 #[cfg(feature = "dtype-array")]
707 Self::Array(series, width) => {
708 Self::Array(series.to_physical_repr().into_owned(), width)
709 },
710
711 #[cfg(feature = "dtype-struct")]
712 Self::Struct(_, _, _) => todo!(),
713 #[cfg(feature = "dtype-struct")]
714 Self::StructOwned(values) => Self::StructOwned(Box::new((
715 values.0.into_iter().map(|v| v.to_physical()).collect(),
716 values
717 .1
718 .into_iter()
719 .map(|mut f| {
720 f.dtype = f.dtype.to_physical();
721 f
722 })
723 .collect(),
724 ))),
725
726 #[cfg(feature = "dtype-decimal")]
727 Self::Decimal(v, _) => Self::Int128(v),
728 }
729 }
730
731 #[inline]
732 pub fn extract_bool(&self) -> Option<bool> {
733 match self {
734 AnyValue::Boolean(v) => Some(*v),
735 _ => None,
736 }
737 }
738
739 #[inline]
740 pub fn extract_str(&self) -> Option<&str> {
741 match self {
742 AnyValue::String(v) => Some(v),
743 AnyValue::StringOwned(v) => Some(v.as_str()),
744 _ => None,
745 }
746 }
747
748 #[inline]
749 pub fn extract_bytes(&self) -> Option<&[u8]> {
750 match self {
751 AnyValue::Binary(v) => Some(v),
752 AnyValue::BinaryOwned(v) => Some(v.as_slice()),
753 _ => None,
754 }
755 }
756}
757
758impl From<AnyValue<'_>> for DataType {
759 fn from(value: AnyValue<'_>) -> Self {
760 value.dtype()
761 }
762}
763
764impl<'a> From<&AnyValue<'a>> for DataType {
765 fn from(value: &AnyValue<'a>) -> Self {
766 value.dtype()
767 }
768}
769
770impl AnyValue<'_> {
771 pub fn hash_impl<H: Hasher>(&self, state: &mut H, cheap: bool) {
772 use AnyValue::*;
773 std::mem::discriminant(self).hash(state);
774 match self {
775 Int8(v) => v.hash(state),
776 Int16(v) => v.hash(state),
777 Int32(v) => v.hash(state),
778 Int64(v) => v.hash(state),
779 Int128(v) => feature_gated!("dtype-i128", v.hash(state)),
780 UInt8(v) => v.hash(state),
781 UInt16(v) => v.hash(state),
782 UInt32(v) => v.hash(state),
783 UInt64(v) => v.hash(state),
784 String(v) => v.hash(state),
785 StringOwned(v) => v.hash(state),
786 Float32(v) => v.to_ne_bytes().hash(state),
787 Float64(v) => v.to_ne_bytes().hash(state),
788 Binary(v) => v.hash(state),
789 BinaryOwned(v) => v.hash(state),
790 Boolean(v) => v.hash(state),
791 List(v) => {
792 if !cheap || v.len() < CHEAP_SERIES_HASH_LIMIT {
793 Hash::hash(&Wrap(v.clone()), state)
794 }
795 },
796 #[cfg(feature = "dtype-array")]
797 Array(v, width) => {
798 if !cheap || v.len() < CHEAP_SERIES_HASH_LIMIT {
799 Hash::hash(&Wrap(v.clone()), state)
800 }
801 width.hash(state)
802 },
803 #[cfg(feature = "dtype-date")]
804 Date(v) => v.hash(state),
805 #[cfg(feature = "dtype-datetime")]
806 Datetime(v, tu, tz) => {
807 v.hash(state);
808 tu.hash(state);
809 tz.hash(state);
810 },
811 #[cfg(feature = "dtype-datetime")]
812 DatetimeOwned(v, tu, tz) => {
813 v.hash(state);
814 tu.hash(state);
815 tz.hash(state);
816 },
817 #[cfg(feature = "dtype-duration")]
818 Duration(v, tz) => {
819 v.hash(state);
820 tz.hash(state);
821 },
822 #[cfg(feature = "dtype-time")]
823 Time(v) => v.hash(state),
824 #[cfg(feature = "dtype-categorical")]
825 Categorical(v, _) | CategoricalOwned(v, _) | Enum(v, _) | EnumOwned(v, _) => {
826 v.hash(state)
827 },
828 #[cfg(feature = "object")]
829 Object(_) => {},
830 #[cfg(feature = "object")]
831 ObjectOwned(_) => {},
832 #[cfg(feature = "dtype-struct")]
833 Struct(_, _, _) => {
834 if !cheap {
835 let mut buf = vec![];
836 self._materialize_struct_av(&mut buf);
837 buf.hash(state)
838 }
839 },
840 #[cfg(feature = "dtype-struct")]
841 StructOwned(v) => v.0.hash(state),
842 #[cfg(feature = "dtype-decimal")]
843 Decimal(v, k) => {
844 v.hash(state);
845 k.hash(state);
846 },
847 Null => {},
848 }
849 }
850}
851
852impl Hash for AnyValue<'_> {
853 fn hash<H: Hasher>(&self, state: &mut H) {
854 self.hash_impl(state, false)
855 }
856}
857
// Marker impl: declares the `PartialEq` implementation of `AnyValue` (defined outside
// this section) to be a full equivalence relation.
// NOTE(review): float variants exist, so this presumably relies on `PartialEq` treating
// NaN as equal to itself — confirm against that impl.
impl Eq for AnyValue<'_> {}
859
860impl<'a, T> From<Option<T>> for AnyValue<'a>
861where
862 T: Into<AnyValue<'a>>,
863{
864 #[inline]
865 fn from(a: Option<T>) -> Self {
866 match a {
867 None => AnyValue::Null,
868 Some(v) => v.into(),
869 }
870 }
871}
872
873impl<'a> AnyValue<'a> {
/// Reinterprets an `Int32` value as a `Date`; null stays null.
///
/// # Panics
///
/// Panics for every other variant.
#[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))]
pub(crate) fn as_date(&self) -> AnyValue<'static> {
    match self {
        AnyValue::Null => AnyValue::Null,
        #[cfg(feature = "dtype-date")]
        AnyValue::Int32(days) => AnyValue::Date(*days),
        dt => panic!("cannot create date from other type. dtype: {dt}"),
    }
}
/// Reinterprets an `Int64` value as a `Datetime` with the given unit and
/// time zone; null stays null.
///
/// # Panics
///
/// Panics for every other variant.
#[cfg(feature = "dtype-datetime")]
pub(crate) fn as_datetime(&self, tu: TimeUnit, tz: Option<&'a TimeZone>) -> AnyValue<'a> {
    match self {
        AnyValue::Int64(v) => AnyValue::Datetime(*v, tu, tz),
        AnyValue::Null => AnyValue::Null,
        // The message names the target type of this conversion.
        dt => panic!("cannot create datetime from other type. dtype: {dt}"),
    }
}
891
/// Reinterprets an `Int64` value as a `Duration` in the given unit; null stays null.
///
/// # Panics
///
/// Panics for every other variant.
#[cfg(feature = "dtype-duration")]
pub(crate) fn as_duration(&self, tu: TimeUnit) -> AnyValue<'static> {
    match self {
        AnyValue::Int64(v) => AnyValue::Duration(*v, tu),
        AnyValue::Null => AnyValue::Null,
        // The message names the target type of this conversion.
        dt => panic!("cannot create duration from other type. dtype: {dt}"),
    }
}
900
/// Reinterprets an `Int64` value as a `Time`; null stays null.
///
/// # Panics
///
/// Panics for every other variant.
#[cfg(feature = "dtype-time")]
pub(crate) fn as_time(&self) -> AnyValue<'static> {
    match self {
        AnyValue::Int64(v) => AnyValue::Time(*v),
        AnyValue::Null => AnyValue::Null,
        // The message names the target type of this conversion.
        dt => panic!("cannot create time from other type. dtype: {dt}"),
    }
}
909
910 pub(crate) fn to_i128(&self) -> Option<i128> {
911 match self {
912 AnyValue::UInt8(v) => Some((*v).into()),
913 AnyValue::UInt16(v) => Some((*v).into()),
914 AnyValue::UInt32(v) => Some((*v).into()),
915 AnyValue::UInt64(v) => Some((*v).into()),
916 AnyValue::Int8(v) => Some((*v).into()),
917 AnyValue::Int16(v) => Some((*v).into()),
918 AnyValue::Int32(v) => Some((*v).into()),
919 AnyValue::Int64(v) => Some((*v).into()),
920 AnyValue::Int128(v) => Some(*v),
921 _ => None,
922 }
923 }
924
925 pub(crate) fn to_f64(&self) -> Option<f64> {
926 match self {
927 AnyValue::Float32(v) => Some((*v).into()),
928 AnyValue::Float64(v) => Some(*v),
929 _ => None,
930 }
931 }
932
933 #[must_use]
934 pub fn add(&self, rhs: &AnyValue) -> AnyValue<'static> {
935 use AnyValue::*;
936 match (self, rhs) {
937 (Null, r) => r.clone().into_static(),
938 (l, Null) => l.clone().into_static(),
939 (Int32(l), Int32(r)) => Int32(l + r),
940 (Int64(l), Int64(r)) => Int64(l + r),
941 (UInt32(l), UInt32(r)) => UInt32(l + r),
942 (UInt64(l), UInt64(r)) => UInt64(l + r),
943 (Float32(l), Float32(r)) => Float32(l + r),
944 (Float64(l), Float64(r)) => Float64(l + r),
945 #[cfg(feature = "dtype-duration")]
946 (Duration(l, lu), Duration(r, ru)) => {
947 if lu != ru {
948 unimplemented!("adding durations with different units is not supported here");
949 }
950
951 Duration(l + r, *lu)
952 },
953 #[cfg(feature = "dtype-decimal")]
954 (Decimal(l, ls), Decimal(r, rs)) => {
955 if ls != rs {
956 unimplemented!("adding decimals with different scales is not supported here");
957 }
958
959 Decimal(l + r, *ls)
960 },
961 _ => unimplemented!(),
962 }
963 }
964
965 #[inline]
966 pub fn as_borrowed(&self) -> AnyValue<'_> {
967 match self {
968 AnyValue::BinaryOwned(data) => AnyValue::Binary(data),
969 AnyValue::StringOwned(data) => AnyValue::String(data.as_str()),
970 #[cfg(feature = "dtype-datetime")]
971 AnyValue::DatetimeOwned(v, tu, tz) => {
972 AnyValue::Datetime(*v, *tu, tz.as_ref().map(AsRef::as_ref))
973 },
974 #[cfg(feature = "dtype-categorical")]
975 AnyValue::CategoricalOwned(cat, map) => AnyValue::Categorical(*cat, map),
976 #[cfg(feature = "dtype-categorical")]
977 AnyValue::EnumOwned(cat, map) => AnyValue::Enum(*cat, map),
978 av => av.clone(),
979 }
980 }
981
982 #[inline]
985 pub fn into_static(self) -> AnyValue<'static> {
986 use AnyValue::*;
987 match self {
988 Null => Null,
989 Int8(v) => Int8(v),
990 Int16(v) => Int16(v),
991 Int32(v) => Int32(v),
992 Int64(v) => Int64(v),
993 Int128(v) => Int128(v),
994 UInt8(v) => UInt8(v),
995 UInt16(v) => UInt16(v),
996 UInt32(v) => UInt32(v),
997 UInt64(v) => UInt64(v),
998 Boolean(v) => Boolean(v),
999 Float32(v) => Float32(v),
1000 Float64(v) => Float64(v),
1001 #[cfg(feature = "dtype-datetime")]
1002 Datetime(v, tu, tz) => DatetimeOwned(v, tu, tz.map(|v| Arc::new(v.clone()))),
1003 #[cfg(feature = "dtype-datetime")]
1004 DatetimeOwned(v, tu, tz) => DatetimeOwned(v, tu, tz),
1005 #[cfg(feature = "dtype-date")]
1006 Date(v) => Date(v),
1007 #[cfg(feature = "dtype-duration")]
1008 Duration(v, tu) => Duration(v, tu),
1009 #[cfg(feature = "dtype-time")]
1010 Time(v) => Time(v),
1011 List(v) => List(v),
1012 #[cfg(feature = "dtype-array")]
1013 Array(s, size) => Array(s, size),
1014 String(v) => StringOwned(PlSmallStr::from_str(v)),
1015 StringOwned(v) => StringOwned(v),
1016 Binary(v) => BinaryOwned(v.to_vec()),
1017 BinaryOwned(v) => BinaryOwned(v),
1018 #[cfg(feature = "object")]
1019 Object(v) => ObjectOwned(OwnedObject(v.to_boxed())),
1020 #[cfg(feature = "dtype-struct")]
1021 Struct(idx, arr, fields) => {
1022 let avs = struct_to_avs_static(idx, arr, fields);
1023 StructOwned(Box::new((avs, fields.to_vec())))
1024 },
1025 #[cfg(feature = "dtype-struct")]
1026 StructOwned(payload) => {
1027 let av = StructOwned(payload);
1028 unsafe { std::mem::transmute::<AnyValue<'a>, AnyValue<'static>>(av) }
1030 },
1031 #[cfg(feature = "object")]
1032 ObjectOwned(payload) => {
1033 let av = ObjectOwned(payload);
1034 unsafe { std::mem::transmute::<AnyValue<'a>, AnyValue<'static>>(av) }
1036 },
1037 #[cfg(feature = "dtype-decimal")]
1038 Decimal(val, scale) => Decimal(val, scale),
1039 #[cfg(feature = "dtype-categorical")]
1040 Categorical(cat, map) => CategoricalOwned(cat, map.clone()),
1041 #[cfg(feature = "dtype-categorical")]
1042 CategoricalOwned(cat, map) => CategoricalOwned(cat, map),
1043 #[cfg(feature = "dtype-categorical")]
1044 Enum(cat, map) => EnumOwned(cat, map.clone()),
1045 #[cfg(feature = "dtype-categorical")]
1046 EnumOwned(cat, map) => EnumOwned(cat, map),
1047 }
1048 }
1049
1050 pub fn get_str(&self) -> Option<&str> {
1052 match self {
1053 AnyValue::String(s) => Some(s),
1054 AnyValue::StringOwned(s) => Some(s.as_str()),
1055 #[cfg(feature = "dtype-categorical")]
1056 Self::Categorical(cat, map) | Self::Enum(cat, map) => {
1057 Some(unsafe { map.cat_to_str_unchecked(*cat) })
1058 },
1059 #[cfg(feature = "dtype-categorical")]
1060 Self::CategoricalOwned(cat, map) | Self::EnumOwned(cat, map) => {
1061 Some(unsafe { map.cat_to_str_unchecked(*cat) })
1062 },
1063 _ => None,
1064 }
1065 }
1066}
1067
1068impl<'a> From<AnyValue<'a>> for Option<i64> {
1069 fn from(val: AnyValue<'a>) -> Self {
1070 use AnyValue::*;
1071 match val {
1072 Null => None,
1073 Int32(v) => Some(v as i64),
1074 Int64(v) => Some(v),
1075 UInt32(v) => Some(v as i64),
1076 _ => todo!(),
1077 }
1078 }
1079}
1080
1081impl AnyValue<'_> {
1082 #[inline]
1083 pub fn eq_missing(&self, other: &Self, null_equal: bool) -> bool {
1084 fn struct_owned_value_iter<'a>(
1085 v: &'a (Vec<AnyValue<'_>>, Vec<Field>),
1086 ) -> impl ExactSizeIterator<Item = AnyValue<'a>> {
1087 v.0.iter().map(|v| v.as_borrowed())
1088 }
1089 fn struct_value_iter(
1090 idx: usize,
1091 arr: &StructArray,
1092 ) -> impl ExactSizeIterator<Item = AnyValue<'_>> {
1093 assert!(idx < arr.len());
1094
1095 arr.values().iter().map(move |field_arr| unsafe {
1096 field_arr.get_unchecked(idx)
1100 })
1101 }
1102
1103 fn struct_eq_missing<'a>(
1104 l: impl ExactSizeIterator<Item = AnyValue<'a>>,
1105 r: impl ExactSizeIterator<Item = AnyValue<'a>>,
1106 null_equal: bool,
1107 ) -> bool {
1108 if l.len() != r.len() {
1109 return false;
1110 }
1111
1112 l.zip(r).all(|(lv, rv)| lv.eq_missing(&rv, null_equal))
1113 }
1114
1115 use AnyValue::*;
1116 match (self, other) {
1117 (StringOwned(l), r) => AnyValue::String(l.as_str()) == *r,
1119 (BinaryOwned(l), r) => AnyValue::Binary(l.as_slice()) == *r,
1120 #[cfg(feature = "object")]
1121 (ObjectOwned(l), r) => AnyValue::Object(&*l.0) == *r,
1122 (l, StringOwned(r)) => *l == AnyValue::String(r.as_str()),
1123 (l, BinaryOwned(r)) => *l == AnyValue::Binary(r.as_slice()),
1124 #[cfg(feature = "object")]
1125 (l, ObjectOwned(r)) => *l == AnyValue::Object(&*r.0),
1126 #[cfg(feature = "dtype-datetime")]
1127 (DatetimeOwned(lv, ltu, ltz), r) => {
1128 Datetime(*lv, *ltu, ltz.as_ref().map(|v| v.as_ref())) == *r
1129 },
1130 #[cfg(feature = "dtype-datetime")]
1131 (l, DatetimeOwned(rv, rtu, rtz)) => {
1132 *l == Datetime(*rv, *rtu, rtz.as_ref().map(|v| v.as_ref()))
1133 },
1134 #[cfg(feature = "dtype-categorical")]
1135 (CategoricalOwned(cat, map), r) => Categorical(*cat, map) == *r,
1136 #[cfg(feature = "dtype-categorical")]
1137 (l, CategoricalOwned(cat, map)) => *l == Categorical(*cat, map),
1138 #[cfg(feature = "dtype-categorical")]
1139 (EnumOwned(cat, map), r) => Enum(*cat, map) == *r,
1140 #[cfg(feature = "dtype-categorical")]
1141 (l, EnumOwned(cat, map)) => *l == Enum(*cat, map),
1142
1143 (Null, Null) => null_equal,
1145 (Null, _) => false,
1146 (_, Null) => false,
1147
1148 (Boolean(l), Boolean(r)) => *l == *r,
1150 (UInt8(l), UInt8(r)) => *l == *r,
1151 (UInt16(l), UInt16(r)) => *l == *r,
1152 (UInt32(l), UInt32(r)) => *l == *r,
1153 (UInt64(l), UInt64(r)) => *l == *r,
1154 (Int8(l), Int8(r)) => *l == *r,
1155 (Int16(l), Int16(r)) => *l == *r,
1156 (Int32(l), Int32(r)) => *l == *r,
1157 (Int64(l), Int64(r)) => *l == *r,
1158 (Int128(l), Int128(r)) => *l == *r,
1159 (Float32(l), Float32(r)) => l.to_total_ord() == r.to_total_ord(),
1160 (Float64(l), Float64(r)) => l.to_total_ord() == r.to_total_ord(),
1161 (String(l), String(r)) => l == r,
1162 (Binary(l), Binary(r)) => l == r,
1163 #[cfg(feature = "dtype-time")]
1164 (Time(l), Time(r)) => *l == *r,
1165 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
1166 (Date(l), Date(r)) => *l == *r,
1167 #[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
1168 (Datetime(l, tul, tzl), Datetime(r, tur, tzr)) => {
1169 *l == *r && *tul == *tur && tzl == tzr
1170 },
1171 (List(l), List(r)) => l == r,
1172 #[cfg(feature = "dtype-categorical")]
1173 (Categorical(cat_l, map_l), Categorical(cat_r, map_r)) => {
1174 if !Arc::ptr_eq(map_l, map_r) {
1175 unimplemented!(
1178 "comparing categoricals with different Categories is not supported through AnyValue"
1179 );
1180 }
1181
1182 cat_l == cat_r
1183 },
1184 #[cfg(feature = "dtype-categorical")]
1185 (Enum(cat_l, map_l), Enum(cat_r, map_r)) => {
1186 if !Arc::ptr_eq(map_l, map_r) {
1187 unimplemented!(
1190 "comparing enums with different FrozenCategories is not supported through AnyValue"
1191 );
1192 }
1193
1194 cat_l == cat_r
1195 },
1196 #[cfg(feature = "dtype-duration")]
1197 (Duration(l, tu_l), Duration(r, tu_r)) => l == r && tu_l == tu_r,
1198
1199 #[cfg(feature = "dtype-struct")]
1200 (StructOwned(l), StructOwned(r)) => struct_eq_missing(
1201 struct_owned_value_iter(l.as_ref()),
1202 struct_owned_value_iter(r.as_ref()),
1203 null_equal,
1204 ),
1205 #[cfg(feature = "dtype-struct")]
1206 (StructOwned(l), Struct(idx, arr, _)) => struct_eq_missing(
1207 struct_owned_value_iter(l.as_ref()),
1208 struct_value_iter(*idx, arr),
1209 null_equal,
1210 ),
1211 #[cfg(feature = "dtype-struct")]
1212 (Struct(idx, arr, _), StructOwned(r)) => struct_eq_missing(
1213 struct_value_iter(*idx, arr),
1214 struct_owned_value_iter(r.as_ref()),
1215 null_equal,
1216 ),
1217 #[cfg(feature = "dtype-struct")]
1218 (Struct(l_idx, l_arr, _), Struct(r_idx, r_arr, _)) => struct_eq_missing(
1219 struct_value_iter(*l_idx, l_arr),
1220 struct_value_iter(*r_idx, r_arr),
1221 null_equal,
1222 ),
1223 #[cfg(feature = "dtype-decimal")]
1224 (Decimal(l_v, l_s), Decimal(r_v, r_s)) => {
1225 if l_s == r_s && l_v == r_v || *l_v == 0 && *r_v == 0 {
1227 true
1228 } else if l_s < r_s {
1229 if let Some(lhs) = (|| {
1231 let exp = i128::checked_pow(10, (r_s - l_s).try_into().ok()?)?;
1232 l_v.checked_mul(exp)
1233 })() {
1234 lhs == *r_v
1235 } else {
1236 false
1237 }
1238 } else {
1239 if let Some(rhs) = (|| {
1241 let exp = i128::checked_pow(10, (l_s - r_s).try_into().ok()?)?;
1242 r_v.checked_mul(exp)
1243 })() {
1244 *l_v == rhs
1245 } else {
1246 false
1247 }
1248 }
1249 },
1250 #[cfg(feature = "object")]
1251 (Object(l), Object(r)) => l == r,
1252 #[cfg(feature = "dtype-array")]
1253 (Array(l_values, l_size), Array(r_values, r_size)) => {
1254 if l_size != r_size {
1255 return false;
1256 }
1257
1258 debug_assert_eq!(l_values.len(), *l_size);
1259 debug_assert_eq!(r_values.len(), *r_size);
1260
1261 let mut is_equal = true;
1262 for i in 0..*l_size {
1263 let l = unsafe { l_values.get_unchecked(i) };
1264 let r = unsafe { r_values.get_unchecked(i) };
1265
1266 is_equal &= l.eq_missing(&r, null_equal);
1267 }
1268 is_equal
1269 },
1270
1271 (l, r) if l.to_i128().is_some() && r.to_i128().is_some() => l.to_i128() == r.to_i128(),
1272 (l, r) if l.to_f64().is_some() && r.to_f64().is_some() => {
1273 l.to_f64().unwrap().to_total_ord() == r.to_f64().unwrap().to_total_ord()
1274 },
1275
1276 (_, _) => {
1277 unimplemented!(
1278 "scalar eq_missing for mixed dtypes {self:?} and {other:?} is not supported"
1279 )
1280 },
1281 }
1282 }
1283}
1284
1285impl PartialEq for AnyValue<'_> {
1286 #[inline]
1287 fn eq(&self, other: &Self) -> bool {
1288 self.eq_missing(other, true)
1289 }
1290}
1291
1292impl PartialOrd for AnyValue<'_> {
1293 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1295 use AnyValue::*;
1296 match (self, &other) {
1297 (StringOwned(l), r) => AnyValue::String(l.as_str()).partial_cmp(r),
1299 (BinaryOwned(l), r) => AnyValue::Binary(l.as_slice()).partial_cmp(r),
1300 #[cfg(feature = "object")]
1301 (ObjectOwned(l), r) => AnyValue::Object(&*l.0).partial_cmp(r),
1302 (l, StringOwned(r)) => l.partial_cmp(&AnyValue::String(r.as_str())),
1303 (l, BinaryOwned(r)) => l.partial_cmp(&AnyValue::Binary(r.as_slice())),
1304 #[cfg(feature = "object")]
1305 (l, ObjectOwned(r)) => l.partial_cmp(&AnyValue::Object(&*r.0)),
1306 #[cfg(feature = "dtype-datetime")]
1307 (DatetimeOwned(lv, ltu, ltz), r) => {
1308 Datetime(*lv, *ltu, ltz.as_ref().map(|v| v.as_ref())).partial_cmp(r)
1309 },
1310 #[cfg(feature = "dtype-datetime")]
1311 (l, DatetimeOwned(rv, rtu, rtz)) => {
1312 l.partial_cmp(&Datetime(*rv, *rtu, rtz.as_ref().map(|v| v.as_ref())))
1313 },
1314 #[cfg(feature = "dtype-categorical")]
1315 (CategoricalOwned(cat, map), r) => Categorical(*cat, map).partial_cmp(r),
1316 #[cfg(feature = "dtype-categorical")]
1317 (l, CategoricalOwned(cat, map)) => l.partial_cmp(&Categorical(*cat, map)),
1318 #[cfg(feature = "dtype-categorical")]
1319 (EnumOwned(cat, map), r) => Enum(*cat, map).partial_cmp(r),
1320 #[cfg(feature = "dtype-categorical")]
1321 (l, EnumOwned(cat, map)) => l.partial_cmp(&Enum(*cat, map)),
1322
1323 (Null, Null) => Some(Ordering::Equal),
1325 (Null, _) => Some(Ordering::Less),
1326 (_, Null) => Some(Ordering::Greater),
1327
1328 (Boolean(l), Boolean(r)) => l.partial_cmp(r),
1330 (UInt8(l), UInt8(r)) => l.partial_cmp(r),
1331 (UInt16(l), UInt16(r)) => l.partial_cmp(r),
1332 (UInt32(l), UInt32(r)) => l.partial_cmp(r),
1333 (UInt64(l), UInt64(r)) => l.partial_cmp(r),
1334 (Int8(l), Int8(r)) => l.partial_cmp(r),
1335 (Int16(l), Int16(r)) => l.partial_cmp(r),
1336 (Int32(l), Int32(r)) => l.partial_cmp(r),
1337 (Int64(l), Int64(r)) => l.partial_cmp(r),
1338 (Int128(l), Int128(r)) => l.partial_cmp(r),
1339 (Float32(l), Float32(r)) => Some(l.tot_cmp(r)),
1340 (Float64(l), Float64(r)) => Some(l.tot_cmp(r)),
1341 (String(l), String(r)) => l.partial_cmp(r),
1342 (Binary(l), Binary(r)) => l.partial_cmp(r),
1343 #[cfg(feature = "dtype-date")]
1344 (Date(l), Date(r)) => l.partial_cmp(r),
1345 #[cfg(feature = "dtype-datetime")]
1346 (Datetime(lt, lu, lz), Datetime(rt, ru, rz)) => {
1347 if lu != ru || lz != rz {
1348 unimplemented!(
1349 "comparing datetimes with different units or timezones is not supported"
1350 );
1351 }
1352
1353 lt.partial_cmp(rt)
1354 },
1355 #[cfg(feature = "dtype-duration")]
1356 (Duration(lt, lu), Duration(rt, ru)) => {
1357 if lu != ru {
1358 unimplemented!("comparing durations with different units is not supported");
1359 }
1360
1361 lt.partial_cmp(rt)
1362 },
1363 #[cfg(feature = "dtype-time")]
1364 (Time(l), Time(r)) => l.partial_cmp(r),
1365 #[cfg(feature = "dtype-categorical")]
1366 (Categorical(l_cat, l_map), Categorical(r_cat, r_map)) => unsafe {
1367 let l_str = l_map.cat_to_str_unchecked(*l_cat);
1368 let r_str = r_map.cat_to_str_unchecked(*r_cat);
1369 l_str.partial_cmp(r_str)
1370 },
1371 #[cfg(feature = "dtype-categorical")]
1372 (Enum(l_cat, l_map), Enum(r_cat, r_map)) => {
1373 if !Arc::ptr_eq(l_map, r_map) {
1374 unimplemented!("can't order enums from different FrozenCategories")
1375 }
1376 l_cat.partial_cmp(r_cat)
1377 },
1378 (List(_), List(_)) => {
1379 unimplemented!("ordering for List dtype is not supported")
1380 },
1381 #[cfg(feature = "dtype-array")]
1382 (Array(..), Array(..)) => {
1383 unimplemented!("ordering for Array dtype is not supported")
1384 },
1385 #[cfg(feature = "object")]
1386 (Object(_), Object(_)) => {
1387 unimplemented!("ordering for Object dtype is not supported")
1388 },
1389 #[cfg(feature = "dtype-struct")]
1390 (StructOwned(_), StructOwned(_))
1391 | (StructOwned(_), Struct(..))
1392 | (Struct(..), StructOwned(_))
1393 | (Struct(..), Struct(..)) => {
1394 unimplemented!("ordering for Struct dtype is not supported")
1395 },
1396 #[cfg(feature = "dtype-decimal")]
1397 (Decimal(l_v, l_s), Decimal(r_v, r_s)) => {
1398 if l_s == r_s && l_v == r_v || *l_v == 0 && *r_v == 0 {
1400 Some(Ordering::Equal)
1401 } else if l_s < r_s {
1402 if let Some(lhs) = (|| {
1404 let exp = i128::checked_pow(10, (r_s - l_s).try_into().ok()?)?;
1405 l_v.checked_mul(exp)
1406 })() {
1407 lhs.partial_cmp(r_v)
1408 } else {
1409 Some(Ordering::Greater)
1410 }
1411 } else {
1412 if let Some(rhs) = (|| {
1414 let exp = i128::checked_pow(10, (l_s - r_s).try_into().ok()?)?;
1415 r_v.checked_mul(exp)
1416 })() {
1417 l_v.partial_cmp(&rhs)
1418 } else {
1419 Some(Ordering::Less)
1420 }
1421 }
1422 },
1423
1424 (_, _) => {
1425 unimplemented!(
1426 "scalar ordering for mixed dtypes {self:?} and {other:?} is not supported"
1427 )
1428 },
1429 }
1430 }
1431}
1432
1433impl TotalEq for AnyValue<'_> {
1434 #[inline]
1435 fn tot_eq(&self, other: &Self) -> bool {
1436 self.eq_missing(other, true)
1437 }
1438}
1439
/// Materializes row `idx` of a struct array as a vector of owned (`'static`) values,
/// one per field, converting each field array entry according to the matching
/// entry of `fields`.
///
/// # Panics
///
/// Panics if `idx` is out of bounds for `arr`.
#[cfg(feature = "dtype-struct")]
fn struct_to_avs_static(idx: usize, arr: &StructArray, fields: &[Field]) -> Vec<AnyValue<'static>> {
    assert!(idx < arr.len());

    let columns = arr.values();
    debug_assert_eq!(columns.len(), fields.len());

    let mut row = Vec::with_capacity(columns.len().min(fields.len()));
    for (column, field) in columns.iter().zip(fields) {
        // SAFETY: `idx < arr.len()` was asserted above.
        // NOTE(review): this assumes every field array is as long as the struct
        // array itself — confirm against the StructArray invariants.
        let borrowed = unsafe { arr_to_any_value(column.as_ref(), idx, &field.dtype) };
        row.push(borrowed.into_static());
    }
    row
}
1458
/// Access to a single element of a container as an [`AnyValue`].
pub trait GetAnyValue {
    /// Returns the element at `index` as an [`AnyValue`].
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably `index` must be in bounds, since no bounds check
    /// is implied by the name — confirm against the implementations.
    unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_>;
}
1465
1466impl GetAnyValue for ArrayRef {
1467 unsafe fn get_unchecked(&self, index: usize) -> AnyValue<'_> {
1469 match self.dtype() {
1470 ArrowDataType::Int8 => {
1471 let arr = self
1472 .as_any()
1473 .downcast_ref::<PrimitiveArray<i8>>()
1474 .unwrap_unchecked();
1475 match arr.get_unchecked(index) {
1476 None => AnyValue::Null,
1477 Some(v) => AnyValue::Int8(v),
1478 }
1479 },
1480 ArrowDataType::Int16 => {
1481 let arr = self
1482 .as_any()
1483 .downcast_ref::<PrimitiveArray<i16>>()
1484 .unwrap_unchecked();
1485 match arr.get_unchecked(index) {
1486 None => AnyValue::Null,
1487 Some(v) => AnyValue::Int16(v),
1488 }
1489 },
1490 ArrowDataType::Int32 => {
1491 let arr = self
1492 .as_any()
1493 .downcast_ref::<PrimitiveArray<i32>>()
1494 .unwrap_unchecked();
1495 match arr.get_unchecked(index) {
1496 None => AnyValue::Null,
1497 Some(v) => AnyValue::Int32(v),
1498 }
1499 },
1500 ArrowDataType::Int64 => {
1501 let arr = self
1502 .as_any()
1503 .downcast_ref::<PrimitiveArray<i64>>()
1504 .unwrap_unchecked();
1505 match arr.get_unchecked(index) {
1506 None => AnyValue::Null,
1507 Some(v) => AnyValue::Int64(v),
1508 }
1509 },
1510 ArrowDataType::Int128 => {
1511 let arr = self
1512 .as_any()
1513 .downcast_ref::<PrimitiveArray<i128>>()
1514 .unwrap_unchecked();
1515 match arr.get_unchecked(index) {
1516 None => AnyValue::Null,
1517 Some(v) => AnyValue::Int128(v),
1518 }
1519 },
1520 ArrowDataType::UInt8 => {
1521 let arr = self
1522 .as_any()
1523 .downcast_ref::<PrimitiveArray<u8>>()
1524 .unwrap_unchecked();
1525 match arr.get_unchecked(index) {
1526 None => AnyValue::Null,
1527 Some(v) => AnyValue::UInt8(v),
1528 }
1529 },
1530 ArrowDataType::UInt16 => {
1531 let arr = self
1532 .as_any()
1533 .downcast_ref::<PrimitiveArray<u16>>()
1534 .unwrap_unchecked();
1535 match arr.get_unchecked(index) {
1536 None => AnyValue::Null,
1537 Some(v) => AnyValue::UInt16(v),
1538 }
1539 },
1540 ArrowDataType::UInt32 => {
1541 let arr = self
1542 .as_any()
1543 .downcast_ref::<PrimitiveArray<u32>>()
1544 .unwrap_unchecked();
1545 match arr.get_unchecked(index) {
1546 None => AnyValue::Null,
1547 Some(v) => AnyValue::UInt32(v),
1548 }
1549 },
1550 ArrowDataType::UInt64 => {
1551 let arr = self
1552 .as_any()
1553 .downcast_ref::<PrimitiveArray<u64>>()
1554 .unwrap_unchecked();
1555 match arr.get_unchecked(index) {
1556 None => AnyValue::Null,
1557 Some(v) => AnyValue::UInt64(v),
1558 }
1559 },
1560 ArrowDataType::Float32 => {
1561 let arr = self
1562 .as_any()
1563 .downcast_ref::<PrimitiveArray<f32>>()
1564 .unwrap_unchecked();
1565 match arr.get_unchecked(index) {
1566 None => AnyValue::Null,
1567 Some(v) => AnyValue::Float32(v),
1568 }
1569 },
1570 ArrowDataType::Float64 => {
1571 let arr = self
1572 .as_any()
1573 .downcast_ref::<PrimitiveArray<f64>>()
1574 .unwrap_unchecked();
1575 match arr.get_unchecked(index) {
1576 None => AnyValue::Null,
1577 Some(v) => AnyValue::Float64(v),
1578 }
1579 },
1580 ArrowDataType::Boolean => {
1581 let arr = self
1582 .as_any()
1583 .downcast_ref::<BooleanArray>()
1584 .unwrap_unchecked();
1585 match arr.get_unchecked(index) {
1586 None => AnyValue::Null,
1587 Some(v) => AnyValue::Boolean(v),
1588 }
1589 },
1590 ArrowDataType::LargeUtf8 => {
1591 let arr = self
1592 .as_any()
1593 .downcast_ref::<LargeStringArray>()
1594 .unwrap_unchecked();
1595 match arr.get_unchecked(index) {
1596 None => AnyValue::Null,
1597 Some(v) => AnyValue::String(v),
1598 }
1599 },
1600 _ => unimplemented!(),
1601 }
1602 }
1603}
1604
1605impl<K: NumericNative> From<K> for AnyValue<'static> {
1606 fn from(value: K) -> Self {
1607 unsafe {
1608 match K::PRIMITIVE {
1609 PrimitiveType::Int8 => AnyValue::Int8(NumCast::from(value).unwrap_unchecked()),
1610 PrimitiveType::Int16 => AnyValue::Int16(NumCast::from(value).unwrap_unchecked()),
1611 PrimitiveType::Int32 => AnyValue::Int32(NumCast::from(value).unwrap_unchecked()),
1612 PrimitiveType::Int64 => AnyValue::Int64(NumCast::from(value).unwrap_unchecked()),
1613 PrimitiveType::Int128 => AnyValue::Int128(NumCast::from(value).unwrap_unchecked()),
1614 PrimitiveType::UInt8 => AnyValue::UInt8(NumCast::from(value).unwrap_unchecked()),
1615 PrimitiveType::UInt16 => AnyValue::UInt16(NumCast::from(value).unwrap_unchecked()),
1616 PrimitiveType::UInt32 => AnyValue::UInt32(NumCast::from(value).unwrap_unchecked()),
1617 PrimitiveType::UInt64 => AnyValue::UInt64(NumCast::from(value).unwrap_unchecked()),
1618 PrimitiveType::Float32 => {
1619 AnyValue::Float32(NumCast::from(value).unwrap_unchecked())
1620 },
1621 PrimitiveType::Float64 => {
1622 AnyValue::Float64(NumCast::from(value).unwrap_unchecked())
1623 },
1624 _ => unreachable!(),
1626 }
1627 }
1628 }
1629}
1630
1631impl<'a> From<&'a [u8]> for AnyValue<'a> {
1632 fn from(value: &'a [u8]) -> Self {
1633 AnyValue::Binary(value)
1634 }
1635}
1636
1637impl<'a> From<&'a str> for AnyValue<'a> {
1638 fn from(value: &'a str) -> Self {
1639 AnyValue::String(value)
1640 }
1641}
1642
1643impl From<bool> for AnyValue<'static> {
1644 fn from(value: bool) -> Self {
1645 AnyValue::Boolean(value)
1646 }
1647}
1648
#[cfg(test)]
mod test {
    #[cfg(feature = "dtype-categorical")]
    use super::*;

    /// Checks that arrow dtypes are mapped onto the expected polars dtypes.
    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_arrow_dtypes_to_polars() {
        // The four time units, in the order the time cases are checked.
        let time_units = || {
            [
                ArrowTimeUnit::Nanosecond,
                ArrowTimeUnit::Millisecond,
                ArrowTimeUnit::Microsecond,
                ArrowTimeUnit::Second,
            ]
        };
        // Item field shared by both list flavours.
        let float_item =
            || Box::new(ArrowField::new(LIST_VALUES_NAME, ArrowDataType::Float64, true));

        // (arrow dtype, expected polars dtype)
        let mut cases = vec![
            (
                ArrowDataType::Duration(ArrowTimeUnit::Nanosecond),
                DataType::Duration(TimeUnit::Nanoseconds),
            ),
            (
                ArrowDataType::Duration(ArrowTimeUnit::Millisecond),
                DataType::Duration(TimeUnit::Milliseconds),
            ),
            (
                ArrowDataType::Date64,
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Nanosecond, None),
                DataType::Datetime(TimeUnit::Nanoseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Microsecond, None),
                DataType::Datetime(TimeUnit::Microseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Millisecond, None),
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Second, None),
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (
                ArrowDataType::Timestamp(ArrowTimeUnit::Second, Some(PlSmallStr::EMPTY)),
                DataType::Datetime(TimeUnit::Milliseconds, None),
            ),
            (ArrowDataType::LargeUtf8, DataType::String),
            (ArrowDataType::Utf8, DataType::String),
            (ArrowDataType::LargeBinary, DataType::Binary),
            (ArrowDataType::Binary, DataType::Binary),
        ];

        // Every time dtype, regardless of width or unit, maps to `DataType::Time`.
        for unit in time_units() {
            cases.push((ArrowDataType::Time64(unit), DataType::Time));
        }
        for unit in time_units() {
            cases.push((ArrowDataType::Time32(unit), DataType::Time));
        }

        // Both list flavours map to the polars list dtype.
        cases.push((
            ArrowDataType::List(float_item()),
            DataType::List(DataType::Float64.into()),
        ));
        cases.push((
            ArrowDataType::LargeList(float_item()),
            DataType::List(DataType::Float64.into()),
        ));

        for (arrow_dtype, expected) in cases {
            let converted = DataType::from_arrow_dtype(&arrow_dtype);

            assert_eq!(expected, converted);
        }
    }
}