polars_plan/plans/
lit.rs

1use std::hash::{Hash, Hasher};
2
3#[cfg(feature = "temporal")]
4use chrono::{Duration as ChronoDuration, NaiveDate, NaiveDateTime};
5use polars_core::chunked_array::cast::CastOptions;
6use polars_core::prelude::*;
7use polars_core::utils::materialize_dyn_int;
8use polars_utils::hashing::hash_to_partition;
9#[cfg(feature = "serde")]
10use serde::{Deserialize, Serialize};
11
12use crate::constants::get_literal_name;
13use crate::prelude::*;
14
/// A literal whose concrete dtype has not been fixed yet.
///
/// Each variant only records the broad kind of the value; a specific dtype is chosen
/// later via `try_materialize_to_dtype`.
#[derive(Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum DynLiteralValue {
    /// String literal.
    Str(PlSmallStr),
    /// Integer literal, stored as `i128` until a concrete integer dtype is picked.
    Int(i128),
    /// Floating point literal, stored as `f64`.
    Float(f64),
    /// List literal whose element dtype is still dynamic.
    List(DynListLiteralValue),
}
/// The payload of a dynamic list literal.
///
/// Every element is an `Option`; NOTE(review): `None` presumably stands for a null
/// element — confirm against the constructors of this type.
#[derive(Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum DynListLiteralValue {
    /// List of strings.
    Str(Box<[Option<PlSmallStr>]>),
    /// List of integers, stored as `i128`.
    Int(Box<[Option<i128>]>),
    /// List of floats, stored as `f64`.
    Float(Box<[Option<f64>]>),
    /// List of lists (materialization of this variant is not implemented yet).
    List(Box<[Option<DynListLiteralValue>]>),
}
33
34impl Hash for DynLiteralValue {
35    fn hash<H: Hasher>(&self, state: &mut H) {
36        std::mem::discriminant(self).hash(state);
37        match self {
38            Self::Str(i) => i.hash(state),
39            Self::Int(i) => i.hash(state),
40            Self::Float(i) => i.to_ne_bytes().hash(state),
41            Self::List(i) => i.hash(state),
42        }
43    }
44}
45
46impl Hash for DynListLiteralValue {
47    fn hash<H: Hasher>(&self, state: &mut H) {
48        std::mem::discriminant(self).hash(state);
49        match self {
50            Self::Str(i) => i.hash(state),
51            Self::Int(i) => i.hash(state),
52            Self::Float(i) => i
53                .iter()
54                .for_each(|i| i.map(|i| i.to_ne_bytes()).hash(state)),
55            Self::List(i) => i.hash(state),
56        }
57    }
58}
59
/// An integer range literal described by its bounds instead of its values.
///
/// The values are only generated when the range is materialized into a `Series`.
#[derive(Clone, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct RangeLiteralValue {
    /// Start of the range.
    pub low: i128,
    /// End of the range. NOTE(review): presumably exclusive, as it is forwarded
    /// unchanged to `new_int_range` — confirm.
    pub high: i128,
    /// Data type reported for this literal and used when materializing it by default.
    pub dtype: DataType,
}
/// The value carried by a literal expression.
#[derive(Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum LiteralValue {
    /// A dynamically inferred literal value. This needs to be materialized into a specific type.
    Dyn(DynLiteralValue),
    /// A single value with a known dtype.
    Scalar(Scalar),
    /// A whole `Series` used as a literal.
    Series(SpecialEq<Series>),
    /// An integer range described by its bounds.
    Range(RangeLiteralValue),
}
78
/// Result of materializing a [`LiteralValue`] to a concrete dtype: either a single
/// scalar or a full series.
pub enum MaterializedLiteralValue {
    /// Produced for dynamic, scalar and range literals.
    Scalar(Scalar),
    /// Produced for series literals.
    Series(Series),
}
83
84impl DynListLiteralValue {
85    pub fn try_materialize_to_dtype(self, dtype: &DataType) -> PolarsResult<Scalar> {
86        let Some(inner_dtype) = dtype.inner_dtype() else {
87            polars_bail!(InvalidOperation: "conversion from list literal to `{dtype}` failed.");
88        };
89
90        let s = match self {
91            DynListLiteralValue::Str(vs) => {
92                StringChunked::from_iter_options(PlSmallStr::from_static("literal"), vs.into_iter())
93                    .into_series()
94            },
95            DynListLiteralValue::Int(vs) => {
96                #[cfg(feature = "dtype-i128")]
97                {
98                    Int128Chunked::from_iter_options(
99                        PlSmallStr::from_static("literal"),
100                        vs.into_iter(),
101                    )
102                    .into_series()
103                }
104
105                #[cfg(not(feature = "dtype-i128"))]
106                {
107                    Int64Chunked::from_iter_options(
108                        PlSmallStr::from_static("literal"),
109                        vs.into_iter().map(|v| v.map(|v| v as i64)),
110                    )
111                    .into_series()
112                }
113            },
114            DynListLiteralValue::Float(vs) => Float64Chunked::from_iter_options(
115                PlSmallStr::from_static("literal"),
116                vs.into_iter(),
117            )
118            .into_series(),
119            DynListLiteralValue::List(_) => todo!("nested lists"),
120        };
121
122        let s = s.cast_with_options(inner_dtype, CastOptions::Strict)?;
123        let value = match dtype {
124            DataType::List(_) => AnyValue::List(s),
125            #[cfg(feature = "dtype-array")]
126            DataType::Array(_, size) => AnyValue::Array(s, *size),
127            _ => unreachable!(),
128        };
129
130        Ok(Scalar::new(dtype.clone(), value))
131    }
132}
133
134impl DynLiteralValue {
135    pub fn try_materialize_to_dtype(self, dtype: &DataType) -> PolarsResult<Scalar> {
136        match self {
137            DynLiteralValue::Str(s) => {
138                Ok(Scalar::from(s).cast_with_options(dtype, CastOptions::Strict)?)
139            },
140            DynLiteralValue::Int(i) => {
141                #[cfg(not(feature = "dtype-i128"))]
142                let i: i64 = i.try_into().expect("activate dtype-i128 feature");
143
144                Ok(Scalar::from(i).cast_with_options(dtype, CastOptions::Strict)?)
145            },
146            DynLiteralValue::Float(f) => {
147                Ok(Scalar::from(f).cast_with_options(dtype, CastOptions::Strict)?)
148            },
149            DynLiteralValue::List(dyn_list_value) => dyn_list_value.try_materialize_to_dtype(dtype),
150        }
151    }
152}
153
154impl RangeLiteralValue {
155    pub fn try_materialize_to_series(self, dtype: &DataType) -> PolarsResult<Series> {
156        fn handle_range_oob(range: &RangeLiteralValue, to_dtype: &DataType) -> PolarsResult<()> {
157            polars_bail!(
158                InvalidOperation:
159                "conversion from `{}` to `{to_dtype}` failed for range({}, {})",
160                range.dtype, range.low, range.high,
161            )
162        }
163
164        let s = match dtype {
165            DataType::Int32 => {
166                if self.low < i32::MIN as i128 || self.high > i32::MAX as i128 {
167                    handle_range_oob(&self, dtype)?;
168                }
169
170                new_int_range::<Int32Type>(
171                    self.low as i32,
172                    self.high as i32,
173                    1,
174                    PlSmallStr::from_static("range"),
175                )
176                .unwrap()
177            },
178            DataType::Int64 => {
179                if self.low < i64::MIN as i128 || self.high > i64::MAX as i128 {
180                    handle_range_oob(&self, dtype)?;
181                }
182
183                new_int_range::<Int64Type>(
184                    self.low as i64,
185                    self.high as i64,
186                    1,
187                    PlSmallStr::from_static("range"),
188                )
189                .unwrap()
190            },
191            DataType::UInt32 => {
192                if self.low < u32::MIN as i128 || self.high > u32::MAX as i128 {
193                    handle_range_oob(&self, dtype)?;
194                }
195                new_int_range::<UInt32Type>(
196                    self.low as u32,
197                    self.high as u32,
198                    1,
199                    PlSmallStr::from_static("range"),
200                )
201                .unwrap()
202            },
203            _ => polars_bail!(InvalidOperation: "unsupported range datatype `{dtype}`"),
204        };
205
206        Ok(s)
207    }
208}
209
210impl LiteralValue {
211    /// Get the output name as [`PlSmallStr`].
212    pub(crate) fn output_column_name(&self) -> &PlSmallStr {
213        match self {
214            LiteralValue::Series(s) => s.name(),
215            _ => get_literal_name(),
216        }
217    }
218
219    pub fn try_materialize_to_dtype(
220        self,
221        dtype: &DataType,
222    ) -> PolarsResult<MaterializedLiteralValue> {
223        use LiteralValue as L;
224        match self {
225            L::Dyn(dyn_value) => dyn_value
226                .try_materialize_to_dtype(dtype)
227                .map(MaterializedLiteralValue::Scalar),
228            L::Scalar(sc) => Ok(MaterializedLiteralValue::Scalar(
229                sc.cast_with_options(dtype, CastOptions::Strict)?,
230            )),
231            L::Range(range) => {
232                let Some(inner_dtype) = dtype.inner_dtype() else {
233                    polars_bail!(
234                        InvalidOperation: "cannot turn `{}` range into `{dtype}`",
235                        range.dtype
236                    );
237                };
238
239                let s = range.try_materialize_to_series(inner_dtype)?;
240                let value = match dtype {
241                    DataType::List(_) => AnyValue::List(s),
242                    #[cfg(feature = "dtype-array")]
243                    DataType::Array(_, size) => AnyValue::Array(s, *size),
244                    _ => unreachable!(),
245                };
246                Ok(MaterializedLiteralValue::Scalar(Scalar::new(
247                    dtype.clone(),
248                    value,
249                )))
250            },
251            L::Series(s) => Ok(MaterializedLiteralValue::Series(
252                s.cast_with_options(dtype, CastOptions::Strict)?,
253            )),
254        }
255    }
256
257    pub fn extract_usize(&self) -> PolarsResult<usize> {
258        macro_rules! cast_usize {
259            ($v:expr) => {
260                usize::try_from($v).map_err(
261                    |_| polars_err!(InvalidOperation: "cannot convert value {} to usize", $v)
262                )
263            }
264        }
265        match &self {
266            Self::Dyn(DynLiteralValue::Int(v)) => cast_usize!(*v),
267            Self::Scalar(sc) => match sc.as_any_value() {
268                AnyValue::UInt8(v) => Ok(v as usize),
269                AnyValue::UInt16(v) => Ok(v as usize),
270                AnyValue::UInt32(v) => cast_usize!(v),
271                AnyValue::UInt64(v) => cast_usize!(v),
272                AnyValue::Int8(v) => cast_usize!(v),
273                AnyValue::Int16(v) => cast_usize!(v),
274                AnyValue::Int32(v) => cast_usize!(v),
275                AnyValue::Int64(v) => cast_usize!(v),
276                AnyValue::Int128(v) => cast_usize!(v),
277                _ => {
278                    polars_bail!(InvalidOperation: "expression must be constant literal to extract integer")
279                },
280            },
281            _ => {
282                polars_bail!(InvalidOperation: "expression must be constant literal to extract integer")
283            },
284        }
285    }
286
287    pub fn materialize(self) -> Self {
288        match self {
289            LiteralValue::Dyn(_) => {
290                let av = self.to_any_value().unwrap();
291                av.into()
292            },
293            lv => lv,
294        }
295    }
296
297    pub fn is_scalar(&self) -> bool {
298        !matches!(self, LiteralValue::Series(_) | LiteralValue::Range { .. })
299    }
300
301    pub fn to_any_value(&self) -> Option<AnyValue> {
302        let av = match self {
303            Self::Scalar(sc) => sc.value().clone(),
304            Self::Range(range) => {
305                let s = range.clone().try_materialize_to_series(&range.dtype).ok()?;
306                AnyValue::List(s)
307            },
308            Self::Series(_) => return None,
309            Self::Dyn(d) => match d {
310                DynLiteralValue::Int(v) => materialize_dyn_int(*v),
311                DynLiteralValue::Float(v) => AnyValue::Float64(*v),
312                DynLiteralValue::Str(v) => AnyValue::String(v),
313                DynLiteralValue::List(_) => todo!(),
314            },
315        };
316        Some(av)
317    }
318
319    /// Getter for the `DataType` of the value
320    pub fn get_datatype(&self) -> DataType {
321        match self {
322            Self::Dyn(d) => match d {
323                DynLiteralValue::Int(v) => DataType::Unknown(UnknownKind::Int(*v)),
324                DynLiteralValue::Float(_) => DataType::Unknown(UnknownKind::Float),
325                DynLiteralValue::Str(_) => DataType::Unknown(UnknownKind::Str),
326                DynLiteralValue::List(_) => todo!(),
327            },
328            Self::Scalar(sc) => sc.dtype().clone(),
329            Self::Series(s) => s.dtype().clone(),
330            Self::Range(s) => s.dtype.clone(),
331        }
332    }
333
334    pub fn new_idxsize(value: IdxSize) -> Self {
335        LiteralValue::Scalar(value.into())
336    }
337
338    pub fn extract_str(&self) -> Option<&str> {
339        match self {
340            LiteralValue::Dyn(DynLiteralValue::Str(s)) => Some(s.as_str()),
341            LiteralValue::Scalar(sc) => match sc.value() {
342                AnyValue::String(s) => Some(s),
343                AnyValue::StringOwned(s) => Some(s),
344                _ => None,
345            },
346            _ => None,
347        }
348    }
349
350    pub fn extract_binary(&self) -> Option<&[u8]> {
351        match self {
352            LiteralValue::Scalar(sc) => match sc.value() {
353                AnyValue::Binary(s) => Some(s),
354                AnyValue::BinaryOwned(s) => Some(s),
355                _ => None,
356            },
357            _ => None,
358        }
359    }
360
361    pub fn is_null(&self) -> bool {
362        match self {
363            Self::Scalar(sc) => sc.is_null(),
364            Self::Series(s) => s.len() == 1 && s.null_count() == 1,
365            _ => false,
366        }
367    }
368
369    pub fn bool(&self) -> Option<bool> {
370        match self {
371            LiteralValue::Scalar(s) => match s.as_any_value() {
372                AnyValue::Boolean(b) => Some(b),
373                _ => None,
374            },
375            _ => None,
376        }
377    }
378
379    pub const fn untyped_null() -> Self {
380        Self::Scalar(Scalar::null(DataType::Null))
381    }
382
383    pub fn implode(self) -> PolarsResult<Self> {
384        let series = match self.materialize() {
385            LiteralValue::Dyn(_) => unreachable!(),
386            LiteralValue::Scalar(scalar) => scalar.into_series(PlSmallStr::EMPTY),
387            LiteralValue::Series(series) => series.into_inner(),
388            LiteralValue::Range(range) => {
389                let dtype = range.dtype.clone();
390                range.try_materialize_to_series(&dtype)?
391            },
392        };
393
394        let dtype = DataType::List(Box::new(series.dtype().clone()));
395        Ok(LiteralValue::Scalar(Scalar::new(
396            dtype,
397            AnyValue::List(series),
398        )))
399    }
400}
401
402impl From<Scalar> for LiteralValue {
403    fn from(value: Scalar) -> Self {
404        Self::Scalar(value)
405    }
406}
407
/// Conversion of a value into a literal expression.
pub trait Literal {
    /// [Literal](Expr::Literal) expression.
    fn lit(self) -> Expr;
}
412
/// Conversion of a value into a literal expression through `typed_lit`.
///
/// The provided default implementation simply forwards to [`Literal::lit`].
pub trait TypedLiteral: Literal {
    /// [Literal](Expr::Literal) expression.
    fn typed_lit(self) -> Expr
    where
        Self: Sized,
    {
        // Default: no distinction between the typed and the regular literal.
        self.lit()
    }
}
422
// Strings use the default `typed_lit`, i.e. the same expression as `lit`.
impl TypedLiteral for String {}
impl TypedLiteral for &str {}
425
426impl Literal for PlSmallStr {
427    fn lit(self) -> Expr {
428        Expr::Literal(Scalar::from(self).into())
429    }
430}
431
432impl Literal for String {
433    fn lit(self) -> Expr {
434        Expr::Literal(Scalar::from(PlSmallStr::from_string(self)).into())
435    }
436}
437
438impl Literal for &str {
439    fn lit(self) -> Expr {
440        Expr::Literal(Scalar::from(PlSmallStr::from_str(self)).into())
441    }
442}
443
444impl Literal for Vec<u8> {
445    fn lit(self) -> Expr {
446        Expr::Literal(Scalar::from(self).into())
447    }
448}
449
450impl Literal for &[u8] {
451    fn lit(self) -> Expr {
452        Expr::Literal(Scalar::from(self.to_vec()).into())
453    }
454}
455
456impl From<AnyValue<'_>> for LiteralValue {
457    fn from(value: AnyValue<'_>) -> Self {
458        Self::Scalar(Scalar::new(value.dtype(), value.into_static()))
459    }
460}
461
// Implements `Literal` for `$TYPE` by converting the value into a `Scalar`.
// `$SCALAR` is accepted but not used by the expansion.
macro_rules! make_literal {
    ($TYPE:ty, $SCALAR:ident) => {
        impl Literal for $TYPE {
            fn lit(self) -> Expr {
                let scalar = Scalar::from(self);
                Expr::Literal(scalar.into())
            }
        }
    };
}
471
// Implements `TypedLiteral` for `$TYPE`, overriding `typed_lit` so that the value is
// converted into a `Scalar`. `$SCALAR` is accepted but not used by the expansion.
macro_rules! make_literal_typed {
    ($TYPE:ty, $SCALAR:ident) => {
        impl TypedLiteral for $TYPE {
            fn typed_lit(self) -> Expr {
                let scalar = Scalar::from(self);
                Expr::Literal(scalar.into())
            }
        }
    };
}
481
// Implements `Literal` for `$TYPE` as a dynamic literal: the value is converted into
// the payload type of the `DynLiteralValue::$SCALAR` variant.
macro_rules! make_dyn_lit {
    ($TYPE:ty, $SCALAR:ident) => {
        impl Literal for $TYPE {
            fn lit(self) -> Expr {
                let payload = self.try_into().unwrap();
                let dynamic = DynLiteralValue::$SCALAR(payload);
                Expr::Literal(LiteralValue::Dyn(dynamic))
            }
        }
    };
}
493
// `bool` only has a scalar literal form.
make_literal!(bool, Boolean);
// `typed_lit` for numeric types: a scalar literal of exactly that type.
make_literal_typed!(f32, Float32);
make_literal_typed!(f64, Float64);
make_literal_typed!(i8, Int8);
make_literal_typed!(i16, Int16);
make_literal_typed!(i32, Int32);
make_literal_typed!(i64, Int64);
make_literal_typed!(i128, Int128);
make_literal_typed!(u8, UInt8);
make_literal_typed!(u16, UInt16);
make_literal_typed!(u32, UInt32);
make_literal_typed!(u64, UInt64);

// `lit` for numeric types: a dynamic `Float`/`Int` literal.
make_dyn_lit!(f32, Float);
make_dyn_lit!(f64, Float);
make_dyn_lit!(i8, Int);
make_dyn_lit!(i16, Int);
make_dyn_lit!(i32, Int);
make_dyn_lit!(i64, Int);
make_dyn_lit!(u8, Int);
make_dyn_lit!(u16, Int);
make_dyn_lit!(u32, Int);
make_dyn_lit!(u64, Int);
make_dyn_lit!(i128, Int);
518
/// The literal Null
pub struct Null {}
/// Ready-made [`Null`] value, e.g. for use as `NULL.lit()`.
pub const NULL: Null = Null {};
522
523impl Literal for Null {
524    fn lit(self) -> Expr {
525        Expr::Literal(LiteralValue::Scalar(Scalar::null(DataType::Null)))
526    }
527}
528
#[cfg(feature = "dtype-datetime")]
impl Literal for NaiveDateTime {
    /// Builds a datetime literal without time zone.
    ///
    /// Nanosecond precision is used when `in_nanoseconds_window` accepts the value,
    /// microsecond precision otherwise.
    fn lit(self) -> Expr {
        let utc = self.and_utc();
        let (timestamp, time_unit) = if in_nanoseconds_window(&self) {
            (utc.timestamp_nanos_opt().unwrap(), TimeUnit::Nanoseconds)
        } else {
            (utc.timestamp_micros(), TimeUnit::Microseconds)
        };

        let scalar = Scalar::new_datetime(timestamp, time_unit, None);
        Expr::Literal(scalar.into())
    }
}
553
#[cfg(feature = "dtype-duration")]
impl Literal for ChronoDuration {
    /// Builds a duration literal using the finest time unit that can hold the value.
    ///
    /// Nanoseconds are preferred, then microseconds. Durations whose microsecond count
    /// overflows an `i64` fall back to milliseconds instead of panicking.
    fn lit(self) -> Expr {
        let (value, time_unit) = if let Some(ns) = self.num_nanoseconds() {
            (ns, TimeUnit::Nanoseconds)
        } else if let Some(us) = self.num_microseconds() {
            (us, TimeUnit::Microseconds)
        } else {
            // `num_milliseconds` returns a plain `i64`, so this branch cannot fail.
            (self.num_milliseconds(), TimeUnit::Milliseconds)
        };

        Expr::Literal(Scalar::new_duration(value, time_unit).into())
    }
}
567
#[cfg(feature = "dtype-duration")]
impl Literal for Duration {
    /// Builds a nanosecond duration literal.
    ///
    /// # Panics
    ///
    /// Panics when the duration has a non-zero months component.
    fn lit(self) -> Expr {
        assert!(
            self.months() == 0,
            "Cannot create literal duration that is not of fixed length; found {self}"
        );

        let magnitude = self.duration_ns();
        let signed_ns = if self.negative() {
            -magnitude
        } else {
            magnitude
        };

        let scalar = Scalar::new_duration(signed_ns, TimeUnit::Nanoseconds);
        Expr::Literal(scalar.into())
    }
}
585
#[cfg(feature = "dtype-datetime")]
impl Literal for NaiveDate {
    /// Builds the literal of the date at 00:00:00, i.e. a datetime literal.
    fn lit(self) -> Expr {
        let midnight = self.and_hms_opt(0, 0, 0).unwrap();
        midnight.lit()
    }
}
592
593impl Literal for Series {
594    fn lit(self) -> Expr {
595        Expr::Literal(LiteralValue::Series(SpecialEq::new(self)))
596    }
597}
598
599impl Literal for LiteralValue {
600    fn lit(self) -> Expr {
601        Expr::Literal(self)
602    }
603}
604
605impl Literal for Scalar {
606    fn lit(self) -> Expr {
607        Expr::Literal(self.into())
608    }
609}
610
611/// Create a Literal Expression from `L`. A literal expression behaves like a column that contains a single distinct
612/// value.
613///
614/// The column is automatically of the "correct" length to make the operations work. Often this is determined by the
615/// length of the `LazyFrame` it is being used with. For instance, `lazy_df.with_column(lit(5).alias("five"))` creates a
616/// new column named "five" that is the length of the Dataframe (at the time `collect` is called), where every value in
617/// the column is `5`.
618pub fn lit<L: Literal>(t: L) -> Expr {
619    t.lit()
620}
621
622pub fn typed_lit<L: TypedLiteral>(t: L) -> Expr {
623    t.typed_lit()
624}
625
626impl Hash for LiteralValue {
627    fn hash<H: Hasher>(&self, state: &mut H) {
628        std::mem::discriminant(self).hash(state);
629        match self {
630            LiteralValue::Series(s) => {
631                // Free stats
632                s.dtype().hash(state);
633                let len = s.len();
634                len.hash(state);
635                s.null_count().hash(state);
636                const RANDOM: u64 = 0x2c194fa5df32a367;
637                let mut rng = (len as u64) ^ RANDOM;
638                for _ in 0..std::cmp::min(5, len) {
639                    let idx = hash_to_partition(rng, len);
640                    s.get(idx).unwrap().hash(state);
641                    rng = rng.rotate_right(17).wrapping_add(RANDOM);
642                }
643            },
644            LiteralValue::Range(range) => range.hash(state),
645            LiteralValue::Scalar(sc) => sc.hash(state),
646            LiteralValue::Dyn(d) => d.hash(state),
647        }
648    }
649}