/// Maps the `u32` indexes of a categorical back to their string values.
pub enum RevMapping {
    /// Mapping built under the global string cache.
    ///
    /// - `PlHashMap<u32, u32>`: maps the indexes from the global cache/categorical array
    ///   to indexes in the local `Utf8Array`.
    /// - `Utf8Array<i64>`: caches the string values.
    /// - `u128`: NOTE(review): presumably an identifier of the global string cache this
    ///   mapping was created under — confirm against the constructor.
    Global(PlHashMap<u32, u32>, Utf8Array<i64>, u128),
    /// Mapping without a global cache; the `Utf8Array<i64>` caches the string values.
    Local(Utf8Array<i64>),
}

Variants§

§

Global(PlHashMap<u32, u32>, Utf8Array<i64>, u128)

Hashmap: maps the indexes from the global cache/categorical array to indexes in the local Utf8Array. Utf8Array: caches the string values.

§

Local(Utf8Array<i64>)

Utf8Array: caches the string values

Implementations§

Get the length of the RevMapping

Examples found in repository?
src/chunked_array/logical/categorical/ops/unique.rs (line 39)
37
38
39
40
41
42
43
    /// Returns the number of unique values.
    ///
    /// When `can_fast_unique()` holds, the answer is the length of the reverse mapping;
    /// otherwise the count is delegated to the logical array.
    pub fn n_unique(&self) -> PolarsResult<usize> {
        if !self.can_fast_unique() {
            return self.logical().n_unique();
        }
        Ok(self.get_rev_map().len())
    }
More examples
Hide additional examples
src/frame/groupby/into_groups.rs (line 34)
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/// Computes the groups of an integer-typed `ChunkedArray`.
///
/// * `ca` - the array whose values are the group keys.
/// * `multithreaded` - when `true` (and `group_multithreaded(ca)` agrees) the keys are
///   handed to `groupby_threaded_num`; otherwise the single-threaded `groupby` is used.
/// * `sorted` - forwarded unchanged to the group-by implementation.
///
/// Arrays without a validity mask are iterated without `Option` wrapping; arrays with
/// one are iterated as optional values.
fn num_groups_proxy<T>(ca: &ChunkedArray<T>, multithreaded: bool, sorted: bool) -> GroupsProxy
where
    T: PolarsIntegerType,
    T::Native: Hash + Eq + Send + AsU64,
    Option<T::Native>: AsU64,
{
    // set group size hint
    #[cfg(feature = "dtype-categorical")]
    let group_size_hint = if let DataType::Categorical(Some(m)) = ca.dtype() {
        // The reverse mapping can be empty (e.g. an empty categorical), in which case a
        // plain division would panic with a division by zero. Fall back to "no hint".
        ca.len().checked_div(m.len()).unwrap_or(0)
    } else {
        0
    };
    #[cfg(not(feature = "dtype-categorical"))]
    let group_size_hint = 0;

    if multithreaded && group_multithreaded(ca) {
        let n_partitions = _set_partition_size() as u64;

        if ca.chunks.len() == 1 {
            // single chunk: use the arrays as iterators
            if !ca.has_validity() {
                let keys = vec![ca.cont_slice().unwrap()];
                groupby_threaded_num(keys, group_size_hint, n_partitions, sorted)
            } else {
                let keys = ca
                    .downcast_iter()
                    .map(|arr| arr.into_iter().map(|x| x.copied()).collect::<Vec<_>>())
                    .collect::<Vec<_>>();
                groupby_threaded_num(keys, group_size_hint, n_partitions, sorted)
            }
        } else if !ca.has_validity() {
            // multiple chunks: use the polars-iterators
            let keys = vec![ca.into_no_null_iter().collect::<Vec<_>>()];
            groupby_threaded_num(keys, group_size_hint, n_partitions, sorted)
        } else {
            let keys = vec![ca.into_iter().collect::<Vec<_>>()];
            groupby_threaded_num(keys, group_size_hint, n_partitions, sorted)
        }
    } else if !ca.has_validity() {
        groupby(ca.into_no_null_iter(), sorted)
    } else {
        groupby(ca.into_iter(), sorted)
    }
}

Categorical to str

Examples found in repository?
src/series/mod.rs (line 831)
826
827
828
829
830
831
832
833
834
835
    /// Returns the value at `index` as a string.
    ///
    /// Utf8 and categorical values are borrowed, a null becomes the literal `"null"`,
    /// and any other value is rendered through its `Display` implementation.
    /// Errors from fetching the value at `index` are propagated.
    pub fn str_value(&self, index: usize) -> PolarsResult<Cow<str>> {
        let value = self.0.get(index)?;
        let text = match value {
            AnyValue::Null => Cow::Borrowed("null"),
            AnyValue::Utf8(s) => Cow::Borrowed(s),
            #[cfg(feature = "dtype-categorical")]
            AnyValue::Categorical(idx, rev) => Cow::Borrowed(rev.get(idx)),
            other => Cow::Owned(format!("{other}")),
        };
        Ok(text)
    }
More examples
Hide additional examples
src/chunked_array/logical/categorical/mod.rs (line 163)
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
    /// Casts this categorical to `dtype`.
    ///
    /// * `Utf8` - every index is resolved to its string through the reverse mapping.
    /// * `UInt32` - the underlying chunks are re-wrapped as a `UInt32Chunked`.
    /// * `Categorical` - returns a clone of `self`.
    /// * anything else - delegated to the cast of the logical array.
    fn cast(&self, dtype: &DataType) -> PolarsResult<Series> {
        match dtype {
            DataType::Utf8 => {
                let rev_map = &**self.get_rev_map();
                let n_rows = self.len();

                // Same capacity guess as before: five bytes of string data per row.
                let mut builder =
                    Utf8ChunkedBuilder::new(self.logical.name(), n_rows, n_rows * 5);

                if self.logical.has_validity() {
                    for opt_idx in self.logical.into_iter() {
                        builder.append_option(opt_idx.map(|idx: u32| rev_map.get(idx)));
                    }
                } else {
                    for idx in self.logical.into_no_null_iter() {
                        builder.append_value(rev_map.get(idx));
                    }
                }

                Ok(builder.finish().into_series())
            }
            DataType::UInt32 => {
                let chunks = self.logical.chunks.clone();
                Ok(UInt32Chunked::from_chunks(self.logical.name(), chunks).into_series())
            }
            #[cfg(feature = "dtype-categorical")]
            DataType::Categorical(_) => Ok(self.clone().into_series()),
            _ => self.logical.cast(dtype),
        }
    }
src/fmt.rs (line 749)
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
    /// Writes a textual representation of this `AnyValue` to `f`.
    ///
    /// Unsigned integers and booleans are written directly, signed integers and floats
    /// go through `fmt_integer` / `fmt_float`, strings and categoricals are wrapped in
    /// double quotes, and the temporal / nested variants are delegated to helpers.
    /// Most non-primitive arms are only compiled when their feature flag is enabled.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Width handed to `fmt_integer` / `fmt_float`.
        // NOTE(review): presumably 0 means "no padding" — confirm in those helpers.
        let width = 0;
        match self {
            AnyValue::Null => write!(f, "null"),
            AnyValue::UInt8(v) => write!(f, "{v}"),
            AnyValue::UInt16(v) => write!(f, "{v}"),
            AnyValue::UInt32(v) => write!(f, "{v}"),
            AnyValue::UInt64(v) => write!(f, "{v}"),
            AnyValue::Int8(v) => fmt_integer(f, width, *v),
            AnyValue::Int16(v) => fmt_integer(f, width, *v),
            AnyValue::Int32(v) => fmt_integer(f, width, *v),
            AnyValue::Int64(v) => fmt_integer(f, width, *v),
            AnyValue::Float32(v) => fmt_float(f, width, *v),
            AnyValue::Float64(v) => fmt_float(f, width, *v),
            AnyValue::Boolean(v) => write!(f, "{}", *v),
            // String values are surrounded by double quotes.
            AnyValue::Utf8(v) => write!(f, "{}", format_args!("\"{v}\"")),
            AnyValue::Utf8Owned(v) => write!(f, "{}", format_args!("\"{v}\"")),
            // Binary payloads are not printed; a fixed placeholder is written instead.
            #[cfg(feature = "dtype-binary")]
            AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => write!(f, "[binary data]"),
            #[cfg(feature = "dtype-date")]
            AnyValue::Date(v) => write!(f, "{}", date32_to_date(*v)),
            #[cfg(feature = "dtype-datetime")]
            AnyValue::Datetime(v, tu, tz) => {
                // Convert the raw timestamp to a naive datetime according to its time unit.
                let ndt = match tu {
                    TimeUnit::Nanoseconds => timestamp_ns_to_datetime(*v),
                    TimeUnit::Microseconds => timestamp_us_to_datetime(*v),
                    TimeUnit::Milliseconds => timestamp_ms_to_datetime(*v),
                };
                match tz {
                    // No time zone: print the naive datetime as is.
                    None => write!(f, "{ndt}"),
                    Some(_tz) => {
                        #[cfg(feature = "timezones")]
                        {
                            // First try to parse the zone as a `chrono_tz::Tz`; if that
                            // fails, fall back to `parse_offset`.
                            match _tz.parse::<chrono_tz::Tz>() {
                                Ok(tz) => {
                                    // Interpret the naive datetime as UTC, then convert it
                                    // to the requested time zone.
                                    let dt_utc = chrono::Utc.from_local_datetime(&ndt).unwrap();
                                    let dt_tz_aware = dt_utc.with_timezone(&tz);
                                    write!(f, "{dt_tz_aware}")
                                }
                                Err(_) => match parse_offset(_tz) {
                                    Ok(offset) => {
                                        let dt_tz_aware = offset.from_utc_datetime(&ndt);
                                        write!(f, "{dt_tz_aware}")
                                    }
                                    // Neither parse succeeded: write a marker instead of failing.
                                    Err(_) => write!(f, "invalid timezone"),
                                },
                            }
                        }
                        // Without the `timezones` feature a zoned datetime cannot be
                        // formatted, and this arm panics.
                        #[cfg(not(feature = "timezones"))]
                        {
                            panic!("activate 'timezones' feature")
                        }
                    }
                }
            }
            #[cfg(feature = "dtype-duration")]
            AnyValue::Duration(v, tu) => match tu {
                TimeUnit::Nanoseconds => fmt_duration_ns(f, *v),
                TimeUnit::Microseconds => fmt_duration_us(f, *v),
                TimeUnit::Milliseconds => fmt_duration_ms(f, *v),
            },
            #[cfg(feature = "dtype-time")]
            AnyValue::Time(_) => {
                let nt: chrono::NaiveTime = self.into();
                write!(f, "{nt}")
            }
            #[cfg(feature = "dtype-categorical")]
            AnyValue::Categorical(idx, rev) => {
                // Resolve the categorical index to its string and quote it like a Utf8 value.
                let s = rev.get(*idx);
                write!(f, "\"{s}\"")
            }
            AnyValue::List(s) => write!(f, "{}", s.fmt_list()),
            #[cfg(feature = "object")]
            AnyValue::Object(v) => write!(f, "{v}"),
            #[cfg(feature = "dtype-struct")]
            av @ AnyValue::Struct(_, _, _) => {
                // Collect the struct's field values into `avs` before formatting them.
                let mut avs = vec![];
                av._materialize_struct_av(&mut avs);
                fmt_struct(f, &avs)
            }
            #[cfg(feature = "dtype-struct")]
            AnyValue::StructOwned(payload) => fmt_struct(f, &payload.0),
        }
    }

Check if the categoricals are created under the same global string cache.

Examples found in repository?
src/frame/hash_join/mod.rs (line 111)
108
109
110
111
112
113
114
115
116
117
118
119
/// Verifies that two categorical dtypes were created under the same source.
///
/// Returns `Ok(())` when either dtype is not a categorical with a reverse mapping, or
/// when both reverse mappings report the same source via `same_src`.
///
/// # Errors
///
/// Returns `PolarsError::ComputeError` when both dtypes are categoricals whose reverse
/// mappings do not share the same source.
pub fn _check_categorical_src(l: &DataType, r: &DataType) -> PolarsResult<()> {
    if let (DataType::Categorical(Some(l)), DataType::Categorical(Some(r))) = (l, r) {
        if !l.same_src(r) {
            // The `\` continuation strips the newline and the next line's leading
            // whitespace, so the separating space must sit before the backslash.
            return Err(PolarsError::ComputeError(
                "Joins/or comparisons on categorical dtypes can only happen if they are created under the same global string cache. \
                 Hint: set a global StringCache"
                    .into(),
            ));
        }
    }
    Ok(())
}
More examples
Hide additional examples
src/series/comparison.rs (line 160)
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
    /// Create a boolean mask by checking for equality.
    ///
    /// A categorical compared against a length-1 Utf8 series (in either order) is routed
    /// through `compare_cat_to_str_series`. Two categoricals are compared on their logical
    /// arrays, but only when their reverse mappings share the same source.
    fn equal(&self, rhs: &Series) -> PolarsResult<BooleanChunked> {
        validate_types(self.dtype(), rhs.dtype())?;
        #[cfg(feature = "dtype-categorical")]
        use DataType::*;
        let mut mask = match (self.dtype(), rhs.dtype(), self.len(), rhs.len()) {
            #[cfg(feature = "dtype-categorical")]
            (Categorical(_), Utf8, _, 1) => {
                let cmp = |s, idx| s.equal(idx);
                return compare_cat_to_str_series(self, rhs, self.name(), cmp, false);
            }
            #[cfg(feature = "dtype-categorical")]
            (Utf8, Categorical(_), 1, _) => {
                // Operands are swapped so that the categorical comes first.
                let cmp = |s, idx| s.equal(idx);
                return compare_cat_to_str_series(rhs, self, self.name(), cmp, false);
            }
            #[cfg(feature = "dtype-categorical")]
            (Categorical(Some(rev_map_l)), Categorical(Some(rev_map_r)), _, _) => {
                if !rev_map_l.same_src(rev_map_r) {
                    return Err(PolarsError::ComputeError("Cannot compare categoricals originating from different sources. Consider setting a global string cache.".into()));
                }
                let lhs_logical = self.categorical().unwrap().logical();
                let rhs_logical = rhs.categorical().unwrap().logical();
                lhs_logical.equal(rhs_logical)
            }
            _ => {
                impl_compare!(self, rhs, equal)
            }
        };
        mask.rename(self.name());
        Ok(mask)
    }

    /// Create a boolean mask by checking for inequality.
    ///
    /// A categorical compared against a length-1 Utf8 series (in either order) is routed
    /// through `compare_cat_to_str_series`. Two categoricals are compared on their logical
    /// arrays, but only when their reverse mappings share the same source.
    fn not_equal(&self, rhs: &Series) -> PolarsResult<BooleanChunked> {
        validate_types(self.dtype(), rhs.dtype())?;
        #[cfg(feature = "dtype-categorical")]
        use DataType::*;
        let mut mask = match (self.dtype(), rhs.dtype(), self.len(), rhs.len()) {
            #[cfg(feature = "dtype-categorical")]
            (Categorical(_), Utf8, _, 1) => {
                let cmp = |s, idx| s.not_equal(idx);
                return compare_cat_to_str_series(self, rhs, self.name(), cmp, true);
            }
            #[cfg(feature = "dtype-categorical")]
            (Utf8, Categorical(_), 1, _) => {
                // Operands are swapped so that the categorical comes first.
                let cmp = |s, idx| s.not_equal(idx);
                return compare_cat_to_str_series(rhs, self, self.name(), cmp, true);
            }
            #[cfg(feature = "dtype-categorical")]
            (Categorical(Some(rev_map_l)), Categorical(Some(rev_map_r)), _, _) => {
                if !rev_map_l.same_src(rev_map_r) {
                    return Err(PolarsError::ComputeError("Cannot compare categoricals originating from different sources. Consider setting a global string cache.".into()));
                }
                let lhs_logical = self.categorical().unwrap().logical();
                let rhs_logical = rhs.categorical().unwrap().logical();
                lhs_logical.not_equal(rhs_logical)
            }
            _ => {
                impl_compare!(self, rhs, not_equal)
            }
        };
        mask.rename(self.name());
        Ok(mask)
    }

str to Categorical

Examples found in repository?
src/series/comparison.rs (line 76)
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/// Compares a categorical series against a single string value.
///
/// * `cat` - the series to compare; it must be categorical.
/// * `value` - the string to look up in the categorical's reverse mapping.
/// * `name` - name given to the result when `value` is not in the mapping.
/// * `compare` - comparison applied to the `UInt32` cast of `cat` and the index found
///   for `value`.
/// * `fill_value` - value every row gets when `value` is not in the mapping.
///
/// # Errors
///
/// Propagates a failure of the `UInt32` cast or of `compare`.
///
/// # Panics
///
/// Panics when `cat` is not a categorical series.
fn compare_cat_to_str_value<Compare>(
    cat: &Series,
    value: &str,
    name: &str,
    compare: Compare,
    fill_value: bool,
) -> PolarsResult<BooleanChunked>
where
    Compare: Fn(&Series, u32) -> PolarsResult<BooleanChunked>,
{
    let cat = cat.categorical().expect("should be categorical");
    match cat.get_rev_map().find(value) {
        // The string is not one of the categories, so every row gets the same answer.
        None => Ok(BooleanChunked::full(name, fill_value, cat.len())),
        Some(cat_idx) => {
            // Propagate a cast failure through the `PolarsResult` return type
            // rather than panicking.
            let physical = cat.cast(&DataType::UInt32)?;
            compare(&physical, cat_idx)
        }
    }
}
More examples
Hide additional examples
src/chunked_array/ops/is_in.rs (line 145)
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
    /// Checks membership of the string values of `self` in `other`.
    ///
    /// The strategy depends on the dtype of `other`:
    /// * `List(Categorical)` - only the first value of `self` is looked at and tested
    ///   against every sublist of `other`.
    /// * `List(Utf8)` - a single value is tested against every sublist, or values and
    ///   sublists are paired up row by row.
    /// * `Utf8` - every value of `self` is tested against the set of values in `other`.
    ///
    /// Any other dtype yields a `PolarsError::SchemaMisMatch`. The resulting mask carries
    /// the name of `self`.
    fn is_in(&self, other: &Series) -> PolarsResult<BooleanChunked> {
        match other.dtype() {
            #[cfg(feature = "dtype-categorical")]
            DataType::List(dt) if matches!(&**dt, DataType::Categorical(_)) => {
                if let DataType::Categorical(Some(rev_map)) = &**dt {
                    // Only the first value of `self` takes part in this branch.
                    let opt_val = self.get(0);

                    let other = other.list()?;
                    match opt_val {
                        None => {
                            // A null matches every sublist that contains at least one null;
                            // a missing sublist gives `false`.
                            let mut ca: BooleanChunked = other
                                .amortized_iter()
                                .map(|opt_s| {
                                    opt_s.map(|s| s.as_ref().null_count() > 0) == Some(true)
                                })
                                .collect_trusted();
                            ca.rename(self.name());
                            Ok(ca)
                        }
                        Some(value) => {
                            // Translate the string into its categorical index once, then
                            // compare indexes instead of strings.
                            match rev_map.find(value) {
                                // all false
                                None => Ok(BooleanChunked::full(self.name(), false, other.len())),
                                Some(idx) => {
                                    let mut ca: BooleanChunked = other
                                        .amortized_iter()
                                        .map(|opt_s| {
                                            opt_s.map(|s| {
                                                // Compare against the physical (u32) representation.
                                                let s = s.as_ref().to_physical_repr();
                                                let ca = s.as_ref().u32().unwrap();
                                                if ca.null_count() == 0 {
                                                    ca.into_no_null_iter().any(|a| a == idx)
                                                } else {
                                                    ca.into_iter().any(|a| a == Some(idx))
                                                }
                                            }) == Some(true)
                                        })
                                        .collect_trusted();
                                    ca.rename(self.name());
                                    Ok(ca)
                                }
                            }
                        }
                    }
                } else {
                    // Reached when the inner dtype is `Categorical(None)`: the guard above
                    // accepts any `Categorical(_)`, the `if let` only `Some(rev_map)`.
                    // NOTE(review): confirm a list dtype never carries `Categorical(None)` here.
                    unreachable!()
                }
            }
            DataType::List(dt) if DataType::Utf8 == **dt => {
                let mut ca: BooleanChunked = if self.len() == 1 && other.len() != 1 {
                    // Single value on the left: test it against every sublist.
                    let value = self.get(0);
                    other
                        .list()?
                        .amortized_iter()
                        .map(|opt_s| {
                            opt_s.map(|s| {
                                let ca = s.as_ref().unpack::<Utf8Type>().unwrap();
                                ca.into_iter().any(|a| a == value)
                            }) == Some(true)
                        })
                        .collect_trusted()
                } else {
                    // Pair every value with the sublist in the same row.
                    self.into_iter()
                        .zip(other.list()?.amortized_iter())
                        .map(|(value, series)| match (value, series) {
                            (val, Some(series)) => {
                                let ca = series.as_ref().unpack::<Utf8Type>().unwrap();
                                ca.into_iter().any(|a| a == val)
                            }
                            // A missing sublist never contains the value.
                            _ => false,
                        })
                        .collect_trusted()
                };
                ca.rename(self.name());
                Ok(ca)
            }
            DataType::Utf8 => {
                // Collect the optional values of `other` into a set (a null is stored as
                // `None`), then probe it for every optional value of `self`.
                let mut set = HashSet::with_capacity(other.len());

                let other = other.utf8()?;
                other.downcast_iter().for_each(|iter| {
                    iter.into_iter().for_each(|opt_val| {
                        set.insert(opt_val);
                    })
                });
                let mut ca: BooleanChunked = self
                    .into_iter()
                    .map(|opt_val| set.contains(&opt_val))
                    .collect_trusted();
                ca.rename(self.name());
                Ok(ca)
            }
            _ => Err(PolarsError::SchemaMisMatch(
                format!(
                    "cannot do is_in operation with left a dtype: {:?} and right a dtype {:?}",
                    self.dtype(),
                    other.dtype()
                )
                .into(),
            )),
        }
        // Rename whatever mask was produced above to the name of `self`.
        .map(|mut ca| {
            ca.rename(self.name());
            ca
        })
    }

Trait Implementations§

Returns a copy of the value. Read more
Performs copy-assignment from source. Read more
Formats the value using the given formatter. Read more
Returns the “default value” for a type. Read more

Auto Trait Implementations§

Blanket Implementations§

Gets the TypeId of self. Read more
Immutably borrows from an owned value. Read more
Mutably borrows from an owned value. Read more

Returns the argument unchanged.

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

The alignment of pointer.
The type for initializers.
Initializes a with the given initializer. Read more
Dereferences the given pointer. Read more
Mutably dereferences the given pointer. Read more
Drops the object pointed to by the given pointer. Read more
The resulting type after obtaining ownership.
Creates owned data from borrowed data, usually by cloning. Read more
Uses borrowed data to replace owned data, usually by cloning. Read more
The type returned in the event of a conversion error.
Performs the conversion.
The type returned in the event of a conversion error.
Performs the conversion.