Enum polars_core::prelude::RevMapping
source · Variants§
Global(PlHashMap<u32, u32>, Utf8Array<i64>, u128)
Hashmap: maps the indexes from the global cache/categorical array to indexes in the local Utf8Array. Utf8Array: caches the string values.
Local(Utf8Array<i64>)
Utf8Array: caches the string values
Implementations§
source§impl RevMapping
impl RevMapping
pub fn is_global(&self) -> bool
sourcepub fn len(&self) -> usize
pub fn len(&self) -> usize
Get the length of the RevMapping
Examples found in repository?
More examples
src/frame/groupby/into_groups.rs (line 34)
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/// Groups the values of an integer `ChunkedArray` and returns the resulting `GroupsProxy`.
///
/// When `multithreaded` is set and `group_multithreaded(ca)` agrees, the keys are collected
/// and handed to `groupby_threaded_num`; otherwise the single-threaded `groupby` is used.
/// In both paths the null-free iteration is chosen whenever the array has no validity mask.
/// `sorted` is forwarded unchanged to the grouping routine.
fn num_groups_proxy<T>(ca: &ChunkedArray<T>, multithreaded: bool, sorted: bool) -> GroupsProxy
where
    T: PolarsIntegerType,
    T::Native: Hash + Eq + Send + AsU64,
    Option<T::Native>: AsU64,
{
    // Group size hint: for categoricals, the number of rows divided by the number of
    // categories in the reverse mapping. `checked_div` guards against a reverse mapping
    // without any categories, where a plain division would panic; the hint then falls
    // back to 0, the same value used for non-categorical data.
    #[cfg(feature = "dtype-categorical")]
    let group_size_hint = if let DataType::Categorical(Some(m)) = ca.dtype() {
        ca.len().checked_div(m.len()).unwrap_or(0)
    } else {
        0
    };
    #[cfg(not(feature = "dtype-categorical"))]
    let group_size_hint = 0;

    if multithreaded && group_multithreaded(ca) {
        let n_partitions = _set_partition_size() as u64;

        // use the arrays as iterators
        if ca.chunks.len() == 1 {
            if !ca.has_validity() {
                let keys = vec![ca.cont_slice().unwrap()];
                groupby_threaded_num(keys, group_size_hint, n_partitions, sorted)
            } else {
                let keys = ca
                    .downcast_iter()
                    .map(|arr| arr.into_iter().map(|x| x.copied()).collect::<Vec<_>>())
                    .collect::<Vec<_>>();
                groupby_threaded_num(keys, group_size_hint, n_partitions, sorted)
            }
        // use the polars-iterators
        } else if !ca.has_validity() {
            let keys = vec![ca.into_no_null_iter().collect::<Vec<_>>()];
            groupby_threaded_num(keys, group_size_hint, n_partitions, sorted)
        } else {
            let keys = vec![ca.into_iter().collect::<Vec<_>>()];
            groupby_threaded_num(keys, group_size_hint, n_partitions, sorted)
        }
    } else if !ca.has_validity() {
        groupby(ca.into_no_null_iter(), sorted)
    } else {
        groupby(ca.into_iter(), sorted)
    }
}sourcepub fn get(&self, idx: u32) -> &str
pub fn get(&self, idx: u32) -> &str
Categorical to str: returns the string value for the categorical index `idx`.
Examples found in repository?
More examples
src/chunked_array/logical/categorical/mod.rs (line 163)
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
/// Casts this categorical array to `dtype`.
///
/// * `Utf8`: every category index is resolved to its string through the reverse mapping.
/// * `UInt32`: the underlying chunks are re-wrapped as a `UInt32Chunked`.
/// * `Categorical`: a clone of `self` is returned.
/// * anything else: delegated to the cast of the logical array.
fn cast(&self, dtype: &DataType) -> PolarsResult<Series> {
    match dtype {
        DataType::Utf8 => {
            let rev_map = &**self.get_rev_map();
            // The third builder argument is sized as 5 per value, as in the
            // original call.
            let mut builder =
                Utf8ChunkedBuilder::new(self.logical.name(), self.len(), self.len() * 5);

            if self.logical.has_validity() {
                // Nulls are possible: keep them as nulls in the output.
                for opt_idx in self.logical.into_iter() {
                    builder.append_option(opt_idx.map(|idx: u32| rev_map.get(idx)));
                }
            } else {
                for idx in self.logical.into_no_null_iter() {
                    builder.append_value(rev_map.get(idx));
                }
            }

            Ok(builder.finish().into_series())
        }
        DataType::UInt32 => Ok(UInt32Chunked::from_chunks(
            self.logical.name(),
            self.logical.chunks.clone(),
        )
        .into_series()),
        #[cfg(feature = "dtype-categorical")]
        DataType::Categorical(_) => Ok(self.clone().into_series()),
        _ => self.logical.cast(dtype),
    }
}src/fmt.rs (line 749)
681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764
/// Writes a textual representation of this `AnyValue` to `f`.
///
/// One match arm per variant; arms for optional dtypes are only compiled when the
/// corresponding cargo feature is enabled.
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
// Width argument handed to the `fmt_integer` / `fmt_float` helpers below.
let width = 0;
match self {
AnyValue::Null => write!(f, "null"),
AnyValue::UInt8(v) => write!(f, "{v}"),
AnyValue::UInt16(v) => write!(f, "{v}"),
AnyValue::UInt32(v) => write!(f, "{v}"),
AnyValue::UInt64(v) => write!(f, "{v}"),
AnyValue::Int8(v) => fmt_integer(f, width, *v),
AnyValue::Int16(v) => fmt_integer(f, width, *v),
AnyValue::Int32(v) => fmt_integer(f, width, *v),
AnyValue::Int64(v) => fmt_integer(f, width, *v),
AnyValue::Float32(v) => fmt_float(f, width, *v),
AnyValue::Float64(v) => fmt_float(f, width, *v),
AnyValue::Boolean(v) => write!(f, "{}", *v),
// String values are written surrounded by double quotes.
AnyValue::Utf8(v) => write!(f, "{}", format_args!("\"{v}\"")),
AnyValue::Utf8Owned(v) => write!(f, "{}", format_args!("\"{v}\"")),
#[cfg(feature = "dtype-binary")]
AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => write!(f, "[binary data]"),
#[cfg(feature = "dtype-date")]
AnyValue::Date(v) => write!(f, "{}", date32_to_date(*v)),
#[cfg(feature = "dtype-datetime")]
AnyValue::Datetime(v, tu, tz) => {
// Convert the raw timestamp with the helper matching its time unit.
let ndt = match tu {
TimeUnit::Nanoseconds => timestamp_ns_to_datetime(*v),
TimeUnit::Microseconds => timestamp_us_to_datetime(*v),
TimeUnit::Milliseconds => timestamp_ms_to_datetime(*v),
};
match tz {
None => write!(f, "{ndt}"),
Some(_tz) => {
#[cfg(feature = "timezones")]
{
// First try to parse the zone as a `chrono_tz::Tz`; if that fails, fall
// back to `parse_offset`; if both fail, write "invalid timezone".
match _tz.parse::<chrono_tz::Tz>() {
Ok(tz) => {
// Treat the naive datetime as UTC, then convert to the parsed zone.
let dt_utc = chrono::Utc.from_local_datetime(&ndt).unwrap();
let dt_tz_aware = dt_utc.with_timezone(&tz);
write!(f, "{dt_tz_aware}")
}
Err(_) => match parse_offset(_tz) {
Ok(offset) => {
let dt_tz_aware = offset.from_utc_datetime(&ndt);
write!(f, "{dt_tz_aware}")
}
Err(_) => write!(f, "invalid timezone"),
},
}
}
// Without the `timezones` feature, formatting a datetime that carries a
// time zone panics.
#[cfg(not(feature = "timezones"))]
{
panic!("activate 'timezones' feature")
}
}
}
}
#[cfg(feature = "dtype-duration")]
AnyValue::Duration(v, tu) => match tu {
TimeUnit::Nanoseconds => fmt_duration_ns(f, *v),
TimeUnit::Microseconds => fmt_duration_us(f, *v),
TimeUnit::Milliseconds => fmt_duration_ms(f, *v),
},
#[cfg(feature = "dtype-time")]
AnyValue::Time(_) => {
let nt: chrono::NaiveTime = self.into();
write!(f, "{nt}")
}
#[cfg(feature = "dtype-categorical")]
AnyValue::Categorical(idx, rev) => {
// Resolve the category index to its string via the reverse mapping and
// write it in double quotes.
let s = rev.get(*idx);
write!(f, "\"{s}\"")
}
AnyValue::List(s) => write!(f, "{}", s.fmt_list()),
#[cfg(feature = "object")]
AnyValue::Object(v) => write!(f, "{v}"),
#[cfg(feature = "dtype-struct")]
av @ AnyValue::Struct(_, _, _) => {
// Collect the struct's field values into `avs` before formatting them.
let mut avs = vec![];
av._materialize_struct_av(&mut avs);
fmt_struct(f, &avs)
}
#[cfg(feature = "dtype-struct")]
AnyValue::StructOwned(payload) => fmt_struct(f, &payload.0),
}
}sourcepub fn same_src(&self, other: &Self) -> bool
pub fn same_src(&self, other: &Self) -> bool
Check if the categoricals are created under the same global string cache.
Examples found in repository?
src/frame/hash_join/mod.rs (line 111)
108 109 110 111 112 113 114 115 116 117 118 119
/// Checks that two dtypes may be joined or compared with respect to their categorical source.
///
/// Returns a `PolarsError::ComputeError` when both dtypes are categoricals carrying a reverse
/// mapping and `same_src` reports that the mappings differ in origin. Every other combination
/// of dtypes is accepted.
pub fn _check_categorical_src(l: &DataType, r: &DataType) -> PolarsResult<()> {
    match (l, r) {
        (DataType::Categorical(Some(l)), DataType::Categorical(Some(r))) => {
            if !l.same_src(r) {
                // The `\` line continuation swallows the newline and the leading whitespace
                // of the next line, so the separating space must come before the backslash.
                return Err(PolarsError::ComputeError(
                    "Joins/or comparisons on categorical dtypes can only happen if they are created under the same global string cache. \
                     Hint: set a global StringCache"
                        .into(),
                ));
            }
            Ok(())
        }
        _ => Ok(()),
    }
}More examples
src/series/comparison.rs (line 160)
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
/// Create a boolean mask by checking for equality.
///
/// Comparisons between a categorical series and a single string value are delegated to
/// `compare_cat_to_str_series`. Two categoricals are compared on their logical
/// representation and must pass the `same_src` check, otherwise a `ComputeError` is
/// returned. All remaining dtype combinations go through `impl_compare!`.
fn equal(&self, rhs: &Series) -> PolarsResult<BooleanChunked> {
    validate_types(self.dtype(), rhs.dtype())?;
    #[cfg(feature = "dtype-categorical")]
    use DataType::*;
    let mut mask = match (self.dtype(), rhs.dtype(), self.len(), rhs.len()) {
        // categorical on the left, one string on the right
        #[cfg(feature = "dtype-categorical")]
        (Categorical(_), Utf8, _, 1) => {
            return compare_cat_to_str_series(self, rhs, self.name(), |s, idx| s.equal(idx), false);
        }
        // one string on the left, categorical on the right
        #[cfg(feature = "dtype-categorical")]
        (Utf8, Categorical(_), 1, _) => {
            return compare_cat_to_str_series(rhs, self, self.name(), |s, idx| s.equal(idx), false);
        }
        #[cfg(feature = "dtype-categorical")]
        (Categorical(Some(rev_map_l)), Categorical(Some(rev_map_r)), _, _) => {
            // Bail out first when the two categoricals do not share a source.
            if !rev_map_l.same_src(rev_map_r) {
                return Err(PolarsError::ComputeError("Cannot compare categoricals originating from different sources. Consider setting a global string cache.".into()));
            }
            let lhs_cat = self.categorical().unwrap();
            let rhs_cat = rhs.categorical().unwrap();
            lhs_cat.logical().equal(rhs_cat.logical())
        }
        _ => {
            impl_compare!(self, rhs, equal)
        }
    };
    // The result carries the name of the left-hand series.
    mask.rename(self.name());
    Ok(mask)
}
/// Create a boolean mask by checking for inequality.
///
/// Comparisons between a categorical series and a single string value are delegated to
/// `compare_cat_to_str_series`. Two categoricals are compared on their logical
/// representation and must pass the `same_src` check, otherwise a `ComputeError` is
/// returned. All remaining dtype combinations go through `impl_compare!`.
fn not_equal(&self, rhs: &Series) -> PolarsResult<BooleanChunked> {
    validate_types(self.dtype(), rhs.dtype())?;
    #[cfg(feature = "dtype-categorical")]
    use DataType::*;
    let mut mask = match (self.dtype(), rhs.dtype(), self.len(), rhs.len()) {
        // categorical on the left, one string on the right
        #[cfg(feature = "dtype-categorical")]
        (Categorical(_), Utf8, _, 1) => {
            return compare_cat_to_str_series(self, rhs, self.name(), |s, idx| s.not_equal(idx), true);
        }
        // one string on the left, categorical on the right
        #[cfg(feature = "dtype-categorical")]
        (Utf8, Categorical(_), 1, _) => {
            return compare_cat_to_str_series(rhs, self, self.name(), |s, idx| s.not_equal(idx), true);
        }
        #[cfg(feature = "dtype-categorical")]
        (Categorical(Some(rev_map_l)), Categorical(Some(rev_map_r)), _, _) => {
            // Bail out first when the two categoricals do not share a source.
            if !rev_map_l.same_src(rev_map_r) {
                return Err(PolarsError::ComputeError("Cannot compare categoricals originating from different sources. Consider setting a global string cache.".into()));
            }
            let lhs_cat = self.categorical().unwrap();
            let rhs_cat = rhs.categorical().unwrap();
            lhs_cat.logical().not_equal(rhs_cat.logical())
        }
        _ => {
            impl_compare!(self, rhs, not_equal)
        }
    };
    // The result carries the name of the left-hand series.
    mask.rename(self.name());
    Ok(mask)
}sourcepub fn find(&self, value: &str) -> Option<u32>
pub fn find(&self, value: &str) -> Option<u32>
str to Categorical: returns the categorical index of `value`, or `None` if the value is not found.
Examples found in repository?
src/series/comparison.rs (line 76)
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
/// Compares a categorical series against a single string `value`.
///
/// `value` is looked up in the reverse mapping of `cat`. If it is found, the series is cast
/// to `UInt32` and `compare` is called with that series and the found index. If it is not
/// found, a mask named `name` with the length of `cat` is returned, filled with `fill_value`.
///
/// Panics if `cat` is not a categorical series.
fn compare_cat_to_str_value<Compare>(
    cat: &Series,
    value: &str,
    name: &str,
    compare: Compare,
    fill_value: bool,
) -> PolarsResult<BooleanChunked>
where
    Compare: Fn(&Series, u32) -> PolarsResult<BooleanChunked>,
{
    let cat_ca = cat.categorical().expect("should be categorical");
    let rev_map = cat_ca.get_rev_map();

    if let Some(cat_idx) = rev_map.find(value) {
        // Compare on the physical u32 indexes.
        let physical = cat_ca.cast(&DataType::UInt32).unwrap();
        compare(&physical, cat_idx)
    } else {
        // The string is not among the categories, so every row gets the fill value.
        Ok(BooleanChunked::full(name, fill_value, cat_ca.len()))
    }
}More examples
src/chunked_array/ops/is_in.rs (line 145)
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
/// Membership test of the values in `self` against `other`, returning a boolean mask.
///
/// The strategy depends on the dtype of `other`:
/// * list of categoricals: the first value of `self` is looked up in the reverse mapping
///   and each sub-list is searched for the resulting index;
/// * list of strings: each sub-list is searched for the string value(s) of `self`;
/// * strings: the values of `other` are collected into a hash set and each value of
///   `self` is tested against it;
/// * anything else: a `SchemaMisMatch` error is returned.
fn is_in(&self, other: &Series) -> PolarsResult<BooleanChunked> {
match other.dtype() {
#[cfg(feature = "dtype-categorical")]
DataType::List(dt) if matches!(&**dt, DataType::Categorical(_)) => {
if let DataType::Categorical(Some(rev_map)) = &**dt {
// NOTE(review): only the value at index 0 of `self` is used in this branch —
// confirm callers guarantee a single-value `self` here.
let opt_val = self.get(0);
let other = other.list()?;
match opt_val {
// A null on the left: true for every sub-list that contains at least one null.
None => {
let mut ca: BooleanChunked = other
.amortized_iter()
.map(|opt_s| {
opt_s.map(|s| s.as_ref().null_count() > 0) == Some(true)
})
.collect_trusted();
ca.rename(self.name());
Ok(ca)
}
Some(value) => {
match rev_map.find(value) {
// all false
None => Ok(BooleanChunked::full(self.name(), false, other.len())),
Some(idx) => {
// Search the physical u32 representation of each sub-list for `idx`;
// a null sub-list yields false.
let mut ca: BooleanChunked = other
.amortized_iter()
.map(|opt_s| {
opt_s.map(|s| {
let s = s.as_ref().to_physical_repr();
let ca = s.as_ref().u32().unwrap();
if ca.null_count() == 0 {
ca.into_no_null_iter().any(|a| a == idx)
} else {
ca.into_iter().any(|a| a == Some(idx))
}
}) == Some(true)
})
.collect_trusted();
ca.rename(self.name());
Ok(ca)
}
}
}
}
} else {
// The match guard above already established that `dt` is a categorical.
unreachable!()
}
}
DataType::List(dt) if DataType::Utf8 == **dt => {
// A single value on the left is compared against every sub-list; otherwise
// values and sub-lists are paired up row by row.
let mut ca: BooleanChunked = if self.len() == 1 && other.len() != 1 {
let value = self.get(0);
other
.list()?
.amortized_iter()
.map(|opt_s| {
opt_s.map(|s| {
let ca = s.as_ref().unpack::<Utf8Type>().unwrap();
ca.into_iter().any(|a| a == value)
}) == Some(true)
})
.collect_trusted()
} else {
self.into_iter()
.zip(other.list()?.amortized_iter())
.map(|(value, series)| match (value, series) {
(val, Some(series)) => {
let ca = series.as_ref().unpack::<Utf8Type>().unwrap();
ca.into_iter().any(|a| a == val)
}
// A null sub-list never contains the value.
_ => false,
})
.collect_trusted()
};
ca.rename(self.name());
Ok(ca)
}
DataType::Utf8 => {
// Collect all (optional) values of `other` into a set, then test each value of
// `self` for membership.
let mut set = HashSet::with_capacity(other.len());
let other = other.utf8()?;
other.downcast_iter().for_each(|iter| {
iter.into_iter().for_each(|opt_val| {
set.insert(opt_val);
})
});
let mut ca: BooleanChunked = self
.into_iter()
.map(|opt_val| set.contains(&opt_val))
.collect_trusted();
ca.rename(self.name());
Ok(ca)
}
_ => Err(PolarsError::SchemaMisMatch(
format!(
"cannot do is_in operation with left a dtype: {:?} and right a dtype {:?}",
self.dtype(),
other.dtype()
)
.into(),
)),
}
// Every successful result is (re)named after `self`, including those already renamed
// inside the arms above.
.map(|mut ca| {
ca.rename(self.name());
ca
})
}Trait Implementations§
source§impl Clone for RevMapping
impl Clone for RevMapping
source§fn clone(&self) -> RevMapping
fn clone(&self) -> RevMapping
Returns a copy of the value. Read more
1.0.0 · source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more