1use core::fmt::{Debug, Formatter};
2use core::hash::{Hash, Hasher};
3
4use indexmap::map::MutableKeys;
5use polars_error::{PolarsError, PolarsResult, polars_bail, polars_ensure, polars_err};
6use polars_utils::aliases::{InitHashMaps, PlIndexMap};
7use polars_utils::pl_str::PlSmallStr;
8
/// A collection of named fields, each carrying a value of type `D`.
///
/// Fields live in a [`PlIndexMap`], so each one can be looked up both by name
/// and by position.
#[derive(Clone, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Schema<D> {
    // Field name -> per-field value, stored in an index map.
    fields: PlIndexMap<PlSmallStr, D>,
}
14
// Marker impl only: `Eq` has no methods, it just requires `D: Eq` here.
impl<D: Eq> Eq for Schema<D> {}
16
17impl<D> Schema<D> {
18 pub fn with_capacity(capacity: usize) -> Self {
19 let fields = PlIndexMap::with_capacity(capacity);
20 Self { fields }
21 }
22
/// Reserves room for `additional` more fields by forwarding to the backing
/// map's `reserve`.
pub fn reserve(&mut self, additional: usize) {
    self.fields.reserve(additional);
}
27
/// Returns the number of fields in the schema.
#[inline]
pub fn len(&self) -> usize {
    self.fields.len()
}
33
/// Returns `true` when the schema holds no fields.
#[inline]
pub fn is_empty(&self) -> bool {
    self.fields.is_empty()
}
38
/// Renames the field `old` to `new`, putting the renamed field back at the
/// position `old` occupied.
///
/// Returns the previous name when `old` was present. When `old` is not in the
/// schema, the `?` on the first line returns `None` before anything is modified.
///
/// NOTE(review): the index juggling below appears to assume `new` is not
/// already the name of a *different* field. In that case `insert_full` would
/// report the existing entry's index instead of appending, and the final swap
/// would no longer restore the order (and could be handed an out-of-range
/// index) — confirm that callers never do this.
pub fn rename(&mut self, old: &str, new: PlSmallStr) -> Option<PlSmallStr> {
    // Take `old` out of the map; swap-removal moves the last entry into the
    // vacated slot.
    let (old_index, old_name, dtype) = self.fields.swap_remove_full(old)?;
    // Store the same value under the new name and remember where it landed.
    let (new_index, _) = self.fields.insert_full(new, dtype);
    // Swap the two slots so the renamed field returns to its original position
    // and the displaced entry goes back to the end.
    self.fields.swap_indices(old_index, new_index);

    Some(old_name)
}
54
/// Stores `value` under `key` by forwarding to the backing map's `insert`.
///
/// Returns whatever that call returns: the value previously stored under
/// `key`, if there was one.
pub fn insert(&mut self, key: PlSmallStr, value: D) -> Option<D> {
    self.fields.insert(key, value)
}
58
/// Places the field `name` with value `dtype` at position `index`.
///
/// When a field called `name` already exists, its value is replaced, the field
/// is moved to `index`, and the old value is returned as `Ok(Some(_))`.
/// Otherwise the field is newly added and `Ok(None)` is returned.
///
/// # Errors
/// Returns an `OutOfBounds` error when `index > self.len()`.
pub fn insert_at_index(
    &mut self,
    mut index: usize,
    name: PlSmallStr,
    dtype: D,
) -> PolarsResult<Option<D>> {
    // `index == self.len()` is allowed: it means "append at the end".
    polars_ensure!(
        index <= self.len(),
        OutOfBounds:
            "index {} is out of bounds for schema with length {} (the max index allowed is self.len())",
                index,
                self.len()
    );

    // Insert (or overwrite) first; the entry is moved into place afterwards.
    let (old_index, old_dtype) = self.fields.insert_full(name, dtype);

    // When an existing field was overwritten the length did not grow, so the
    // one-past-the-end index that passed the check above is no longer a valid
    // target for `move_index`; clamp it to the last slot.
    if old_dtype.is_some() && index == self.len() {
        index -= 1;
    }
    self.fields.move_index(old_index, index);
    Ok(old_dtype)
}
97
/// Looks up the value stored for the field `name`, or `None` if there is no
/// such field.
pub fn get(&self, name: &str) -> Option<&D> {
    self.fields.get(name)
}
102
/// Looks up the value stored for the field `name` and returns it mutably, or
/// `None` if there is no such field.
pub fn get_mut(&mut self, name: &str) -> Option<&mut D> {
    self.fields.get_mut(name)
}
107
108 pub fn try_get(&self, name: &str) -> PolarsResult<&D> {
110 self.get(name)
111 .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name))
112 }
113
114 pub fn try_get_mut(&mut self, name: &str) -> PolarsResult<&mut D> {
116 self.fields
117 .get_mut(name)
118 .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name))
119 }
120
/// Looks up the field `name` and returns its position, its stored name and its
/// value, or `None` if there is no such field.
pub fn get_full(&self, name: &str) -> Option<(usize, &PlSmallStr, &D)> {
    self.fields.get_full(name)
}
127
128 pub fn try_get_full(&self, name: &str) -> PolarsResult<(usize, &PlSmallStr, &D)> {
132 self.fields
133 .get_full(name)
134 .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name))
135 }
136
/// Returns the name and value of the field at position `index`, or `None` when
/// there is no field at that position.
pub fn get_at_index(&self, index: usize) -> Option<(&PlSmallStr, &D)> {
    self.fields.get_index(index)
}
144
145 pub fn try_get_at_index(&self, index: usize) -> PolarsResult<(&PlSmallStr, &D)> {
146 self.fields.get_index(index).ok_or_else(|| polars_err!(ComputeError: "index {index} out of bounds with 'schema' of len: {}", self.len()))
147 }
148
/// Returns mutable references to both the name and the value of the field at
/// position `index`, or `None` when there is no field at that position.
///
/// NOTE(review): this hands out `&mut` access to the map key via
/// `MutableKeys::get_index_mut2`; presumably callers must not change the name
/// in a way that alters its hash or equality — confirm against the indexmap
/// documentation.
pub fn get_at_index_mut(&mut self, index: usize) -> Option<(&mut PlSmallStr, &mut D)> {
    self.fields.get_index_mut2(index)
}
156
/// Removes the field `name` with the backing map's `swap_remove` and returns
/// its value, or `None` if there was no such field.
///
/// Per indexmap's `swap_remove`, the last field is moved into the freed slot,
/// so the field order is NOT preserved. Use `shift_remove` when order matters.
pub fn remove(&mut self, name: &str) -> Option<D> {
    self.fields.swap_remove(name)
}
167
/// Removes the field `name` with the backing map's `shift_remove` and returns
/// its value, or `None` if there was no such field.
///
/// Per indexmap's `shift_remove`, the following fields are shifted down by one,
/// so the relative order of the remaining fields is kept.
pub fn shift_remove(&mut self, name: &str) -> Option<D> {
    self.fields.shift_remove(name)
}
177
/// Removes the field at position `index` with the backing map's
/// `shift_remove_index` and returns its name and value, or `None` when there
/// is no field at that position.
pub fn shift_remove_index(&mut self, index: usize) -> Option<(PlSmallStr, D)> {
    self.fields.shift_remove_index(index)
}
187
188 pub fn contains(&self, name: &str) -> bool {
190 self.get(name).is_some()
191 }
192
193 pub fn set_dtype(&mut self, name: &str, dtype: D) -> Option<D> {
201 let old_dtype = self.fields.get_mut(name)?;
202 Some(std::mem::replace(old_dtype, dtype))
203 }
204
205 pub fn set_dtype_at_index(&mut self, index: usize, dtype: D) -> Option<D> {
213 let (_, old_dtype) = self.fields.get_index_mut(index)?;
214 Some(std::mem::replace(old_dtype, dtype))
215 }
216
217 pub fn with_column(&mut self, name: PlSmallStr, dtype: D) -> Option<D> {
224 self.fields.insert(name, dtype)
225 }
226
227 pub fn try_insert(&mut self, name: PlSmallStr, value: D) -> PolarsResult<()> {
229 if self.fields.contains_key(&name) {
230 polars_bail!(Duplicate: "column '{}' is duplicate", name)
231 }
232
233 self.fields.insert(name, value);
234
235 Ok(())
236 }
237
238 pub fn hstack_mut(
242 &mut self,
243 columns: impl IntoIterator<Item = impl Into<(PlSmallStr, D)>>,
244 ) -> PolarsResult<()> {
245 for v in columns {
246 let (k, v) = v.into();
247 self.try_insert(k, v)?;
248 }
249
250 Ok(())
251 }
252
253 pub fn hstack(
257 mut self,
258 columns: impl IntoIterator<Item = impl Into<(PlSmallStr, D)>>,
259 ) -> PolarsResult<Self> {
260 self.hstack_mut(columns)?;
261 Ok(self)
262 }
263
/// Merges all fields of `other` into `self` by extending the backing map.
///
/// Per indexmap's `extend` (equivalent to inserting each pair in order):
/// fields only in `self` are untouched, fields only in `other` are appended,
/// and fields present in both keep their position in `self` but take the value
/// from `other`.
pub fn merge(&mut self, other: Self) {
    self.fields.extend(other.fields)
}
274
/// Iterates over `(name, value)` pairs by reference, in the order the backing
/// map stores them.
pub fn iter(&self) -> impl ExactSizeIterator<Item = (&PlSmallStr, &D)> + '_ {
    self.fields.iter()
}
281
/// Iterates over `(name, value)` pairs, giving mutable access to each value
/// (names stay immutable).
pub fn iter_mut(&mut self) -> impl ExactSizeIterator<Item = (&PlSmallStr, &mut D)> + '_ {
    self.fields.iter_mut()
}
285
286 pub fn iter_names(&self) -> impl '_ + ExactSizeIterator<Item = &PlSmallStr> {
288 self.fields.iter().map(|(name, _dtype)| name)
289 }
290
291 pub fn iter_names_cloned(&self) -> impl '_ + ExactSizeIterator<Item = PlSmallStr> {
292 self.iter_names().cloned()
293 }
294
295 pub fn iter_values(&self) -> impl '_ + ExactSizeIterator<Item = &D> {
297 self.fields.iter().map(|(_name, dtype)| dtype)
298 }
299
/// Consumes the schema and iterates over its values by value; the names are
/// discarded.
pub fn into_iter_values(self) -> impl ExactSizeIterator<Item = D> {
    self.fields.into_values()
}
303
304 pub fn iter_values_mut(&mut self) -> impl '_ + ExactSizeIterator<Item = &mut D> {
306 self.fields.iter_mut().map(|(_name, dtype)| dtype)
307 }
308
/// Returns the position of the field `name`, or `None` if there is no such
/// field.
pub fn index_of(&self, name: &str) -> Option<usize> {
    self.fields.get_index_of(name)
}
312
313 pub fn try_index_of(&self, name: &str) -> PolarsResult<usize> {
314 let Some(i) = self.fields.get_index_of(name) else {
315 polars_bail!(
316 ColumnNotFound:
317 "unable to find column {:?}; valid columns: {:?}",
318 name, self.iter_names().collect::<Vec<_>>(),
319 )
320 };
321
322 Ok(i)
323 }
324
325 pub fn field_compare<'a, 'b>(
327 &'a self,
328 other: &'b Self,
329 self_extra: &mut Vec<(usize, (&'a PlSmallStr, &'a D))>,
330 other_extra: &mut Vec<(usize, (&'b PlSmallStr, &'b D))>,
331 ) {
332 self_extra.extend(
333 self.iter()
334 .enumerate()
335 .filter(|(_, (n, _))| !other.contains(n)),
336 );
337 other_extra.extend(
338 other
339 .iter()
340 .enumerate()
341 .filter(|(_, (n, _))| !self.contains(n)),
342 );
343 }
344}
345
346impl<D> Schema<D>
347where
348 D: Clone + Default,
349{
350 pub fn new_inserting_at_index(
363 &self,
364 index: usize,
365 name: PlSmallStr,
366 field: D,
367 ) -> PolarsResult<Self> {
368 polars_ensure!(
369 index <= self.len(),
370 OutOfBounds:
371 "index {} is out of bounds for schema with length {} (the max index allowed is self.len())",
372 index,
373 self.len()
374 );
375
376 let mut new = Self::default();
377 let mut iter = self.fields.iter().filter_map(|(fld_name, dtype)| {
378 (fld_name != &name).then_some((fld_name.clone(), dtype.clone()))
379 });
380 new.fields.extend(iter.by_ref().take(index));
381 new.fields.insert(name.clone(), field);
382 new.fields.extend(iter);
383 Ok(new)
384 }
385
386 pub fn merge_from_ref(&mut self, other: &Self) {
394 self.fields.extend(
395 other
396 .iter()
397 .map(|(column, field)| (column.clone(), field.clone())),
398 )
399 }
400
401 pub fn try_project<I>(&self, columns: I) -> PolarsResult<Self>
403 where
404 I: IntoIterator,
405 I::Item: AsRef<str>,
406 {
407 let schema = columns
408 .into_iter()
409 .map(|c| {
410 let name = c.as_ref();
411 let (_, name, dtype) = self
412 .fields
413 .get_full(name)
414 .ok_or_else(|| polars_err!(col_not_found = name))?;
415 PolarsResult::Ok((name.clone(), dtype.clone()))
416 })
417 .collect::<PolarsResult<PlIndexMap<PlSmallStr, _>>>()?;
418 Ok(Self::from(schema))
419 }
420
421 pub fn try_project_indices(&self, indices: &[usize]) -> PolarsResult<Self> {
422 let fields = indices
423 .iter()
424 .map(|&i| {
425 let Some((k, v)) = self.fields.get_index(i) else {
426 polars_bail!(
427 SchemaFieldNotFound:
428 "projection index {} is out of bounds for schema of length {}",
429 i, self.fields.len()
430 );
431 };
432
433 Ok((k.clone(), v.clone()))
434 })
435 .collect::<PolarsResult<PlIndexMap<_, _>>>()?;
436
437 Ok(Self { fields })
438 }
439
440 pub fn filter<F: Fn(usize, &D) -> bool>(self, predicate: F) -> Self {
443 let fields = self
444 .fields
445 .into_iter()
446 .enumerate()
447 .filter_map(|(index, (name, d))| {
448 if (predicate)(index, &d) {
449 Some((name, d))
450 } else {
451 None
452 }
453 })
454 .collect();
455
456 Self { fields }
457 }
458
459 pub fn from_iter_check_duplicates<I, F>(iter: I) -> PolarsResult<Self>
460 where
461 I: IntoIterator<Item = F>,
462 F: Into<(PlSmallStr, D)>,
463 {
464 let iter = iter.into_iter();
465 let mut slf = Self::with_capacity(iter.size_hint().1.unwrap_or(0));
466
467 for v in iter {
468 let (name, d) = v.into();
469
470 if slf.contains(&name) {
471 return Err(err_msg(&name));
472
473 fn err_msg(name: &str) -> PolarsError {
474 polars_err!(Duplicate: "duplicate name when building schema '{}'", &name)
475 }
476 }
477
478 slf.fields.insert(name, d);
479 }
480
481 Ok(slf)
482 }
483}
484
485pub fn ensure_matching_schema_names<D>(lhs: &Schema<D>, rhs: &Schema<D>) -> PolarsResult<()> {
486 let lhs_names = lhs.iter_names();
487 let rhs_names = rhs.iter_names();
488
489 if !(lhs_names.len() == rhs_names.len() && lhs_names.zip(rhs_names).all(|(l, r)| l == r)) {
490 polars_bail!(
491 SchemaMismatch:
492 "lhs: {:?} rhs: {:?}",
493 lhs.iter_names().collect::<Vec<_>>(), rhs.iter_names().collect::<Vec<_>>()
494 )
495 }
496
497 Ok(())
498}
499
500impl<D: Debug> Debug for Schema<D> {
501 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
502 writeln!(f, "Schema:")?;
503 for (name, field) in self.fields.iter() {
504 writeln!(f, "name: {name}, field: {field:?}")?;
505 }
506 Ok(())
507 }
508}
509
510impl<D: Hash> Hash for Schema<D> {
511 fn hash<H: Hasher>(&self, state: &mut H) {
512 self.fields.iter().for_each(|v| v.hash(state))
513 }
514}
515
516impl<D: PartialEq> PartialEq for Schema<D> {
519 fn eq(&self, other: &Self) -> bool {
520 self.fields.len() == other.fields.len()
521 && self
522 .fields
523 .iter()
524 .zip(other.fields.iter())
525 .all(|(a, b)| a == b)
526 }
527}
528
/// Wraps an existing index map as a schema without copying or reordering it.
impl<D> From<PlIndexMap<PlSmallStr, D>> for Schema<D> {
    fn from(fields: PlIndexMap<PlSmallStr, D>) -> Self {
        Self { fields }
    }
}
534
535impl<F, D> FromIterator<F> for Schema<D>
536where
537 F: Into<(PlSmallStr, D)>,
538{
539 fn from_iter<I: IntoIterator<Item = F>>(iter: I) -> Self {
540 let fields = PlIndexMap::from_iter(iter.into_iter().map(|x| x.into()));
541 Self { fields }
542 }
543}
544
545impl<F, D> Extend<F> for Schema<D>
546where
547 F: Into<(PlSmallStr, D)>,
548{
549 fn extend<T: IntoIterator<Item = F>>(&mut self, iter: T) {
550 self.fields.extend(iter.into_iter().map(|x| x.into()))
551 }
552}
553
/// Consuming iteration: yields owned `(name, value)` pairs using the backing
/// map's own `IntoIter`.
impl<D> IntoIterator for Schema<D> {
    type IntoIter = <PlIndexMap<PlSmallStr, D> as IntoIterator>::IntoIter;
    type Item = (PlSmallStr, D);

    fn into_iter(self) -> Self::IntoIter {
        self.fields.into_iter()
    }
}