1use core::fmt::Debug;
2use core::hash::{Hash, Hasher};
3
4use indexmap::map::MutableKeys;
5use polars_error::{PolarsError, PolarsResult, polars_bail, polars_ensure, polars_err};
6use polars_utils::aliases::{InitHashMaps, PlIndexMap};
7use polars_utils::pl_str::PlSmallStr;
8
9#[derive(Debug, Clone, Default)]
10#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
11#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
12pub struct Schema<Field, Metadata> {
13 fields: PlIndexMap<PlSmallStr, Field>,
14 metadata: Metadata,
15}
16
17impl<Field: Eq, Metadata: Eq> Eq for Schema<Field, Metadata> {}
18
19impl<Field, Metadata: Default> Schema<Field, Metadata> {
20 pub fn with_capacity(capacity: usize) -> Self {
21 let fields = PlIndexMap::with_capacity(capacity);
22 Self {
23 fields,
24 metadata: Metadata::default(),
25 }
26 }
27
28 pub fn from_iter_check_duplicates<I, F>(iter: I) -> PolarsResult<Self>
29 where
30 I: IntoIterator<Item = F>,
31 F: Into<(PlSmallStr, Field)>,
32 {
33 Self::try_from_iter_check_duplicates(
34 iter.into_iter().map(PolarsResult::Ok),
35 |name: &str| polars_err!(Duplicate: "duplicate name when building schema '{}'", &name),
36 )
37 }
38
39 pub fn try_from_iter_check_duplicates<I, F, E>(iter: I, err_func: E) -> PolarsResult<Self>
40 where
41 I: IntoIterator<Item = PolarsResult<F>>,
42 F: Into<(PlSmallStr, Field)>,
43 E: Fn(&str) -> PolarsError,
44 {
45 let iter = iter.into_iter();
46 let mut slf = Self::with_capacity(iter.size_hint().1.unwrap_or(0));
47
48 for v in iter {
49 let (name, d) = v?.into();
50
51 if slf.contains(&name) {
52 return Err(err_func(&name));
53 }
54
55 slf.fields.insert(name, d);
56 }
57
58 Ok(slf)
59 }
60}
61
62impl<Field, Metadata> Schema<Field, Metadata> {
63 pub fn reserve(&mut self, additional: usize) {
65 self.fields.reserve(additional);
66 }
67
68 #[inline]
70 pub fn len(&self) -> usize {
71 self.fields.len()
72 }
73
74 #[inline]
75 pub fn is_empty(&self) -> bool {
76 self.fields.is_empty()
77 }
78
79 pub fn metadata(&self) -> &Metadata {
80 &self.metadata
81 }
82
83 pub fn metadata_mut(&mut self) -> &mut Metadata {
84 &mut self.metadata
85 }
86
87 pub fn rename(&mut self, old: &str, new: PlSmallStr) -> Option<PlSmallStr> {
92 let (old_index, old_name, dtype) = self.fields.swap_remove_full(old)?;
94 let (new_index, _) = self.fields.insert_full(new, dtype);
96 self.fields.swap_indices(old_index, new_index);
99
100 Some(old_name)
101 }
102
103 pub fn insert(&mut self, key: PlSmallStr, value: Field) -> Option<Field> {
104 self.fields.insert(key, value)
105 }
106
107 pub fn insert_at_index(
122 &mut self,
123 mut index: usize,
124 name: PlSmallStr,
125 dtype: Field,
126 ) -> PolarsResult<Option<Field>> {
127 polars_ensure!(
128 index <= self.len(),
129 OutOfBounds:
130 "index {} is out of bounds for schema with length {} (the max index allowed is self.len())",
131 index,
132 self.len()
133 );
134
135 let (old_index, old_dtype) = self.fields.insert_full(name, dtype);
136
137 if old_dtype.is_some() && index == self.len() {
140 index -= 1;
141 }
142 self.fields.move_index(old_index, index);
143 Ok(old_dtype)
144 }
145
146 pub fn get(&self, name: &str) -> Option<&Field> {
148 self.fields.get(name)
149 }
150
151 pub fn get_mut(&mut self, name: &str) -> Option<&mut Field> {
153 self.fields.get_mut(name)
154 }
155
156 pub fn try_get(&self, name: &str) -> PolarsResult<&Field> {
158 self.get(name)
159 .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name))
160 }
161
162 pub fn try_get_mut(&mut self, name: &str) -> PolarsResult<&mut Field> {
164 self.fields
165 .get_mut(name)
166 .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name))
167 }
168
169 pub fn get_full(&self, name: &str) -> Option<(usize, &PlSmallStr, &Field)> {
173 self.fields.get_full(name)
174 }
175
176 pub fn try_get_full(&self, name: &str) -> PolarsResult<(usize, &PlSmallStr, &Field)> {
180 self.fields
181 .get_full(name)
182 .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name))
183 }
184
185 pub fn get_at_index(&self, index: usize) -> Option<(&PlSmallStr, &Field)> {
190 self.fields.get_index(index)
191 }
192
193 pub fn try_get_at_index(&self, index: usize) -> PolarsResult<(&PlSmallStr, &Field)> {
194 self.fields.get_index(index).ok_or_else(|| polars_err!(ComputeError: "index {index} out of bounds with 'schema' of len: {}", self.len()))
195 }
196
197 pub fn get_at_index_mut(&mut self, index: usize) -> Option<(&mut PlSmallStr, &mut Field)> {
202 self.fields.get_index_mut2(index)
203 }
204
205 pub fn remove(&mut self, name: &str) -> Option<Field> {
213 self.fields.swap_remove(name)
214 }
215
216 pub fn shift_remove(&mut self, name: &str) -> Option<Field> {
223 self.fields.shift_remove(name)
224 }
225
226 pub fn shift_remove_index(&mut self, index: usize) -> Option<(PlSmallStr, Field)> {
233 self.fields.shift_remove_index(index)
234 }
235
236 pub fn contains(&self, name: &str) -> bool {
238 self.get(name).is_some()
239 }
240
241 pub fn set_dtype(&mut self, name: &str, dtype: Field) -> Option<Field> {
249 let old_dtype = self.fields.get_mut(name)?;
250 Some(std::mem::replace(old_dtype, dtype))
251 }
252
253 pub fn set_dtype_at_index(&mut self, index: usize, dtype: Field) -> Option<Field> {
261 let (_, old_dtype) = self.fields.get_index_mut(index)?;
262 Some(std::mem::replace(old_dtype, dtype))
263 }
264
265 pub fn with_column(&mut self, name: PlSmallStr, dtype: Field) -> Option<Field> {
272 self.fields.insert(name, dtype)
273 }
274
275 pub fn try_insert(&mut self, name: PlSmallStr, value: Field) -> PolarsResult<()> {
277 if self.fields.contains_key(&name) {
278 polars_bail!(Duplicate: "column '{}' is duplicate", name)
279 }
280
281 self.fields.insert(name, value);
282
283 Ok(())
284 }
285
286 pub fn hstack_mut(
290 &mut self,
291 columns: impl IntoIterator<Item = impl Into<(PlSmallStr, Field)>>,
292 ) -> PolarsResult<()> {
293 for v in columns {
294 let (k, v) = v.into();
295 self.try_insert(k, v)?;
296 }
297
298 Ok(())
299 }
300
301 pub fn hstack(
305 mut self,
306 columns: impl IntoIterator<Item = impl Into<(PlSmallStr, Field)>>,
307 ) -> PolarsResult<Self> {
308 self.hstack_mut(columns)?;
309 Ok(self)
310 }
311
312 pub fn sort_by_key<T, F>(&mut self, sort_key: F)
313 where
314 T: Ord,
315 F: FnMut(&PlSmallStr, &Field) -> T,
316 {
317 self.fields.sort_by_key(sort_key);
318 }
319
320 pub fn merge(&mut self, other: Self) {
328 self.fields.extend(other.fields)
329 }
330
331 pub fn iter(&self) -> impl ExactSizeIterator<Item = (&PlSmallStr, &Field)> + '_ {
335 self.fields.iter()
336 }
337
338 pub fn iter_mut(&mut self) -> impl ExactSizeIterator<Item = (&PlSmallStr, &mut Field)> + '_ {
339 self.fields.iter_mut()
340 }
341
342 pub fn iter_names(&self) -> impl '_ + ExactSizeIterator<Item = &PlSmallStr> {
344 self.fields.iter().map(|(name, _dtype)| name)
345 }
346
347 pub fn iter_names_cloned(&self) -> impl '_ + ExactSizeIterator<Item = PlSmallStr> {
348 self.iter_names().cloned()
349 }
350
351 pub fn iter_values(&self) -> impl '_ + ExactSizeIterator<Item = &Field> {
353 self.fields.iter().map(|(_name, dtype)| dtype)
354 }
355
356 pub fn into_iter_values(self) -> impl ExactSizeIterator<Item = Field> {
357 self.fields.into_values()
358 }
359
360 pub fn iter_values_mut(&mut self) -> impl '_ + ExactSizeIterator<Item = &mut Field> {
362 self.fields.iter_mut().map(|(_name, dtype)| dtype)
363 }
364
365 pub fn index_of(&self, name: &str) -> Option<usize> {
366 self.fields.get_index_of(name)
367 }
368
369 pub fn try_index_of(&self, name: &str) -> PolarsResult<usize> {
370 let Some(i) = self.fields.get_index_of(name) else {
371 polars_bail!(
372 ColumnNotFound:
373 "unable to find column {:?}; valid columns: {:?}",
374 name, self.iter_names().collect::<Vec<_>>(),
375 )
376 };
377
378 Ok(i)
379 }
380
381 pub fn field_compare<'a, 'b>(
383 &'a self,
384 other: &'b Self,
385 self_extra: &mut Vec<(usize, (&'a PlSmallStr, &'a Field))>,
386 other_extra: &mut Vec<(usize, (&'b PlSmallStr, &'b Field))>,
387 ) {
388 self_extra.extend(
389 self.iter()
390 .enumerate()
391 .filter(|(_, (n, _))| !other.contains(n)),
392 );
393 other_extra.extend(
394 other
395 .iter()
396 .enumerate()
397 .filter(|(_, (n, _))| !self.contains(n)),
398 );
399 }
400}
401
402impl<Field, Metadata> Schema<Field, Metadata>
403where
404 Field: Clone + Default,
405 Metadata: Clone,
406{
407 pub fn new_inserting_at_index(
420 &self,
421 index: usize,
422 name: PlSmallStr,
423 field: Field,
424 ) -> PolarsResult<Self> {
425 polars_ensure!(
426 index <= self.len(),
427 OutOfBounds:
428 "index {} is out of bounds for schema with length {} (the max index allowed is self.len())",
429 index,
430 self.len()
431 );
432
433 let mut new = Self {
434 fields: Default::default(),
435 metadata: self.metadata().clone(),
436 };
437 let mut iter = self.fields.iter().filter_map(|(fld_name, dtype)| {
438 (fld_name != &name).then_some((fld_name.clone(), dtype.clone()))
439 });
440 new.fields.extend(iter.by_ref().take(index));
441 new.fields.insert(name.clone(), field);
442 new.fields.extend(iter);
443 Ok(new)
444 }
445
446 pub fn merge_from_ref(&mut self, other: &Self) {
454 self.fields.extend(
455 other
456 .iter()
457 .map(|(column, field)| (column.clone(), field.clone())),
458 )
459 }
460
461 pub fn try_project<I>(&self, columns: I) -> PolarsResult<Self>
463 where
464 I: IntoIterator,
465 I::Item: AsRef<str>,
466 {
467 let fields = columns
468 .into_iter()
469 .map(|c| {
470 let name = c.as_ref();
471 let (_, name, dtype) = self
472 .fields
473 .get_full(name)
474 .ok_or_else(|| polars_err!(col_not_found = name))?;
475 PolarsResult::Ok((name.clone(), dtype.clone()))
476 })
477 .collect::<PolarsResult<PlIndexMap<PlSmallStr, _>>>()?;
478 Ok(Self {
479 fields,
480 metadata: self.metadata().clone(),
481 })
482 }
483
484 pub fn try_project_indices(&self, indices: &[usize]) -> PolarsResult<Self> {
485 let fields = indices
486 .iter()
487 .map(|&i| {
488 let Some((k, v)) = self.fields.get_index(i) else {
489 polars_bail!(
490 SchemaFieldNotFound:
491 "projection index {} is out of bounds for schema of length {}",
492 i, self.fields.len()
493 );
494 };
495
496 Ok((k.clone(), v.clone()))
497 })
498 .collect::<PolarsResult<PlIndexMap<_, _>>>()?;
499
500 Ok(Self {
501 fields,
502 metadata: self.metadata().clone(),
503 })
504 }
505
506 pub fn filter<F: Fn(usize, &Field) -> bool>(self, predicate: F) -> Self {
509 let metadata = self.metadata().clone();
510 let fields = self
511 .fields
512 .into_iter()
513 .enumerate()
514 .filter_map(|(index, (name, d))| {
515 if (predicate)(index, &d) {
516 Some((name, d))
517 } else {
518 None
519 }
520 })
521 .collect();
522
523 Self { fields, metadata }
524 }
525}
526
527impl<Field: Hash, Metadata: Hash> Hash for Schema<Field, Metadata> {
528 fn hash<H: Hasher>(&self, state: &mut H) {
529 Hash::hash(&SchemaHashEqWrap::from(self), state)
530 }
531}
532
533impl<Field: PartialEq, Metadata: PartialEq> PartialEq for Schema<Field, Metadata> {
536 fn eq(&self, other: &Self) -> bool {
537 PartialEq::eq(
538 &SchemaHashEqWrap::from(self),
539 &SchemaHashEqWrap::from(other),
540 )
541 }
542}
543
544#[derive(Hash, PartialEq)]
548struct SchemaHashEqWrap<'a, Field, Metadata> {
549 fields: &'a indexmap::map::Slice<PlSmallStr, Field>,
550 metadata: &'a Metadata,
551}
552
553impl<'a, Field, Metadata> From<&'a Schema<Field, Metadata>>
554 for SchemaHashEqWrap<'a, Field, Metadata>
555{
556 fn from(value: &'a Schema<Field, Metadata>) -> Self {
557 let Schema { fields, metadata } = value;
558
559 Self {
560 fields: fields.as_slice(),
561 metadata,
562 }
563 }
564}
565
566impl<Field, Metadata: Default> From<PlIndexMap<PlSmallStr, Field>> for Schema<Field, Metadata> {
567 fn from(fields: PlIndexMap<PlSmallStr, Field>) -> Self {
568 Self {
569 fields,
570 metadata: Metadata::default(),
571 }
572 }
573}
574
575impl<F, Field, Metadata: Default> FromIterator<F> for Schema<Field, Metadata>
576where
577 F: Into<(PlSmallStr, Field)>,
578{
579 fn from_iter<I: IntoIterator<Item = F>>(iter: I) -> Self {
580 let fields = PlIndexMap::from_iter(iter.into_iter().map(|x| x.into()));
581 Self {
582 fields,
583 metadata: Metadata::default(),
584 }
585 }
586}
587
588impl<F, Field, Metadata> Extend<F> for Schema<Field, Metadata>
589where
590 F: Into<(PlSmallStr, Field)>,
591{
592 fn extend<T: IntoIterator<Item = F>>(&mut self, iter: T) {
593 self.fields.extend(iter.into_iter().map(|x| x.into()))
594 }
595}
596
597impl<Field, Metadata> IntoIterator for Schema<Field, Metadata> {
598 type IntoIter = <PlIndexMap<PlSmallStr, Field> as IntoIterator>::IntoIter;
599 type Item = (PlSmallStr, Field);
600
601 fn into_iter(self) -> Self::IntoIter {
602 self.fields.into_iter()
603 }
604}
605
#[cfg(test)]
mod tests {
    use super::Schema;

    /// Schemas with the same names in a different order must not compare equal.
    #[test]
    fn test_schema_eq_checks_key_order() {
        let a_then_b: Schema<(), ()> = Schema::from_iter([("a".into(), ()), ("b".into(), ())]);
        let b_then_a: Schema<(), ()> = Schema::from_iter([("b".into(), ()), ("a".into(), ())]);

        assert_ne!(a_then_b, b_then_a);
    }
}