use std::fmt::{Debug, Formatter};
use indexmap::IndexMap;
#[cfg(feature = "serde-lazy")]
use serde::{Deserialize, Serialize};
use crate::prelude::*;
/// A mapping from column name to [`DataType`], stored in a `PlIndexMap`.
///
/// Equality is implemented by hand below (pairwise over the entries in iteration
/// order) rather than derived.
#[derive(Eq, Clone, Default)]
#[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))]
pub struct Schema {
// Column name -> data type entries.
inner: PlIndexMap<String, DataType>,
}
impl PartialEq for Schema {
    /// Two schemas are equal when they hold the same number of entries and every
    /// `(name, dtype)` pair matches position by position in iteration order.
    fn eq(&self, other: &Self) -> bool {
        // Cheap rejection before walking the entries.
        if self.len() != other.len() {
            return false;
        }
        self.iter().eq(other.iter())
    }
}
impl Debug for Schema {
    /// Writes a `Schema:` header line followed by one line per column with its
    /// name and the `Debug` rendering of its data type.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Schema:")?;
        // Stops at, and returns, the first formatter error.
        self.inner
            .iter()
            .try_for_each(|(name, dtype)| writeln!(f, "name: {name}, data type: {dtype:?}"))
    }
}
impl<I, J> From<I> for Schema
where
    I: Iterator<Item = J>,
    J: Into<Field>,
{
    /// Builds a schema from an iterator of items convertible into [`Field`].
    ///
    /// Each item is converted to a `Field`, and its name and data type are cloned
    /// into the map. The map is pre-sized from the iterator's lower size hint.
    fn from(iter: I) -> Self {
        let (lower_bound, _) = iter.size_hint();
        let mut inner: PlIndexMap<_, _> =
            IndexMap::with_capacity_and_hasher(lower_bound, ahash::RandomState::default());
        inner.extend(iter.map(|item| {
            let field: Field = item.into();
            (field.name().clone(), field.data_type().clone())
        }));
        Self { inner }
    }
}
impl<J> FromIterator<J> for Schema
where
J: Into<Field>,
{
fn from_iter<I: IntoIterator<Item = J>>(iter: I) -> Self {
Schema::from(iter.into_iter())
}
}
impl Schema {
/// Builds a schema from an iterable of fallible fields.
///
/// # Errors
///
/// Returns the first `Err` yielded by `flds`; later items are not consumed.
pub fn try_from_fallible<I>(flds: I) -> PolarsResult<Self>
where
    I: IntoIterator<Item = PolarsResult<Field>>,
{
    let mut fields = flds.into_iter();
    let (lower_bound, _) = fields.size_hint();
    let mut inner: PlIndexMap<_, _> =
        IndexMap::with_capacity_and_hasher(lower_bound, ahash::RandomState::default());
    fields.try_for_each(|maybe_field| -> PolarsResult<()> {
        let field = maybe_field?;
        inner.insert(field.name().clone(), field.data_type().clone());
        Ok(())
    })?;
    Ok(Self { inner })
}
/// Creates an empty schema; equivalent to `Schema::with_capacity(0)`.
pub fn new() -> Self {
Self::with_capacity(0)
}
/// Creates an empty schema whose underlying map is constructed with room for
/// `capacity` entries and a default `ahash::RandomState` hasher.
pub fn with_capacity(capacity: usize) -> Self {
    let hasher = ahash::RandomState::default();
    Self {
        inner: IndexMap::with_capacity_and_hasher(capacity, hasher),
    }
}
/// Returns the number of entries (columns) in the schema.
#[inline]
pub fn len(&self) -> usize {
self.inner.len()
}
/// Returns `true` when the schema holds no entries.
#[inline]
pub fn is_empty(&self) -> bool {
self.inner.is_empty()
}
/// Renames the column `old` to `new`, carrying over its data type.
///
/// Returns `None` and leaves the schema untouched when there is no column named
/// `old`; returns `Some(())` otherwise.
///
/// Renaming a column to its own name is a no-op. Without the explicit guard the
/// `insert` below would overwrite the entry in place and the following
/// `swap_remove` would then delete it.
///
/// NOTE(review): if `new` already names a *different* column, that column's data
/// type is overwritten and the entry moved into `old`'s slot is whichever entry
/// was last in the map — confirm whether callers rely on this.
pub fn rename(&mut self, old: &str, new: String) -> Option<()> {
    let dtype = self.inner.get(old)?.clone();
    if old == new.as_str() {
        // Same name: nothing to do, and the remove below must not run.
        return Some(());
    }
    // A fresh key is appended at the end; `swap_remove` then fills the slot
    // vacated by `old` with the last entry, i.e. the renamed column.
    self.inner.insert(new, dtype);
    self.inner.swap_remove(old);
    Some(())
}
/// Returns a copy of this schema with `(name, dtype)` inserted at position `index`.
///
/// Returns `None` when `index > self.len()`. `self` is never modified.
///
/// If a column called `name` already exists it is left out of the copy, so the
/// new entry (with the supplied `dtype`) is the only one carrying that name.
/// Previously the old entry was copied as well, which either overwrote the
/// supplied `dtype` or pinned the column to its old position. In that case the
/// new entry sits after `min(index, self.len() - 1)` of the remaining columns.
pub fn insert_index(&self, index: usize, name: String, dtype: DataType) -> Option<Self> {
    if index > self.len() {
        return None;
    }
    // Position of an already-present column with the same name, if any; it is
    // skipped while copying so it cannot clash with the inserted entry.
    let duplicate = self.inner.get_index_of(name.as_str());
    let mut new = Self::with_capacity(self.len() + 1);
    let mut remaining = self
        .inner
        .iter()
        .enumerate()
        .filter(|(position, _)| Some(*position) != duplicate)
        .map(|(_, (column, column_dtype))| (column.clone(), column_dtype.clone()));
    // Copy the leading entries, place the new one, then copy the rest.
    new.inner.extend((&mut remaining).take(index));
    new.inner.insert(name, dtype);
    new.inner.extend(remaining);
    Some(new)
}
/// Looks up the data type stored under `name`; `None` if there is no such entry.
pub fn get(&self, name: &str) -> Option<&DataType> {
self.inner.get(name)
}
/// Looks up the data type stored under `name`.
///
/// # Errors
///
/// Returns `PolarsError::NotFound` carrying `name` when there is no such entry.
pub fn try_get(&self, name: &str) -> PolarsResult<&DataType> {
    match self.get(name) {
        Some(dtype) => Ok(dtype),
        None => Err(PolarsError::NotFound(name.to_string().into())),
    }
}
/// Looks up `name` and returns the underlying map's `get_full` result: the
/// entry's index together with references to its key and data type, or `None`
/// if there is no such entry.
pub fn get_full(&self, name: &str) -> Option<(usize, &String, &DataType)> {
self.inner.get_full(name)
}
/// Builds an owned [`Field`] for the column `name` by cloning its data type;
/// `None` if there is no such entry.
pub fn get_field(&self, name: &str) -> Option<Field> {
    let dtype = self.inner.get(name)?;
    Some(Field::new(name, dtype.clone()))
}
/// Builds an owned [`Field`] for the column `name` by cloning its data type.
///
/// # Errors
///
/// Returns `PolarsError::NotFound` carrying `name` when there is no such entry.
pub fn try_get_field(&self, name: &str) -> PolarsResult<Field> {
    let dtype = self
        .inner
        .get(name)
        .ok_or_else(|| PolarsError::NotFound(name.to_string().into()))?;
    Ok(Field::new(name, dtype.clone()))
}
/// Returns references to the name and data type of the entry at position
/// `index`, as reported by the underlying map's `get_index`.
pub fn get_index(&self, index: usize) -> Option<(&String, &DataType)> {
self.inner.get_index(index)
}
/// Returns `true` when the schema has an entry named `name`.
pub fn contains(&self, name: &str) -> bool {
    self.inner.contains_key(name)
}
/// Returns mutable references to the name and data type of the entry at
/// position `index`, as reported by the underlying map's `get_index_mut`.
///
/// NOTE(review): the key is handed out mutably; editing it presumably leaves the
/// map's hash lookup for that entry stale — confirm against the `IndexMap` docs
/// before mutating names through this method.
pub fn get_index_mut(&mut self, index: usize) -> Option<(&mut String, &mut DataType)> {
self.inner.get_index_mut(index)
}
/// Overwrites the data type of the column `name` with `dtype`.
///
/// Returns `None` (changing nothing) when there is no such column.
pub fn coerce_by_name(&mut self, name: &str, dtype: DataType) -> Option<()> {
    self.inner.get_mut(name).map(|slot| {
        *slot = dtype;
    })
}
/// Overwrites the data type of the entry at position `index` with `dtype`.
///
/// Returns `None` (changing nothing) when the underlying map has no entry at
/// `index`.
pub fn coerce_by_index(&mut self, index: usize, dtype: DataType) -> Option<()> {
    let (_, slot) = self.inner.get_index_mut(index)?;
    *slot = dtype;
    Some(())
}
/// Inserts `dtype` under `name` via the underlying map's `insert`.
///
/// The value returned by `insert` is discarded, so callers cannot tell from this
/// call whether an entry with that name existed before.
pub fn with_column(&mut self, name: String, dtype: DataType) {
self.inner.insert(name, dtype);
}
pub fn merge(&mut self, other: Self) {
self.inner.extend(other.inner.into_iter())
}
/// Converts the schema into an [`ArrowSchema`].
///
/// Every entry becomes an `ArrowField` built from its name and
/// `dtype.to_arrow()`, with `true` passed as the third constructor argument.
pub fn to_arrow(&self) -> ArrowSchema {
    let mut fields = Vec::with_capacity(self.inner.len());
    for (name, dtype) in self.inner.iter() {
        fields.push(ArrowField::new(name, dtype.to_arrow(), true));
    }
    ArrowSchema::from(fields)
}
/// Iterates over the entries as owned [`Field`]s, cloning each data type as it
/// is yielded.
pub fn iter_fields(&self) -> impl Iterator<Item = Field> + ExactSizeIterator + '_ {
    let entries = self.inner.iter();
    entries.map(|(column, data_type)| {
        let owned_dtype = data_type.clone();
        Field::new(column, owned_dtype)
    })
}
/// Iterates over references to the data types, in the map's iteration order.
pub fn iter_dtypes(&self) -> impl Iterator<Item = &DataType> + ExactSizeIterator + '_ {
    self.inner.values()
}
/// Iterates over references to the column names, in the map's iteration order.
pub fn iter_names(&self) -> impl Iterator<Item = &String> + '_ + ExactSizeIterator {
    self.inner.keys()
}
/// Iterates over `(name, data type)` reference pairs, in the map's iteration
/// order.
pub fn iter(&self) -> impl Iterator<Item = (&String, &DataType)> + '_ {
self.inner.iter()
}
}
/// A [`Schema`] behind an `Arc`.
pub type SchemaRef = Arc<Schema>;
/// Consuming iteration over the owned `(name, data type)` entries.
impl IntoIterator for Schema {
type Item = (String, DataType);
type IntoIter = <PlIndexMap<String, DataType> as IntoIterator>::IntoIter;
/// Hands out the underlying map's own consuming iterator.
fn into_iter(self) -> Self::IntoIter {
self.inner.into_iter()
}
}
/// Name-to-position lookup for schema-like types.
#[cfg(feature = "private")]
pub trait IndexOfSchema: Debug {
    /// Returns the position of the field called `name`, if the implementor has one.
    fn index_of(&self, name: &str) -> Option<usize>;

    /// Like [`index_of`](Self::index_of), but reports a missing field as an error.
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::SchemaMisMatch` whose message contains `name` and the
    /// `Debug` rendering of `self`.
    fn try_index_of(&self, name: &str) -> PolarsResult<usize> {
        match self.index_of(name) {
            Some(position) => Ok(position),
            None => Err(PolarsError::SchemaMisMatch(
                format!("Unable to get field named \"{name}\" from schema: {self:?}").into(),
            )),
        }
    }
}
// The `IndexOfSchema` trait is only compiled with the `private` feature, so the
// impl carries the same gate; otherwise the crate fails to build without it.
#[cfg(feature = "private")]
impl IndexOfSchema for Schema {
    /// Returns the position of the column `name` in the underlying map, or `None`
    /// if there is no such column.
    fn index_of(&self, name: &str) -> Option<usize> {
        self.inner.get_index_of(name)
    }
}
// The `IndexOfSchema` trait is only compiled with the `private` feature, so the
// impl carries the same gate; otherwise the crate fails to build without it.
#[cfg(feature = "private")]
impl IndexOfSchema for ArrowSchema {
    /// Returns the position of the first field whose `name` equals `name`, found
    /// by a linear scan over `fields`; `None` if no field matches.
    fn index_of(&self, name: &str) -> Option<usize> {
        self.fields.iter().position(|field| field.name == name)
    }
}