use std::sync::{Arc, OnceLock};
use polars_error::PolarsResult;
use super::broadcast::{broadcast_columns, infer_broadcast_height};
use super::validation::validate_columns_slice;
use crate::frame::column::Column;
use crate::schema::{Schema, SchemaRef};
/// An ordered collection of [`Column`]s together with an explicit row count
/// and a lazily computed, cached schema.
#[derive(Clone)]
pub struct DataFrame {
/// Row count reported by `height()`. It is stored separately from the
/// columns, so a frame without any columns can still carry a height
/// (see `empty_with_height`).
height: usize,
/// The columns, in order. The checked constructors pass them through
/// `validate_columns_slice(height, ..)` before storing them.
columns: Vec<Column>,
/// Schema cache filled on the first call to `schema()`. It is reset by
/// `columns_mut` and can be injected through the unsafe `set_schema` family.
cached_schema: OnceLock<SchemaRef>,
}
impl Default for DataFrame {
fn default() -> Self {
DataFrame::empty()
}
}
impl DataFrame {
    /// Creates a frame with zero rows and zero columns.
    pub const fn empty() -> Self {
        Self::empty_with_height(0)
    }

    /// Creates a frame that has no columns but still reports `height` rows.
    pub const fn empty_with_height(height: usize) -> Self {
        DataFrame {
            height,
            columns: Vec::new(),
            cached_schema: OnceLock::new(),
        }
    }

    /// Creates a frame from `columns` after validating them against `height`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `validate_columns_slice(height, &columns)`,
    /// with its message prefixed by `"could not create a new DataFrame: "`.
    pub fn new(height: usize, columns: Vec<Column>) -> PolarsResult<Self> {
        validate_columns_slice(height, &columns)
            .map_err(|e| e.wrap_msg(|msg| format!("could not create a new DataFrame: {msg}")))?;

        // SAFETY: `columns` passed `validate_columns_slice` for this `height` just above.
        Ok(unsafe { DataFrame::_new_unchecked_impl(height, columns) })
    }

    /// Like [`DataFrame::new`], using the length of the first column as the height
    /// (`0` when `columns` is empty).
    ///
    /// # Errors
    ///
    /// Same as [`DataFrame::new`].
    pub fn new_infer_height(columns: Vec<Column>) -> PolarsResult<Self> {
        let height = columns.first().map_or(0, |c| c.len());
        DataFrame::new(height, columns)
    }

    /// Creates a frame without validating `columns` in release builds.
    ///
    /// # Safety
    ///
    /// `columns` must satisfy everything `validate_columns_slice(height, &columns)`
    /// checks; that validation is only executed when `debug_assertions` are enabled.
    ///
    /// # Panics
    ///
    /// With `debug_assertions` enabled, panics if that validation fails.
    pub unsafe fn new_unchecked(height: usize, columns: Vec<Column>) -> DataFrame {
        if cfg!(debug_assertions) {
            validate_columns_slice(height, &columns).unwrap();
        }

        // SAFETY: the caller guarantees the invariants (and they were re-checked above in
        // debug builds).
        unsafe { DataFrame::_new_unchecked_impl(height, columns) }
    }

    /// Like [`DataFrame::new_unchecked`], using the length of the first column as the
    /// height (`0` when `columns` is empty).
    ///
    /// # Safety
    ///
    /// Same contract as [`DataFrame::new_unchecked`] for the inferred height.
    pub unsafe fn new_unchecked_infer_height(columns: Vec<Column>) -> DataFrame {
        let height = columns.first().map_or(0, |c| c.len());

        // SAFETY: the caller upholds the contract of `new_unchecked` for the inferred height.
        unsafe { DataFrame::new_unchecked(height, columns) }
    }

    /// Stores `height` and `columns` verbatim, with an empty schema cache and no checks
    /// of any kind. This is the primitive every other constructor ends in.
    ///
    /// # Safety
    ///
    /// Nothing is validated here, not even in debug builds. The caller must guarantee that
    /// `columns` would pass `validate_columns_slice(height, &columns)`.
    pub const unsafe fn _new_unchecked_impl(height: usize, columns: Vec<Column>) -> DataFrame {
        DataFrame {
            height,
            columns,
            cached_schema: OnceLock::new(),
        }
    }

    /// Runs `broadcast_columns(height, &mut columns)` and then builds a validated frame.
    ///
    /// NOTE(review): presumably the helper stretches unit-length columns to `height`;
    /// confirm against `broadcast_columns`.
    ///
    /// # Errors
    ///
    /// Returns the error from `broadcast_columns`, or the one from [`DataFrame::new`].
    pub fn new_with_broadcast(height: usize, mut columns: Vec<Column>) -> PolarsResult<Self> {
        broadcast_columns(height, &mut columns)?;
        DataFrame::new(height, columns)
    }

    /// Like [`DataFrame::new_with_broadcast`], with the height obtained from
    /// `infer_broadcast_height(&columns)`.
    ///
    /// # Errors
    ///
    /// Same as [`DataFrame::new_with_broadcast`].
    pub fn new_infer_broadcast(columns: Vec<Column>) -> PolarsResult<Self> {
        let height = infer_broadcast_height(&columns);
        DataFrame::new_with_broadcast(height, columns)
    }

    /// Runs `broadcast_columns(height, &mut columns)` and then builds the frame through
    /// [`DataFrame::new_unchecked`].
    ///
    /// # Safety
    ///
    /// After broadcasting, `columns` must satisfy the contract of
    /// [`DataFrame::new_unchecked`] for `height`.
    ///
    /// # Errors
    ///
    /// Returns the error from `broadcast_columns`.
    pub unsafe fn new_unchecked_with_broadcast(
        height: usize,
        mut columns: Vec<Column>,
    ) -> PolarsResult<Self> {
        broadcast_columns(height, &mut columns)?;

        // SAFETY: the caller guarantees the broadcast columns are valid for `height`.
        Ok(unsafe { DataFrame::new_unchecked(height, columns) })
    }

    /// Like [`DataFrame::new_unchecked_with_broadcast`], with the height obtained from
    /// `infer_broadcast_height(&columns)`.
    ///
    /// # Safety
    ///
    /// Same contract as [`DataFrame::new_unchecked_with_broadcast`] for the inferred height.
    ///
    /// # Errors
    ///
    /// Returns the error from `broadcast_columns`.
    pub unsafe fn new_unchecked_infer_broadcast(columns: Vec<Column>) -> PolarsResult<Self> {
        let height = infer_broadcast_height(&columns);

        // SAFETY: the caller upholds the contract of `new_unchecked_with_broadcast`.
        unsafe { DataFrame::new_unchecked_with_broadcast(height, columns) }
    }

    /// Creates a zero-row frame with one `Column::new_empty` per `(name, dtype)` entry of
    /// `schema`, in the schema's iteration order. The schema cache is left empty.
    pub fn empty_with_schema(schema: &Schema) -> Self {
        let cols = schema
            .iter()
            .map(|(name, dtype)| Column::new_empty(name.clone(), dtype))
            .collect();

        // SAFETY: NOTE(review): relies on `Column::new_empty` yielding zero-length columns
        // and on `Schema` names being unique; confirm against those types.
        unsafe { DataFrame::_new_unchecked_impl(0, cols) }
    }

    /// Like [`DataFrame::empty_with_schema`], additionally storing `schema` itself as the
    /// cached schema so that [`DataFrame::schema`] hands back the same `Arc`.
    pub fn empty_with_arc_schema(schema: SchemaRef) -> Self {
        let mut df = DataFrame::empty_with_schema(&schema);

        // SAFETY: the columns were derived from this very schema one line above.
        unsafe { df.set_schema(schema) };
        df
    }

    /// Overwrites the stored row count without touching the columns.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably `height` must equal the length of every column currently
    /// held; nothing here checks it. Confirm against `validate_columns_slice`.
    #[inline]
    pub unsafe fn set_height(&mut self, height: usize) -> &mut Self {
        self.height = height;
        self
    }

    /// Returns the stored row count.
    #[inline]
    pub fn height(&self) -> usize {
        self.height
    }

    /// Returns the number of columns.
    #[inline]
    pub fn width(&self) -> usize {
        self.columns.len()
    }

    /// Returns `(height, width)`.
    #[inline]
    pub fn shape(&self) -> (usize, usize) {
        (self.height(), self.width())
    }

    /// Returns `true` when the frame has zero rows or zero columns.
    #[inline]
    pub fn shape_has_zero(&self) -> bool {
        matches!(self.shape(), (0, _) | (_, 0))
    }

    /// Borrows the columns as a slice.
    #[inline]
    pub fn columns(&self) -> &[Column] {
        self.columns.as_slice()
    }

    /// Consumes the frame and returns its columns.
    #[inline]
    pub fn into_columns(self) -> Vec<Column> {
        self.columns
    }

    /// Gives mutable access to the column vector and resets the schema cache first, so
    /// the next [`DataFrame::schema`] call recomputes it from the modified columns.
    ///
    /// # Safety
    ///
    /// Changes made through the returned reference are not validated. NOTE(review):
    /// presumably the caller must leave the columns in a state that
    /// `validate_columns_slice(self.height(), ..)` would accept; confirm.
    #[inline]
    pub unsafe fn columns_mut(&mut self) -> &mut Vec<Column> {
        self.clear_schema();
        &mut self.columns
    }

    /// Gives mutable access to the column vector while keeping the cached schema.
    ///
    /// # Safety
    ///
    /// In addition to the contract of [`DataFrame::columns_mut`], the caller must not make
    /// an already cached schema stale: [`DataFrame::schema`] returns the cached value
    /// without rebuilding it, and only asserts that its length equals the width.
    #[inline]
    pub unsafe fn columns_mut_retain_schema(&mut self) -> &mut Vec<Column> {
        &mut self.columns
    }

    /// Returns the schema, building it from the columns' names and dtypes on first use
    /// and caching it afterwards.
    ///
    /// # Panics
    ///
    /// Panics if `Schema::from_iter_check_duplicates` returns an error while building the
    /// schema, or if the (possibly cached) schema's length differs from
    /// [`DataFrame::width`].
    pub fn schema(&self) -> &SchemaRef {
        let schema = self.cached_schema.get_or_init(|| {
            let fields = self
                .columns
                .iter()
                .map(|col| (col.name().clone(), col.dtype().clone()));
            Arc::new(Schema::from_iter_check_duplicates(fields).unwrap())
        });

        // Cheap guard against a stale or mismatched cached schema.
        assert_eq!(schema.len(), self.width());

        schema
    }

    /// Returns the cached schema if one is present, without computing it.
    #[inline]
    pub fn cached_schema(&self) -> Option<&SchemaRef> {
        self.cached_schema.get()
    }

    /// Replaces the schema cache with `schema`.
    ///
    /// # Safety
    ///
    /// `schema` must describe the current columns: [`DataFrame::schema`] will return it
    /// as-is and only asserts that its length equals the width.
    #[inline]
    pub unsafe fn set_schema(&mut self, schema: SchemaRef) -> &mut Self {
        self.cached_schema = OnceLock::from(schema);
        self
    }

    /// By-value variant of [`DataFrame::set_schema`].
    ///
    /// # Safety
    ///
    /// Same contract as [`DataFrame::set_schema`].
    #[inline]
    pub unsafe fn with_schema(mut self, schema: SchemaRef) -> Self {
        // SAFETY: the caller upholds the contract of `set_schema`.
        unsafe { self.set_schema(schema) };
        self
    }

    /// Installs `schema` as the cached schema when it is `Some`; `None` leaves whatever is
    /// currently cached untouched.
    ///
    /// # Safety
    ///
    /// Same contract as [`DataFrame::set_schema`] whenever `schema` is `Some`.
    #[inline]
    pub unsafe fn set_opt_schema(&mut self, schema: Option<SchemaRef>) -> &mut Self {
        if let Some(schema) = schema {
            // SAFETY: the caller upholds the contract of `set_schema`.
            unsafe { self.set_schema(schema) };
        }

        self
    }

    /// Copies the cached schema of `from`, if it has one, into `self`; otherwise the
    /// current cache of `self` is kept.
    ///
    /// # Safety
    ///
    /// If `from` has a cached schema, it must describe the columns of `self`
    /// (see [`DataFrame::set_schema`]).
    #[inline]
    pub unsafe fn set_schema_from(&mut self, from: &DataFrame) -> &mut Self {
        // SAFETY: the caller guarantees that `from`'s cached schema matches `self`.
        unsafe { self.set_opt_schema(from.cached_schema().cloned()) };
        self
    }

    /// By-value variant of [`DataFrame::set_schema_from`].
    ///
    /// # Safety
    ///
    /// Same contract as [`DataFrame::set_schema_from`].
    #[inline]
    pub unsafe fn with_schema_from(mut self, from: &DataFrame) -> Self {
        // SAFETY: the caller guarantees that `from`'s cached schema matches `self`.
        unsafe { self.set_opt_schema(from.cached_schema().cloned()) };
        self
    }

    /// Drops any cached schema so that it is rebuilt on the next [`DataFrame::schema`] call.
    #[inline]
    fn clear_schema(&mut self) -> &mut Self {
        self.cached_schema = OnceLock::new();
        self
    }
}