pub struct OptimizedDataFrame { /* private fields */ }
Optimized DataFrame implementation. Uses columnar storage for high-speed data processing.
Implementations§
Source§impl OptimizedDataFrame
impl OptimizedDataFrame
Sourcepub fn column_count(&self) -> usize
pub fn column_count(&self) -> usize
Get the number of columns in the DataFrame
Sourcepub fn column_names(&self) -> &[String]
pub fn column_names(&self) -> &[String]
Get the column names
Sourcepub fn contains_column(&self, name: &str) -> bool
pub fn contains_column(&self, name: &str) -> bool
Check if a column exists
Source§impl OptimizedDataFrame
impl OptimizedDataFrame
Sourcepub fn from_dataframe(df: &DataFrame) -> Result<Self>
pub fn from_dataframe(df: &DataFrame) -> Result<Self>
Create a DataFrame from a standard DataFrame (alias for from_standard_dataframe).
This is a public alias provided for backward compatibility with existing code.
Source§impl OptimizedDataFrame
impl OptimizedDataFrame
Sourcepub fn add_column<C: Into<Column>>(
&mut self,
name: impl Into<String>,
column: C,
) -> Result<()>
pub fn add_column<C: Into<Column>>( &mut self, name: impl Into<String>, column: C, ) -> Result<()>
Add a column
Sourcepub fn add_int_column(
&mut self,
name: impl Into<String>,
data: Vec<i64>,
) -> Result<()>
pub fn add_int_column( &mut self, name: impl Into<String>, data: Vec<i64>, ) -> Result<()>
Add an integer column
Sourcepub fn add_float_column(
&mut self,
name: impl Into<String>,
data: Vec<f64>,
) -> Result<()>
pub fn add_float_column( &mut self, name: impl Into<String>, data: Vec<f64>, ) -> Result<()>
Add a floating-point column
Sourcepub fn add_string_column(
&mut self,
name: impl Into<String>,
data: Vec<String>,
) -> Result<()>
pub fn add_string_column( &mut self, name: impl Into<String>, data: Vec<String>, ) -> Result<()>
Add a string column
Sourcepub fn add_boolean_column(
&mut self,
name: impl Into<String>,
data: Vec<bool>,
) -> Result<()>
pub fn add_boolean_column( &mut self, name: impl Into<String>, data: Vec<bool>, ) -> Result<()>
Add a boolean column
Sourcepub fn column(&self, name: &str) -> Result<ColumnView>
pub fn column(&self, name: &str) -> Result<ColumnView>
Get a reference to a column
Sourcepub fn column_type(&self, name: &str) -> Result<ColumnType>
pub fn column_type(&self, name: &str) -> Result<ColumnType>
Get the type of a column
Sourcepub fn remove_column(&mut self, name: &str) -> Result<Column>
pub fn remove_column(&mut self, name: &str) -> Result<Column>
Remove a column
Sourcepub fn rename_column(
&mut self,
old_name: &str,
new_name: impl Into<String>,
) -> Result<()>
pub fn rename_column( &mut self, old_name: &str, new_name: impl Into<String>, ) -> Result<()>
Rename a column
Sourcepub fn rename_columns(
&mut self,
column_map: &HashMap<String, String>,
) -> Result<()>
pub fn rename_columns( &mut self, column_map: &HashMap<String, String>, ) -> Result<()>
Rename columns in the DataFrame using a mapping
Sourcepub fn set_column_names(&mut self, names: Vec<String>) -> Result<()>
pub fn set_column_names(&mut self, names: Vec<String>) -> Result<()>
Set all column names in the DataFrame
Sourcepub fn get_value(
&self,
row_idx: usize,
column_name: &str,
) -> Result<Option<String>>
pub fn get_value( &self, row_idx: usize, column_name: &str, ) -> Result<Option<String>>
Get the value at the specified row and column
Sourcepub fn get_index(&self) -> Option<&DataFrameIndex<String>>
pub fn get_index(&self) -> Option<&DataFrameIndex<String>>
Get the index
Sourcepub fn set_default_index(&mut self) -> Result<()>
pub fn set_default_index(&mut self) -> Result<()>
Set the default index
Sourcepub fn set_index_directly(
&mut self,
index: DataFrameIndex<String>,
) -> Result<()>
pub fn set_index_directly( &mut self, index: DataFrameIndex<String>, ) -> Result<()>
Set the index directly
Sourcepub fn set_index_from_simple_index(
&mut self,
index: Index<String>,
) -> Result<()>
pub fn set_index_from_simple_index( &mut self, index: Index<String>, ) -> Result<()>
Set a simple index
Sourcepub fn reset_index(&mut self, name: &str, drop_index: bool) -> Result<()>
pub fn reset_index(&mut self, name: &str, drop_index: bool) -> Result<()>
Add index as a column
Sourcepub fn set_index_from_simple_index_internal(
&mut self,
index: Index<String>,
) -> Result<()>
👎Deprecated: This is an internal method that should not be used directly. Use set_index_from_simple_index instead.
pub fn set_index_from_simple_index_internal( &mut self, index: Index<String>, ) -> Result<()>
Source§impl OptimizedDataFrame
impl OptimizedDataFrame
Sourcepub fn append(&self, other: &OptimizedDataFrame) -> Result<Self>
pub fn append(&self, other: &OptimizedDataFrame) -> Result<Self>
Append another DataFrame vertically. Concatenates two DataFrames with compatible columns and creates a new DataFrame.
Sourcepub fn get_row(&self, row_idx: usize) -> Result<Self>
pub fn get_row(&self, row_idx: usize) -> Result<Self>
Get a row using integer index (as a new DataFrame)
Sourcepub fn get_row_by_index(&self, key: &str) -> Result<Self>
pub fn get_row_by_index(&self, key: &str) -> Result<Self>
Get a row by index
Sourcepub fn select_by_index<I, S>(&self, keys: I) -> Result<Self>
pub fn select_by_index<I, S>(&self, keys: I) -> Result<Self>
Select rows using index
Sourcepub fn par_apply<F>(&self, func: F) -> Result<Self>
pub fn par_apply<F>(&self, func: F) -> Result<Self>
Apply mapping function (with parallel processing support)
Sourcepub fn par_filter(&self, condition_column: &str) -> Result<Self>
pub fn par_filter(&self, condition_column: &str) -> Result<Self>
Execute row filtering (automatically selects serial/parallel processing based on data size)
Sourcepub fn par_groupby(
&self,
group_by_columns: &[&str],
) -> Result<HashMap<String, Self>>
pub fn par_groupby( &self, group_by_columns: &[&str], ) -> Result<HashMap<String, Self>>
Execute groupby operation in parallel (optimized for data size)
Sourcepub fn inner_join(
&self,
other: &Self,
left_on: &str,
right_on: &str,
) -> Result<Self>
pub fn inner_join( &self, other: &Self, left_on: &str, right_on: &str, ) -> Result<Self>
Inner join
Sourcepub fn left_join(
&self,
other: &Self,
left_on: &str,
right_on: &str,
) -> Result<Self>
pub fn left_join( &self, other: &Self, left_on: &str, right_on: &str, ) -> Result<Self>
Left join
Sourcepub fn right_join(
&self,
other: &Self,
left_on: &str,
right_on: &str,
) -> Result<Self>
pub fn right_join( &self, other: &Self, left_on: &str, right_on: &str, ) -> Result<Self>
Right join
Sourcepub fn outer_join(
&self,
other: &Self,
left_on: &str,
right_on: &str,
) -> Result<Self>
pub fn outer_join( &self, other: &Self, left_on: &str, right_on: &str, ) -> Result<Self>
Outer join
Sourcepub fn applymap<F, G, H, I>(
&self,
column_name: &str,
f_str: F,
f_int: G,
f_float: H,
f_bool: I,
) -> Result<Self>
pub fn applymap<F, G, H, I>( &self, column_name: &str, f_str: F, f_int: G, f_float: H, f_bool: I, ) -> Result<Self>
Sourcepub fn melt(
&self,
id_vars: &[&str],
value_vars: Option<&[&str]>,
var_name: Option<&str>,
value_name: Option<&str>,
) -> Result<Self>
pub fn melt( &self, id_vars: &[&str], value_vars: Option<&[&str]>, var_name: Option<&str>, value_name: Option<&str>, ) -> Result<Self>
Convert DataFrame to “long format” (melt operation)
Converts multiple columns into a single “variable” column and “value” column. This implementation prioritizes performance.
§Arguments
- id_vars - Column names to keep unchanged (identifier columns)
- value_vars - Column names to convert (value columns). If not specified, all columns except id_vars are used
- var_name - Name for the variable column (default: “variable”)
- value_name - Name for the value column (default: “value”)
§Returns
- Result<Self> - DataFrame converted to long format
Sourcepub fn max(&self, column_name: &str) -> Result<f64>
pub fn max(&self, column_name: &str) -> Result<f64>
Calculate the maximum value of a numeric column
Sourcepub fn min(&self, column_name: &str) -> Result<f64>
pub fn min(&self, column_name: &str) -> Result<f64>
Calculate the minimum value of a numeric column
Sourcepub fn count(&self, column_name: &str) -> Result<usize>
pub fn count(&self, column_name: &str) -> Result<usize>
Count the number of elements in a column (excluding missing values)
Sourcepub fn aggregate(
&self,
column_names: &[&str],
operation: &str,
) -> Result<HashMap<String, f64>>
pub fn aggregate( &self, column_names: &[&str], operation: &str, ) -> Result<HashMap<String, f64>>
Apply aggregation operation to multiple columns
Sourcepub fn sort_by(&self, by: &str, ascending: bool) -> Result<Self>
pub fn sort_by(&self, by: &str, ascending: bool) -> Result<Self>
Sort DataFrame by the specified column
Sourcepub fn sort_by_columns(
&self,
by: &[&str],
ascending: Option<&[bool]>,
) -> Result<Self>
pub fn sort_by_columns( &self, by: &[&str], ascending: Option<&[bool]>, ) -> Result<Self>
Sort DataFrame by multiple columns
Sourcepub fn aggregate_numeric(&self, operation: &str) -> Result<HashMap<String, f64>>
pub fn aggregate_numeric(&self, operation: &str) -> Result<HashMap<String, f64>>
Apply aggregation operation to all numeric columns
Sourcepub fn concat_rows(&self, other: &Self) -> Result<Self>
pub fn concat_rows(&self, other: &Self) -> Result<Self>
Concatenate rows from another DataFrame
This method adds the rows from another DataFrame to this one. Both DataFrames must have the same column structure.
Source§impl OptimizedDataFrame
Direct aggregation methods for OptimizedDataFrame that eliminate conversion overhead
impl OptimizedDataFrame
Direct aggregation methods for OptimizedDataFrame that eliminate conversion overhead
Sourcepub fn sum_direct(&self, column_name: &str) -> Result<f64>
pub fn sum_direct(&self, column_name: &str) -> Result<f64>
Calculate the sum of a numeric column using direct operations
This method is 3-5x faster than the conversion-based approach by:
- Working directly on the target column
- Using optimized column methods with null handling
- Avoiding full DataFrame copying
Sourcepub fn mean_direct(&self, column_name: &str) -> Result<f64>
pub fn mean_direct(&self, column_name: &str) -> Result<f64>
Calculate the mean of a numeric column using direct operations
Sourcepub fn max_direct(&self, column_name: &str) -> Result<f64>
pub fn max_direct(&self, column_name: &str) -> Result<f64>
Calculate the maximum value of a numeric column using direct operations
Sourcepub fn min_direct(&self, column_name: &str) -> Result<f64>
pub fn min_direct(&self, column_name: &str) -> Result<f64>
Calculate the minimum value of a numeric column using direct operations
Sourcepub fn count_direct(&self, column_name: &str) -> Result<usize>
pub fn count_direct(&self, column_name: &str) -> Result<usize>
Count the number of non-null elements in a column using direct access
Sourcepub fn sum_simd(&self, column_name: &str) -> Result<f64>
pub fn sum_simd(&self, column_name: &str) -> Result<f64>
Calculate the sum of a numeric column using SIMD-accelerated direct operations
This method provides the best performance by combining:
- Direct column access (3-5x improvement over conversion)
- SIMD vectorization (2-4x additional improvement)
- Intelligent fallback for columns with null values
Sourcepub fn mean_simd(&self, column_name: &str) -> Result<f64>
pub fn mean_simd(&self, column_name: &str) -> Result<f64>
Calculate the mean of a numeric column using SIMD-accelerated direct operations
Trait Implementations§
Source§impl Clone for OptimizedDataFrame
impl Clone for OptimizedDataFrame
Source§fn clone(&self) -> OptimizedDataFrame
fn clone(&self) -> OptimizedDataFrame
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl DataFramePlotExt for OptimizedDataFrame
impl DataFramePlotExt for OptimizedDataFrame
Source§fn plot_column<P: AsRef<Path>>(
&self,
_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn plot_column<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn line_plot<P: AsRef<Path>>(
&self,
_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn line_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn scatter_plot<P: AsRef<Path>>(
&self,
_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn scatter_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn bar_plot<P: AsRef<Path>>(
&self,
_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn bar_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn area_plot<P: AsRef<Path>>(
&self,
_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn area_plot<P: AsRef<Path>>( &self, _column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn box_plot<P: AsRef<Path>>(
&self,
_value_column: &str,
_category_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn box_plot<P: AsRef<Path>>( &self, _value_column: &str, _category_column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§fn scatter_xy<P: AsRef<Path>>(
&self,
_x_column: &str,
_y_column: &str,
_path: P,
_title: Option<&str>,
) -> Result<()>
fn scatter_xy<P: AsRef<Path>>( &self, _x_column: &str, _y_column: &str, _path: P, _title: Option<&str>, ) -> Result<()>
Source§impl Debug for OptimizedDataFrame
impl Debug for OptimizedDataFrame
Auto Trait Implementations§
impl Freeze for OptimizedDataFrame
impl RefUnwindSafe for OptimizedDataFrame
impl Send for OptimizedDataFrame
impl Sync for OptimizedDataFrame
impl Unpin for OptimizedDataFrame
impl UnwindSafe for OptimizedDataFrame
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more