pub struct DataFrame {
pub constants: HashMap<Key, DataValue>,
pub dataframe: ColumnFrame,
pub metadata: HashMap<String, DataValue>,
}Expand description
DataFrame holds information about ColumnFrame.
This is used to store the data and the metadata for the candidates.
Fields§
§constants: HashMap<Key, DataValue>Constants for the dataframe - a micro-optimization for the data: values which are constant for the whole dataframe are stored here.
dataframe: ColumnFrameDataframe with the candidates
metadata: HashMap<String, DataValue>Metadata for the dataframe. Here you can store information about the dataframe.
Implementations§
Source§impl DataFrame
impl DataFrame
Sourcepub fn from_dict(df: HashMap<String, Vec<DataValue>>) -> Self
pub fn from_dict(df: HashMap<String, Vec<DataValue>>) -> Self
Create a DataFrame from a dictionary.
df = tdf.DataFrame.from_dict({"a": [1, 2, 3], "b": [4, 5, 6]})Sourcepub fn apply(&mut self, function: Bound<'_, PyAny>) -> Result<(), PyErr>
pub fn apply(&mut self, function: Bound<'_, PyAny>) -> Result<(), PyErr>
Apply a function to the DataFrame. The function should accept a DataFrame and return a DataFrame.
def my_function(df):
# Perform some operations on the DataFrame
return df
df = tdf.DataFrame.init()
df.apply(my_function)Sourcepub fn as_numpy_u32<'py>(
&self,
keys: Option<Vec<String>>,
transposed: Option<bool>,
py: Python<'py>,
) -> PyResult<Bound<'py, PyArray2<u32>>>
pub fn as_numpy_u32<'py>( &self, keys: Option<Vec<String>>, transposed: Option<bool>, py: Python<'py>, ) -> PyResult<Bound<'py, PyArray2<u32>>>
Returns slice from dataframe as numpy.array of uint32 of the given keys.
If transposed is true, the keys will be transposed.
If keys is None, all keys will be used.
import numpy as np
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})
a_np = df.as_numpy_u32(['key1', 'key2'])
assert np.array_equal(a_np, np.array([[1, 11], [2, 21]], dtype=np.uint32))Sourcepub fn as_numpy_u64<'py>(
&self,
keys: Option<Vec<String>>,
transposed: Option<bool>,
py: Python<'py>,
) -> PyResult<Bound<'py, PyArray2<u64>>>
pub fn as_numpy_u64<'py>( &self, keys: Option<Vec<String>>, transposed: Option<bool>, py: Python<'py>, ) -> PyResult<Bound<'py, PyArray2<u64>>>
Returns slice from dataframe as numpy.array of uint64 of the given keys.
If transposed is true, the keys will be transposed.
If keys is None, all keys will be used.
import numpy as np
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})
a_np = df.as_numpy_u64(['key1', 'key2'])
assert np.array_equal(a_np, np.array([[1, 11], [2, 21]], dtype=np.uint64))Sourcepub fn as_numpy_i32<'py>(
&self,
keys: Option<Vec<String>>,
transposed: Option<bool>,
py: Python<'py>,
) -> PyResult<Bound<'py, PyArray2<i32>>>
pub fn as_numpy_i32<'py>( &self, keys: Option<Vec<String>>, transposed: Option<bool>, py: Python<'py>, ) -> PyResult<Bound<'py, PyArray2<i32>>>
Returns slice from dataframe as numpy.array of int32 of the given keys.
If transposed is true, the keys will be transposed.
If keys is None, all keys will be used.
import numpy as np
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})
a_np = df.as_numpy_i32(['key1', 'key2'])
assert np.array_equal(a_np, np.array([[1, 11], [2, 21]], dtype=np.int32))Sourcepub fn as_numpy_i64<'py>(
&self,
keys: Option<Vec<String>>,
transposed: Option<bool>,
py: Python<'py>,
) -> PyResult<Bound<'py, PyArray2<i64>>>
pub fn as_numpy_i64<'py>( &self, keys: Option<Vec<String>>, transposed: Option<bool>, py: Python<'py>, ) -> PyResult<Bound<'py, PyArray2<i64>>>
Returns slice from dataframe as numpy.array of int64 of the given keys.
If transposed is true, the keys will be transposed.
If keys is None, all keys will be used.
import numpy as np
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})
a_np = df.as_numpy_i64(['key1', 'key2'])
assert np.array_equal(a_np, np.array([[1, 11], [2, 21]], dtype=np.int64))Sourcepub fn as_numpy_f32<'py>(
&self,
keys: Option<Vec<String>>,
transposed: Option<bool>,
py: Python<'py>,
) -> PyResult<Bound<'py, PyArray2<f32>>>
pub fn as_numpy_f32<'py>( &self, keys: Option<Vec<String>>, transposed: Option<bool>, py: Python<'py>, ) -> PyResult<Bound<'py, PyArray2<f32>>>
Returns slice from dataframe as numpy.array of float32 of the given keys.
If transposed is true, the keys will be transposed.
If keys is None, all keys will be used.
import numpy as np
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})
a_np = df.as_numpy_f32(['key1', 'key2'])
assert np.array_equal(a_np, np.array([[1, 11], [2, 21]], dtype=np.float32))Sourcepub fn as_numpy_f64<'py>(
&self,
keys: Option<Vec<String>>,
transposed: Option<bool>,
py: Python<'py>,
) -> PyResult<Bound<'py, PyArray2<f64>>>
pub fn as_numpy_f64<'py>( &self, keys: Option<Vec<String>>, transposed: Option<bool>, py: Python<'py>, ) -> PyResult<Bound<'py, PyArray2<f64>>>
Returns slice from dataframe as numpy.array of float64 of the given keys.
If transposed is true, the keys will be transposed.
If keys is None, all keys will be used.
import numpy as np
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})
a_np = df.as_numpy_f64(['key1', 'key2'])
assert np.array_equal(a_np, np.array([[1, 11], [2, 21]], dtype=np.float64))pub fn py_shrink(&mut self)
pub fn py_add_metadata(&mut self, key: String, value: DataValue)
pub fn py_get_metadata(&self, key: &str) -> Option<DataValue>
pub fn py_rename_key(&mut self, key: &str, new_name: &str) -> Result<(), PyErr>
pub fn py_add_alias(&mut self, key: &str, new_name: &str) -> Result<(), PyErr>
Sourcepub fn py_select<'py>(
&self,
py: Python<'py>,
keys: Option<Vec<String>>,
transposed: Option<bool>,
) -> Result<Bound<'py, PyList>, PyErr>
pub fn py_select<'py>( &self, py: Python<'py>, keys: Option<Vec<String>>, transposed: Option<bool>, ) -> Result<Bound<'py, PyList>, PyErr>
Selects data from the DataFrame.
If keys is None, all keys will be used.
If keys is provided, only the specified keys will be selected.
Returns a list of lists, where each inner list represents a row of data.
import trs_dataframe as tdf
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})
# selected = df.select(["key1", "key2"])
# assert selected == [[1, 2], [11, 21]]
# selected = df.select()Sourcepub fn py_select_column<'py>(
&self,
py: Python<'py>,
key: String,
) -> Result<Bound<'py, PyList>, PyErr>
pub fn py_select_column<'py>( &self, py: Python<'py>, key: String, ) -> Result<Bound<'py, PyList>, PyErr>
Selects a column from the DataFrame. If the column does not exist, it will raise a TypeError. Returns a list of values in the selected column.
import trs_dataframe as tdf
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})
# selected = df.select_column("key1")
# assert selected == [1, 11]
# selected = df.select_column("key2")
# assert selected == [2, 21]
# selected = df.select_column("non_existing_key") # Raises TypeErrorSourcepub fn py_join(
&mut self,
other: DataFrame,
join_type: JoinRelation,
) -> Result<(), PyErr>
pub fn py_join( &mut self, other: DataFrame, join_type: JoinRelation, ) -> Result<(), PyErr>
Joins the current DataFrame with another DataFrame.
The join type is specified by the join_type parameter.
see JoinRelation for available join types.
import trs_dataframe as tdf
df1 = tdf.DataFrame.init()
df1.push({"key1": 1, "key2": 2})
df1.push({"key1": 11, "key2": 21})
df2 = tdf.DataFrame.init()
df2.push({"key1": 1, "key2": 3})
df2.push({"key1": 11, "key2": 23})
df1.join(df2, tdf.JoinRelation.extend())
assert df1.select(["key1", "key2"]) == [[1, 2], [11, 21], [1, 3], [11, 23]]Sourcepub fn py_push(&mut self, data: HashMap<Key, DataValue>) -> Result<(), PyErr>
pub fn py_push(&mut self, data: HashMap<Key, DataValue>) -> Result<(), PyErr>
Pushes a new row of data into the DataFrame. The data should be provided as a dictionary where keys are column names and values are the corresponding data values.
import trs_dataframe as tdf
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})Sourcepub fn py_add_column(
&mut self,
key: Key,
data: Vec<DataValue>,
) -> Result<(), PyErr>
pub fn py_add_column( &mut self, key: Key, data: Vec<DataValue>, ) -> Result<(), PyErr>
Adds a new column to the DataFrame. The column is specified by a key and a vector of data values. If the length of the data vector does not match the number of rows in the DataFrame, it will raise a TypeError.
import trs_dataframe as tdf
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})
df.add_column("key3", [3, 4])
assert df.select(["key1", "key2", "key3"]) == [[1, 2, 3], [11, 21, 4]]pub fn add_constant( &mut self, key: Key, feature: DataValue, ) -> Result<(), PyErr>
Sourcepub fn filter_by_expression(
&mut self,
expression: String,
) -> Result<Self, PyErr>
pub fn filter_by_expression( &mut self, expression: String, ) -> Result<Self, PyErr>
Filters the DataFrame by a given expression. The expression should be a string that can be parsed by the DataFrame’s filter method.
import trs_dataframe as tdf
df = tdf.DataFrame.init()
df.push({"key1": 1, "key2": 2})
df.push({"key1": 11, "key2": 21})
df.filter_by_expression("key1 > 5")
assert df.select(["key1", "key2"]) == [[11, 21 ]]pub fn __iadd__(&mut self, object: Bound<'_, PyAny>) -> Result<(), PyErr>
pub fn __isub__(&mut self, object: Bound<'_, PyAny>) -> Result<(), PyErr>
pub fn __imul__(&mut self, object: Bound<'_, PyAny>) -> Result<(), PyErr>
pub fn __itruediv__(&mut self, object: Bound<'_, PyAny>) -> Result<(), PyErr>
pub fn __len__(&mut self) -> Result<usize, PyErr>
Source§impl DataFrame
impl DataFrame
pub fn new<C: Into<ColumnFrame>>(dataframe: C) -> Self
pub fn shrink(&mut self)
pub fn add_metadata(&mut self, key: String, value: DataValue)
pub fn get_metadata(&self, key: &str) -> Option<&DataValue>
pub fn join( &mut self, other: Self, join_type: &JoinRelation, ) -> Result<(), Error>
pub fn apply_function<F>(&mut self, keys: &[Key], func: F) -> Result<(), Error>
pub fn select(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error>
pub fn select_transposed_typed<D: Extract>(&self, keys: &[Key]) -> Vec<Vec<D>>
pub fn select_column(&self, key: Key) -> Option<ArrayView1<'_, DataValue>>
pub fn select_transposed( &self, keys: Option<&[Key]>, ) -> Result<Array2<DataValue>, Error>
pub fn insert_constant(&mut self, key: Key, value: DataValue)
pub fn push<C: CandidateData>(&mut self, item: C) -> Result<(), Error>
pub fn remove_column(&mut self, keys: &[Key]) -> Result<Self, Error>
pub fn extend(&mut self, items: Self) -> Result<(), Error>
pub fn len(&self) -> usize
pub fn is_empty(&self) -> bool
pub fn add_single_column<K: Into<Key>>( &mut self, key: K, values: Array1<DataValue>, ) -> Result<(), Error>
pub fn get_single_column(&self, key: &Key) -> Option<ArrayView1<'_, DataValue>>
pub fn sorted(&self, key: &Key) -> Result<SortedDataFrame<'_>, Error>
pub fn filter(&self, filter: &FilterRules) -> Result<Self, Error>
Trait Implementations§
Source§impl<'de> Deserialize<'de> for DataFrame
impl<'de> Deserialize<'de> for DataFrame
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
Source§impl From<ColumnFrame> for DataFrame
impl From<ColumnFrame> for DataFrame
Source§fn from(dataframe: ColumnFrame) -> Self
fn from(dataframe: ColumnFrame) -> Self
Source§impl From<SizedHashMap<SmartString<LazyCompact>, Vec<DataValue>>> for DataFrame
impl From<SizedHashMap<SmartString<LazyCompact>, Vec<DataValue>>> for DataFrame
Source§impl<'py> IntoPyObject<'py> for DataFrame
impl<'py> IntoPyObject<'py> for DataFrame
Source§type Output = Bound<'py, <DataFrame as IntoPyObject<'py>>::Target>
type Output = Bound<'py, <DataFrame as IntoPyObject<'py>>::Target>
Source§fn into_pyobject(
self,
py: Python<'py>,
) -> Result<<Self as IntoPyObject<'_>>::Output, <Self as IntoPyObject<'_>>::Error>
fn into_pyobject( self, py: Python<'py>, ) -> Result<<Self as IntoPyObject<'_>>::Output, <Self as IntoPyObject<'_>>::Error>
Source§impl PyClassImpl for DataFrame
impl PyClassImpl for DataFrame
Source§const IS_BASETYPE: bool = false
const IS_BASETYPE: bool = false
Source§const IS_SUBCLASS: bool = false
const IS_SUBCLASS: bool = false
Source§const IS_MAPPING: bool = false
const IS_MAPPING: bool = false
Source§const IS_SEQUENCE: bool = false
const IS_SEQUENCE: bool = false
Source§type ThreadChecker = SendablePyClass<DataFrame>
type ThreadChecker = SendablePyClass<DataFrame>
Source§type PyClassMutability = <<PyAny as PyClassBaseType>::PyClassMutability as PyClassMutability>::MutableChild
type PyClassMutability = <<PyAny as PyClassBaseType>::PyClassMutability as PyClassMutability>::MutableChild
Source§type BaseNativeType = PyAny
type BaseNativeType = PyAny
The closest native ancestor. This is PyAny by default, and when you declare
#[pyclass(extends=PyDict)], it’s PyDict.fn items_iter() -> PyClassItemsIter
fn lazy_type_object() -> &'static LazyTypeObject<Self>
fn dict_offset() -> Option<isize>
fn weaklist_offset() -> Option<isize>
Source§impl PyClassNewTextSignature<DataFrame> for PyClassImplCollector<DataFrame>
impl PyClassNewTextSignature<DataFrame> for PyClassImplCollector<DataFrame>
fn new_text_signature(self) -> Option<&'static str>
Source§impl PyMethods<DataFrame> for PyClassImplCollector<DataFrame>
impl PyMethods<DataFrame> for PyClassImplCollector<DataFrame>
fn py_methods(self) -> &'static PyClassItems
Source§impl PyTypeInfo for DataFrame
impl PyTypeInfo for DataFrame
Source§fn type_object_raw(py: Python<'_>) -> *mut PyTypeObject
fn type_object_raw(py: Python<'_>) -> *mut PyTypeObject
Source§fn type_object(py: Python<'_>) -> Bound<'_, PyType>
fn type_object(py: Python<'_>) -> Bound<'_, PyType>
Source§fn type_object_bound(py: Python<'_>) -> Bound<'_, PyType>
fn type_object_bound(py: Python<'_>) -> Bound<'_, PyType>
PyTypeInfo::type_objectPyTypeInfo::type_object.Source§fn is_type_of(object: &Bound<'_, PyAny>) -> bool
fn is_type_of(object: &Bound<'_, PyAny>) -> bool
Checks if object is an instance of this type or a subclass of this type.Source§fn is_type_of_bound(object: &Bound<'_, PyAny>) -> bool
fn is_type_of_bound(object: &Bound<'_, PyAny>) -> bool
PyTypeInfo::is_type_ofPyTypeInfo::is_type_of.Source§fn is_exact_type_of(object: &Bound<'_, PyAny>) -> bool
fn is_exact_type_of(object: &Bound<'_, PyAny>) -> bool
Checks if object is an instance of this type.Source§fn is_exact_type_of_bound(object: &Bound<'_, PyAny>) -> bool
fn is_exact_type_of_bound(object: &Bound<'_, PyAny>) -> bool
PyTypeInfo::is_exact_type_ofPyTypeInfo::is_exact_type_of.impl DerefToPyAny for DataFrame
impl StructuralPartialEq for DataFrame
Auto Trait Implementations§
impl Freeze for DataFrame
impl RefUnwindSafe for DataFrame
impl Send for DataFrame
impl Sync for DataFrame
impl Unpin for DataFrame
impl UnwindSafe for DataFrame
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> FromPyObject<'_> for T
impl<T> FromPyObject<'_> for T
Source§impl<'py, T> FromPyObjectBound<'_, 'py> for Twhere
T: FromPyObject<'py>,
impl<'py, T> FromPyObjectBound<'_, 'py> for Twhere
T: FromPyObject<'py>,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<'py, T> IntoPyObjectExt<'py> for Twhere
T: IntoPyObject<'py>,
impl<'py, T> IntoPyObjectExt<'py> for Twhere
T: IntoPyObject<'py>,
Source§fn into_bound_py_any(self, py: Python<'py>) -> Result<Bound<'py, PyAny>, PyErr>
fn into_bound_py_any(self, py: Python<'py>) -> Result<Bound<'py, PyAny>, PyErr>
Converts self into an owned Python object, dropping type information.