Skip to main content

robin_sparkless_core/engine/
dataframe.rs

1//! Engine-agnostic DataFrame backend trait.
2
3use super::{CollectedRows, JoinType};
4use crate::error::EngineError;
5use crate::expr::ExprIr;
6use crate::schema::StructType;
7
8/// Backend for GroupedData (result of group_by).
///
/// Every method borrows the grouped data immutably (`&self`) and returns either a
/// new boxed [`DataFrameBackend`] or an [`EngineError`]. Implementors must be
/// `Send + Sync`. This trait only fixes the signatures; the actual aggregation
/// semantics are supplied by each implementor.
///
/// NOTE(review): `mean` takes a single column name while `avg` takes a slice of
/// column names — confirm whether that asymmetry is intentional.
9pub trait GroupedDataBackend: Send + Sync {
    /// Aggregates using the given slice of expression IR nodes.
    ///
    /// # Errors
    /// Returns an [`EngineError`] when the implementor cannot perform the aggregation.
10    fn agg(&self, exprs: &[ExprIr]) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Count aggregation; takes no column argument.
    /// Presumably yields a per-group row count — TODO confirm against implementors.
11    fn count(&self) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Sum aggregation over the single named `column`.
12    fn sum(&self, column: &str) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Minimum aggregation over the single named `column`.
13    fn min(&self, column: &str) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Maximum aggregation over the single named `column`.
14    fn max(&self, column: &str) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Mean aggregation over the single named `column`.
15    fn mean(&self, column: &str) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Average aggregation over several named `columns` (note: a slice, unlike `mean`).
    /// Presumably equivalent to `mean` applied per column — verify against implementors.
16    fn avg(&self, columns: &[&str]) -> Result<Box<dyn DataFrameBackend>, EngineError>;
17}
18
19/// Backend for DataFrame operations. All expression arguments use ExprIr.
20/// Implementors can be downcast via `as_any()` for backend-specific operations (e.g. join).
///
/// Every method takes `&self`; the transforming methods hand back a new boxed
/// backend (or a [`GroupedDataBackend`] for `group_by`) rather than mutating the
/// receiver. Implementors must be `Send + Sync`. Whether an operation is evaluated
/// eagerly or deferred is not specified by this trait.
///
/// # Errors
/// All methods except `as_any` return `Result<_, EngineError>`; the conditions
/// under which an error is produced are decided by the implementor.
21pub trait DataFrameBackend: Send + Sync {
22    /// For downcasting to concrete backend type (e.g. for join with same backend).
23    fn as_any(&self) -> &(dyn std::any::Any + Send + Sync);
24
    /// Applies `condition` (expression IR) as a filter and returns the resulting frame.
25    fn filter(&self, condition: &ExprIr) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Projects the frame through the given expression IR nodes.
26    fn select(&self, exprs: &[ExprIr]) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Projects the frame by column name only (no expressions).
27    fn select_columns(&self, columns: &[&str]) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Returns a frame with a column called `name` computed from `expr`.
    /// Presumably replaces an existing column of the same name — TODO confirm.
28    fn with_column(
29        &self,
30        name: &str,
31        expr: &ExprIr,
32    ) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Joins this frame with `other` on the key column names in `on`, using join kind `how`.
    ///
    /// `other` arrives as a trait object; implementors can use `as_any()` to recover
    /// their own concrete type from it.
33    fn join(
34        &self,
35        other: &dyn DataFrameBackend,
36        on: &[&str],
37        how: JoinType,
38    ) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Groups by the named columns and returns a [`GroupedDataBackend`] for aggregation.
39    fn group_by(&self, column_names: &[&str]) -> Result<Box<dyn GroupedDataBackend>, EngineError>;
    /// Orders the frame by `column_names`, with a direction flag per entry in `ascending`.
    ///
    /// NOTE(review): the two slices are presumably parallel (one flag per column);
    /// behavior on a length mismatch is not visible here — confirm with implementors.
40    fn order_by(
41        &self,
42        column_names: &[&str],
43        ascending: &[bool],
44    ) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Limits the frame using the row count `n`.
45    fn limit(&self, n: usize) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Unions this frame with `other`.
    /// Presumably positional (by column order), in contrast to `union_by_name` — TODO confirm.
46    fn union(&self, other: &dyn DataFrameBackend)
47    -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Unions this frame with `other`, matching columns by name.
    ///
    /// `allow_missing_columns` presumably permits columns present on only one side
    /// (how they are filled is up to the implementor) — verify against implementors.
48    fn union_by_name(
49        &self,
50        other: &dyn DataFrameBackend,
51        allow_missing_columns: bool,
52    ) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Removes duplicate rows, optionally considering only the columns in `subset`.
    /// Presumably `None` means "compare all columns" — TODO confirm.
53    fn distinct(&self, subset: Option<Vec<&str>>)
54    -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Returns a frame without the named columns.
55    fn drop_columns(&self, columns: &[&str]) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Returns a frame in which column `old_name` is called `new_name`.
56    fn with_column_renamed(
57        &self,
58        old_name: &str,
59        new_name: &str,
60    ) -> Result<Box<dyn DataFrameBackend>, EngineError>;
    /// Cross-joins this frame with `other` (no key columns are supplied).
61    fn cross_join(
62        &self,
63        other: &dyn DataFrameBackend,
64    ) -> Result<Box<dyn DataFrameBackend>, EngineError>;
65
    /// Collects the frame's rows into a [`CollectedRows`] value.
66    fn collect(&self) -> Result<CollectedRows, EngineError>;
    /// Returns the frame's schema as a [`StructType`].
67    fn schema(&self) -> Result<StructType, EngineError>;
    /// Returns the column names as owned strings.
68    fn columns(&self) -> Result<Vec<String>, EngineError>;
    /// Returns a `u64` count for the frame (presumably the number of rows — TODO confirm).
69    fn count(&self) -> Result<u64, EngineError>;
70}