dsq_core/ops/
mod.rs

1#![allow(missing_docs)]
2
3//! Operations module for dsq
4//!
5//! This module provides core operations for manipulating `DataFrames` and other data structures
6//! in dsq. It includes basic operations like selection and filtering, aggregation operations
7//! like group by and statistical functions, join operations for combining datasets, and
8//! transformation operations for reshaping and converting data.
9//!
10//! The operations are designed to work with both Polars `DataFrames` and jq-style arrays
11//! and objects, providing a unified interface that bridges the gap between structured
12//! and semi-structured data processing.
13//!
14//! # Examples
15//!
16//! Basic operations:
17//! ```rust,ignore
18//! use dsq_core::ops::basic::{select_columns, filter_values, sort_by_columns, SortOptions};
19//! use dsq_core::value::Value;
20//!
21//! // Select specific columns
22//! let columns = vec!["name".to_string(), "age".to_string()];
23//! let result = select_columns(&dataframe_value, &columns).unwrap();
24//!
25//! // Sort by multiple columns
26//! let sort_opts = vec![
27//!     SortOptions::desc("age"),
28//!     SortOptions::asc("name"),
29//! ];
30//! let sorted = sort_by_columns(&result, &sort_opts).unwrap();
31//! ```
32//!
33//! Aggregation operations:
34//! ```rust,ignore
35//! use dsq_core::ops::aggregate::{group_by_agg, AggregationFunction};
36//! use dsq_core::value::Value;
37//!
38//! let group_cols = vec!["department".to_string()];
39//! let agg_funcs = vec![
40//!     AggregationFunction::Sum("salary".to_string()),
41//!     AggregationFunction::Mean("age".to_string()),
42//!     AggregationFunction::Count,
43//! ];
44//! let result = group_by_agg(&dataframe_value, &group_cols, &agg_funcs).unwrap();
45//! ```
46//!
47//! Join operations:
48//! ```rust,ignore
49//! use dsq_core::ops::join::{inner_join, JoinKeys};
50//! use dsq_core::value::Value;
51//!
52//! let keys = JoinKeys::on(vec!["id".to_string()]);
53//! let result = inner_join(&left_df, &right_df, &keys).unwrap();
54//! ```
55//!
56//! Transformation operations:
57//! ```rust,ignore
58//! use dsq_core::ops::transform::{transpose, string::to_uppercase};
59//! use dsq_core::value::Value;
60//!
61//! let transposed = transpose(&dataframe_value).unwrap();
62//! let uppercase = to_uppercase(&dataframe_value, "name").unwrap();
63//! ```
64//!
65//! # Architecture
66//!
67//! The operations module is organized into four main submodules:
68//!
69//! - [`basic`] - Fundamental operations like selection, filtering, sorting
70//! - [`aggregate`] - Grouping and aggregation operations
//! - [`mod@join`] - Operations for combining multiple datasets
72//! - [`transform`] - Data transformation and reshaping operations
73//!
74//! Each operation is designed to work with the [`Value`] enum, which can represent
75//! `DataFrames`, `LazyFrames`, arrays, objects, or scalar values. This unified approach
76//! allows operations to work seamlessly across different data representations.
77//!
78//! # Error Handling
79//!
80//! All operations return [`Result<Value>`] where errors are represented by the
81//! `Error` type. Common error scenarios include:
82//!
83//! - Type mismatches (e.g., trying to sort non-comparable values)
84//! - Missing columns or fields
85//! - Schema incompatibilities in joins
86//! - Invalid operation parameters
87//!
88//! Operations will attempt to handle mixed data types gracefully where possible,
89//! but will return descriptive errors when operations cannot be completed.
90//!
91//! # Performance Considerations
92//!
93//! Operations are optimized for different data representations:
94//!
95//! - **`DataFrame` operations** leverage Polars' optimized columnar processing
96//! - **`LazyFrame` operations** benefit from query optimization and lazy evaluation
97//! - **Array operations** use efficient in-memory processing for jq-style data
98//! - **Mixed operations** automatically convert between representations as needed
99//!
100//! For large datasets, prefer using `LazyFrame` operations when possible to take
101//! advantage of query optimization and memory-efficient processing.
102
103pub mod access_ops;
104pub mod aggregate;
105pub mod arithmetic_ops;
106pub mod assignment_ops;
107pub mod basic;
108pub mod comparison_ops;
109pub mod construct_ops;
110pub mod join;
111pub mod logical_ops;
112pub mod pipeline;
113pub mod selection_ops;
114#[cfg(test)]
115pub mod tests;
116/// Data transformation operations
117pub mod transform;
118pub mod utils;
119
120// Re-export commonly used types and functions for convenience
121pub use basic::{
122    add_column, count, drop_columns, filter_rows, filter_values, head, rename_columns, reverse,
123    select_columns, slice, sort_by_columns, tail, unique, SortOptions,
124};
125
126pub use aggregate::{
127    cumulative_agg, group_by, group_by_agg, pivot, rolling_agg, unpivot, AggregationFunction,
128    WindowFunction,
129};
130
131pub use join::{
132    inner_join, join, join_multiple, left_join, outer_join, right_join, JoinKeys, JoinOptions,
133    JoinType, JoinValidation,
134};
135
136pub use transform::Transform;
137
138pub use pipeline::{
139    apply_operations, apply_operations_mut, apply_operations_owned, OperationPipeline,
140};
141
142pub use utils::{recommended_batch_size, supports_operation, OperationType};
143
144// Re-export operation types from filter_ops modules
145pub use access_ops::{
146    FieldAccessOperation, IdentityOperation, IndexOperation, IterateOperation, SliceOperation,
147};
148
149pub use construct_ops::{
150    ArrayConstructOperation, LiteralOperation, ObjectConstructOperation, VariableOperation,
151};
152
153pub use arithmetic_ops::{AddOperation, DivOperation, MulOperation, SubOperation};
154
155pub use comparison_ops::{
156    EqOperation, GeOperation, GtOperation, LeOperation, LtOperation, NeOperation,
157};
158
159pub use logical_ops::{AndOperation, NegationOperation, OrOperation};
160
161pub use assignment_ops::{AssignAddOperation, AssignUpdateOperation};
162
163pub use selection_ops::SelectConditionOperation;
164
165use crate::error::Result;
166use crate::Value;
167
168/// Trait for operations that can be applied to values
169///
170/// This trait provides a common interface for all data operations,
171/// allowing them to be composed and chained together.
172pub trait Operation {
173    /// Apply the operation to a value
174    fn apply(&self, value: &Value) -> Result<Value>;
175
176    /// Get a description of what this operation does
177    fn description(&self) -> String;
178
179    /// Check if this operation can be applied to the given value type
180    fn is_applicable(&self, value: &Value) -> bool {
181        // Default implementation: try to apply and see if it works
182        self.apply(value).is_ok()
183    }
184}