1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
//! In-memory data transformations.
//!
//! The processing layer operates on [`crate::types::DataSet`] values produced by ingestion.
//! It is intentionally simple and purely in-memory for now.
//!
//! Currently implemented:
//!
//! - [`filter()`]: row filtering by predicate
//! - [`map()`]: row mapping by user function
//! - [`reduce()`]: common reductions (count/sum/min/max/mean/variance/std/sum-squares/L2/count-distinct/median)
//! - [`feature_wise_mean_std()`], [`arg_max_row()`], [`arg_min_row()`], [`top_k_by_frequency()`]:
//!   multi-column stats, arg extrema, and label frequency top-k
//!
//! ## Example: filter → map → reduce
//!
//! ```rust
//! use rust_data_processing::processing::{filter, map, reduce, ReduceOp};
//! use rust_data_processing::types::{DataSet, DataType, Field, Schema, Value};
//!
//! let schema = Schema::new(vec![
//!     Field::new("id", DataType::Int64),
//!     Field::new("active", DataType::Bool),
//!     Field::new("score", DataType::Float64),
//! ]);
//! let ds = DataSet::new(
//!     schema,
//!     vec![
//!         vec![Value::Int64(1), Value::Bool(true), Value::Float64(10.0)],
//!         vec![Value::Int64(2), Value::Bool(false), Value::Float64(20.0)],
//!         vec![Value::Int64(3), Value::Bool(true), Value::Null],
//!     ],
//! );
//!
//! // Keep only active rows.
//! let active_idx = ds.schema.index_of("active").unwrap();
//! let filtered = filter(&ds, |row| matches!(row.get(active_idx), Some(Value::Bool(true))));
//!
//! // Apply a multiplier to score.
//! let mapped = map(&filtered, |row| {
//!     let mut out = row.to_vec();
//!     if let Some(Value::Float64(v)) = out.get(2) {
//!         out[2] = Value::Float64(v * 1.1);
//!     }
//!     out
//! });
//!
//! // Sum scores (nulls ignored).
//! let sum = reduce(&mapped, "score", ReduceOp::Sum).unwrap();
//! assert_eq!(sum, Value::Float64(11.0));
//! ```
pub use filter;
pub use map;
pub use ;
pub use ;