vibesql_executor/evaluator/window/
partitioning.rs

1//! Row partitioning for window functions
2//!
3//! Groups rows into partitions based on PARTITION BY expressions.
4
5use vibesql_ast::Expression;
6use vibesql_storage::Row;
7use vibesql_types::SqlValue;
8
9/// A partition of rows for window function evaluation
10#[derive(Debug, Clone)]
11pub struct Partition {
12    pub rows: Vec<Row>,
13    /// Original indices of rows before partitioning/sorting
14    pub original_indices: Vec<usize>,
15    /// Column name to index mapping (for resolving named column references)
16    pub column_map: std::collections::HashMap<String, usize>,
17}
18
19impl Partition {
20    pub fn new(rows: Vec<Row>) -> Self {
21        let original_indices = (0..rows.len()).collect();
22        Self {
23            rows,
24            original_indices,
25            column_map: std::collections::HashMap::new(),
26        }
27    }
28
29    pub fn with_indices(rows: Vec<Row>, original_indices: Vec<usize>) -> Self {
30        Self {
31            rows,
32            original_indices,
33            column_map: std::collections::HashMap::new(),
34        }
35    }
36
37    pub fn with_column_map(
38        rows: Vec<Row>,
39        original_indices: Vec<usize>,
40        column_map: std::collections::HashMap<String, usize>,
41    ) -> Self {
42        Self { rows, original_indices, column_map }
43    }
44
45    pub fn len(&self) -> usize {
46        self.rows.len()
47    }
48
49    pub fn is_empty(&self) -> bool {
50        self.rows.is_empty()
51    }
52
53    /// Get column index by name (case-insensitive)
54    pub fn get_column_index(&self, name: &str) -> Option<usize> {
55        // First try exact match
56        if let Some(&idx) = self.column_map.get(name) {
57            return Some(idx);
58        }
59        // Try lowercase match
60        let lower = name.to_lowercase();
61        self.column_map.get(&lower).copied()
62    }
63}
64
65/// Partition rows by PARTITION BY expressions
66///
67/// Groups rows into partitions based on partition expressions.
68/// If no PARTITION BY clause, all rows go into a single partition.
69///
70/// Partitions are ordered by their key values (using BTreeMap with SqlValue keys
71/// for proper numeric/string ordering), matching SQLite's behavior.
72pub fn partition_rows<F>(
73    rows: Vec<Row>,
74    partition_by: &Option<Vec<Expression>>,
75    eval_fn: F,
76) -> Vec<Partition>
77where
78    F: Fn(&Expression, &Row) -> Result<SqlValue, String>,
79{
80    // If no PARTITION BY, return all rows in single partition
81    let Some(partition_exprs) = partition_by else {
82        return vec![Partition::new(rows)];
83    };
84
85    if partition_exprs.is_empty() {
86        return vec![Partition::new(rows)];
87    }
88
89    // Group rows by partition key values
90    // Use BTreeMap with SqlValue keys for proper value ordering (not string ordering)
91    // This ensures Integer(3) < Integer(7) < Integer(11) as expected
92    let mut partitions_map: std::collections::BTreeMap<Vec<SqlValue>, Vec<(usize, Row)>> =
93        std::collections::BTreeMap::new();
94
95    for (original_idx, row) in rows.into_iter().enumerate() {
96        // Evaluate partition expressions for this row
97        let mut partition_key = Vec::new();
98
99        for expr in partition_exprs {
100            let value = eval_fn(expr, &row).unwrap_or(SqlValue::Null);
101            partition_key.push(value);
102        }
103
104        partitions_map.entry(partition_key).or_default().push((original_idx, row));
105    }
106
107    // Convert BTreeMap to Vec<Partition>, preserving sorted partition key order
108    partitions_map
109        .into_values()
110        .map(|rows_with_indices| {
111            let (indices, rows): (Vec<_>, Vec<_>) = rows_with_indices.into_iter().unzip();
112            Partition::with_indices(rows, indices)
113        })
114        .collect()
115}