fraiseql_core/compiler/window_functions/mod.rs
1//! Window Function Planning Module
2//!
3//! Generates execution plans for SQL window functions.
4//!
5//! # Architecture
6//!
7//! ```text
8//! WindowRequest (high-level, semantic)
9//! ↓
10//! WindowPlanner::plan() (validates against FactTableMetadata)
11//! ↓
12//! WindowExecutionPlan (low-level, SQL expressions)
13//! ↓
14//! WindowSqlGenerator (database-specific SQL)
15//! ```
16//!
17//! # Window Functions
18//!
19//! Window functions perform calculations across sets of table rows that are related
20//! to the current row, without collapsing them into a single output row like GROUP BY.
21//!
22//! ## Function Types
23//!
24//! ### Ranking Functions
25//! - `ROW_NUMBER()` - Sequential number within partition
26//! - `RANK()` - Rank with gaps for ties
27//! - `DENSE_RANK()` - Rank without gaps
28//! - `NTILE(n)` - Divide rows into n groups
29//! - `PERCENT_RANK()` - Relative rank (0 to 1)
30//! - `CUME_DIST()` - Cumulative distribution
31//!
32//! ### Value Functions
33//! - `LAG(field, offset)` - Value from previous row
34//! - `LEAD(field, offset)` - Value from next row
35//! - `FIRST_VALUE(field)` - First value in window
36//! - `LAST_VALUE(field)` - Last value in window
37//! - `NTH_VALUE(field, n)` - Nth value in window
38//!
39//! ### Aggregate as Window
40//! - `SUM(field) OVER (...)` - Running total
41//! - `AVG(field) OVER (...)` - Moving average
42//! - `COUNT(*) OVER (...)` - Running count
43//!
44//! # High-Level Example (`WindowRequest`)
45//!
46//! ```text
47//! // Illustrative output structure only — not directly runnable.
48//! // Requires: FactTableMetadata from compiled schema.
49//! let request = WindowRequest {
50//! table_name: "tf_sales",
51//! select: [Measure("revenue"), Dimension("category")],
52//! windows: [{
53//! function: RowNumber,
54//! alias: "rank",
55//! partition_by: [Dimension("category")],
56//! order_by: [("revenue", Desc)],
57//! }],
58//! where_clause: None,
59//! limit: Some(100),
60//! };
61//!
62//! let plan = WindowPlanner::plan(request, &metadata)?;
63//! ```
64//!
65//! # SQL Example (`WindowExecutionPlan` output)
66//!
67//! ```sql
68//! -- Running total
69//! SELECT
70//! occurred_at,
71//! revenue,
72//! SUM(revenue) OVER (
73//! ORDER BY occurred_at
74//! ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
75//! ) as running_total
76//! FROM tf_sales;
77//!
78//! -- Ranking
79//! SELECT
80//! category,
81//! revenue,
82//! ROW_NUMBER() OVER (
83//! PARTITION BY category
84//! ORDER BY revenue DESC
85//! ) as rank
86//! FROM tf_sales;
87//! ```
88
89use serde::{Deserialize, Serialize};
90
91use crate::{
92 compiler::{
93 aggregation::{OrderByClause, OrderDirection},
94 fact_table::FactTableMetadata,
95 },
96 db::where_clause::WhereClause,
97 error::{FraiseQLError, Result},
98};
99
100// =============================================================================
101// High-Level Types (WindowRequest) - Semantic names, validated against metadata
102// =============================================================================
103
104/// High-level window query request using semantic field names.
105///
106/// This is the user-facing API that uses measure names and dimension paths
107/// instead of raw SQL expressions. It gets validated and converted to
108/// `WindowExecutionPlan` by `WindowPlanner::plan()`.
109///
110/// # Example
111///
112/// ```rust,ignore
113/// let request = WindowRequest {
114/// table_name: "tf_sales".to_string(),
115/// select: vec![
116/// WindowSelectColumn::Measure { name: "revenue".to_string(), alias: "revenue".to_string() },
117/// WindowSelectColumn::Dimension { path: "category".to_string(), alias: "category".to_string() },
118/// ],
119/// windows: vec![WindowFunctionRequest {
120/// function: WindowFunctionSpec::RunningSum { measure: "revenue".to_string() },
121/// alias: "running_total".to_string(),
122/// partition_by: vec![],
123/// order_by: vec![WindowOrderBy { field: "occurred_at".to_string(), direction: OrderDirection::Asc }],
124/// frame: Some(WindowFrame { ... }),
125/// }],
126/// where_clause: None,
127/// order_by: vec![],
128/// limit: Some(100),
129/// offset: None,
130/// };
131/// ```
132#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
133pub struct WindowRequest {
134 /// Fact table name (e.g., "`tf_sales`")
135 pub table_name: String,
136
137 /// Columns to select (measures, dimensions, filters)
138 pub select: Vec<WindowSelectColumn>,
139
140 /// Window function specifications
141 pub windows: Vec<WindowFunctionRequest>,
142
143 /// WHERE clause filters (applied before window computation)
144 pub where_clause: Option<WhereClause>,
145
146 /// Final ORDER BY (after window computation)
147 pub order_by: Vec<WindowOrderBy>,
148
149 /// Result limit
150 pub limit: Option<u32>,
151
152 /// Result offset
153 pub offset: Option<u32>,
154}
155
156/// Column selection for window query (semantic names).
157#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
158#[serde(tag = "type", rename_all = "snake_case")]
159#[non_exhaustive]
160pub enum WindowSelectColumn {
161 /// Select a measure column (e.g., "revenue")
162 Measure {
163 /// Measure name from `FactTableMetadata`
164 name: String,
165 /// Result alias
166 alias: String,
167 },
168
169 /// Select a dimension from JSONB (e.g., "category")
170 Dimension {
171 /// Dimension path in JSONB
172 path: String,
173 /// Result alias
174 alias: String,
175 },
176
177 /// Select a denormalized filter column (e.g., "`customer_id`", "`occurred_at`")
178 Filter {
179 /// Filter column name
180 name: String,
181 /// Result alias
182 alias: String,
183 },
184}
185
186impl WindowSelectColumn {
187 /// Get the result alias for this selection.
188 #[must_use]
189 pub fn alias(&self) -> &str {
190 match self {
191 Self::Measure { alias, .. }
192 | Self::Dimension { alias, .. }
193 | Self::Filter { alias, .. } => alias,
194 }
195 }
196}
197
198/// Window function request (high-level, semantic).
199#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
200pub struct WindowFunctionRequest {
201 /// Window function type and parameters
202 pub function: WindowFunctionSpec,
203
204 /// Result column alias
205 pub alias: String,
206
207 /// PARTITION BY columns (semantic names)
208 pub partition_by: Vec<PartitionByColumn>,
209
210 /// ORDER BY within window
211 pub order_by: Vec<WindowOrderBy>,
212
213 /// Window frame specification
214 pub frame: Option<WindowFrame>,
215}
216
217/// Window function specification using semantic field names.
218///
219/// Unlike `WindowFunctionType` which uses raw SQL expressions,
220/// this uses measure/dimension names that get validated against metadata.
221#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
222#[serde(tag = "type", rename_all = "snake_case")]
223#[non_exhaustive]
224pub enum WindowFunctionSpec {
225 // =========================================================================
226 // Ranking Functions (no field reference needed)
227 // =========================================================================
228 /// `ROW_NUMBER()` - Sequential number within partition
229 RowNumber,
230
231 /// `RANK()` - Rank with gaps for ties
232 Rank,
233
234 /// `DENSE_RANK()` - Rank without gaps
235 DenseRank,
236
237 /// NTILE(n) - Divide rows into n groups
238 Ntile {
239 /// Number of groups
240 n: u32,
241 },
242
243 /// `PERCENT_RANK()` - Relative rank (0 to 1)
244 PercentRank,
245
246 /// `CUME_DIST()` - Cumulative distribution
247 CumeDist,
248
249 // =========================================================================
250 // Value Functions (reference measures or dimensions)
251 // =========================================================================
252 /// LAG(field, offset, default) - Value from previous row
253 Lag {
254 /// Measure or dimension name
255 field: String,
256 /// Row offset (default: 1)
257 offset: i32,
258 /// Default value when no previous row
259 default: Option<serde_json::Value>,
260 },
261
262 /// LEAD(field, offset, default) - Value from next row
263 Lead {
264 /// Measure or dimension name
265 field: String,
266 /// Row offset (default: 1)
267 offset: i32,
268 /// Default value when no next row
269 default: Option<serde_json::Value>,
270 },
271
272 /// `FIRST_VALUE(field)` - First value in window frame
273 FirstValue {
274 /// Measure or dimension name
275 field: String,
276 },
277
278 /// `LAST_VALUE(field)` - Last value in window frame
279 LastValue {
280 /// Measure or dimension name
281 field: String,
282 },
283
284 /// `NTH_VALUE(field`, n) - Nth value in window frame
285 NthValue {
286 /// Measure or dimension name
287 field: String,
288 /// Position (1-indexed)
289 n: u32,
290 },
291
292 // =========================================================================
293 // Aggregate as Window Functions (reference measures)
294 // =========================================================================
295 /// SUM(measure) OVER (...) - Running total
296 RunningSum {
297 /// Measure name
298 measure: String,
299 },
300
301 /// AVG(measure) OVER (...) - Moving average
302 RunningAvg {
303 /// Measure name
304 measure: String,
305 },
306
307 /// COUNT(*) OVER (...) - Running count
308 RunningCount,
309
310 /// COUNT(field) OVER (...) - Running count of non-null values
311 RunningCountField {
312 /// Measure or dimension name
313 field: String,
314 },
315
316 /// MIN(measure) OVER (...) - Running minimum
317 RunningMin {
318 /// Measure name
319 measure: String,
320 },
321
322 /// MAX(measure) OVER (...) - Running maximum
323 RunningMax {
324 /// Measure name
325 measure: String,
326 },
327
328 /// STDDEV(measure) OVER (...) - Running standard deviation
329 RunningStddev {
330 /// Measure name
331 measure: String,
332 },
333
334 /// VARIANCE(measure) OVER (...) - Running variance
335 RunningVariance {
336 /// Measure name
337 measure: String,
338 },
339}
340
341/// PARTITION BY column specification (semantic).
342#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
343#[serde(tag = "type", rename_all = "snake_case")]
344#[non_exhaustive]
345pub enum PartitionByColumn {
346 /// Partition by dimension from JSONB
347 Dimension {
348 /// Dimension path
349 path: String,
350 },
351
352 /// Partition by denormalized filter column
353 Filter {
354 /// Filter column name
355 name: String,
356 },
357
358 /// Partition by measure (rare but valid)
359 Measure {
360 /// Measure name
361 name: String,
362 },
363}
364
365/// ORDER BY clause for window functions (semantic field names).
366#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
367pub struct WindowOrderBy {
368 /// Field name (measure, dimension, or filter)
369 pub field: String,
370
371 /// Sort direction
372 pub direction: OrderDirection,
373}
374
375// =============================================================================
376// Low-Level Types (WindowExecutionPlan) - SQL expressions, ready for execution
377// =============================================================================
378
379/// Window function execution plan
380#[derive(Debug, Clone, Serialize, Deserialize)]
381pub struct WindowExecutionPlan {
382 /// Source table name
383 pub table: String,
384
385 /// Regular SELECT columns (non-window)
386 pub select: Vec<SelectColumn>,
387
388 /// Window function expressions
389 pub windows: Vec<WindowFunction>,
390
391 /// WHERE clause filter
392 pub where_clause: Option<WhereClause>,
393
394 /// Final ORDER BY (after window computation)
395 pub order_by: Vec<OrderByClause>,
396
397 /// Result limit
398 pub limit: Option<u32>,
399
400 /// Result offset
401 pub offset: Option<u32>,
402}
403
404/// Regular SELECT column
405#[derive(Debug, Clone, Serialize, Deserialize)]
406pub struct SelectColumn {
407 /// Column expression (e.g., "revenue", "data->>'category'")
408 pub expression: String,
409
410 /// Result alias
411 pub alias: String,
412}
413
414/// Window function specification
415#[derive(Debug, Clone, Serialize, Deserialize)]
416pub struct WindowFunction {
417 /// Window function type
418 pub function: WindowFunctionType,
419
420 /// Result column alias
421 pub alias: String,
422
423 /// PARTITION BY columns
424 pub partition_by: Vec<String>,
425
426 /// ORDER BY within window
427 pub order_by: Vec<OrderByClause>,
428
429 /// Window frame specification
430 pub frame: Option<WindowFrame>,
431}
432
433/// Window function types
434#[derive(Debug, Clone, Serialize, Deserialize)]
435#[serde(tag = "type", rename_all = "snake_case")]
436#[non_exhaustive]
437pub enum WindowFunctionType {
438 // Ranking functions
439 /// `ROW_NUMBER()` - Sequential number within partition
440 RowNumber,
441
442 /// `RANK()` - Rank with gaps for ties
443 Rank,
444
445 /// `DENSE_RANK()` - Rank without gaps
446 DenseRank,
447
448 /// NTILE(n) - Divide rows into n groups
449 Ntile {
450 /// Number of groups
451 n: u32,
452 },
453
454 /// `PERCENT_RANK()` - Relative rank (0 to 1)
455 PercentRank,
456
457 /// `CUME_DIST()` - Cumulative distribution
458 CumeDist,
459
460 // Value functions
461 /// LAG(field, offset, default) - Value from previous row
462 Lag {
463 /// Field name
464 field: String,
465 /// Row offset
466 offset: i32,
467 /// Default value
468 default: Option<serde_json::Value>,
469 },
470
471 /// LEAD(field, offset, default) - Value from next row
472 Lead {
473 /// Field name
474 field: String,
475 /// Row offset
476 offset: i32,
477 /// Default value
478 default: Option<serde_json::Value>,
479 },
480
481 /// `FIRST_VALUE(field)` - First value in window
482 FirstValue {
483 /// Field name
484 field: String,
485 },
486
487 /// `LAST_VALUE(field)` - Last value in window
488 LastValue {
489 /// Field name
490 field: String,
491 },
492
493 /// `NTH_VALUE(field`, n) - Nth value in window
494 NthValue {
495 /// Field name
496 field: String,
497 /// Position
498 n: u32,
499 },
500
501 // Aggregate as window functions
502 /// SUM(field) OVER (...) - Running total
503 Sum {
504 /// Field name
505 field: String,
506 },
507
508 /// AVG(field) OVER (...) - Moving average
509 Avg {
510 /// Field name
511 field: String,
512 },
513
514 /// COUNT(*) OVER (...) - Running count
515 Count {
516 /// Field name
517 field: Option<String>,
518 },
519
520 /// MIN(field) OVER (...) - Running minimum
521 Min {
522 /// Field name
523 field: String,
524 },
525
526 /// MAX(field) OVER (...) - Running maximum
527 Max {
528 /// Field name
529 field: String,
530 },
531
532 /// STDDEV(field) OVER (...) - Running standard deviation
533 Stddev {
534 /// Field name
535 field: String,
536 },
537
538 /// VARIANCE(field) OVER (...) - Running variance
539 Variance {
540 /// Field name
541 field: String,
542 },
543}
544
545/// Window frame specification
546#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
547pub struct WindowFrame {
548 /// Frame type (ROWS, RANGE, GROUPS)
549 pub frame_type: FrameType,
550
551 /// Frame start boundary
552 pub start: FrameBoundary,
553
554 /// Frame end boundary
555 pub end: FrameBoundary,
556
557 /// Frame exclusion (PostgreSQL only)
558 pub exclusion: Option<FrameExclusion>,
559}
560
561/// Window frame type
562#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
563#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
564#[non_exhaustive]
565pub enum FrameType {
566 /// ROWS frame - Physical rows
567 Rows,
568
569 /// RANGE frame - Logical range based on ORDER BY
570 Range,
571
572 /// GROUPS frame - Peer groups (PostgreSQL only)
573 Groups,
574}
575
576/// Window frame boundary
577#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
578#[serde(tag = "type", rename_all = "snake_case")]
579#[non_exhaustive]
580pub enum FrameBoundary {
581 /// UNBOUNDED PRECEDING
582 UnboundedPreceding,
583
584 /// N PRECEDING
585 NPreceding {
586 /// Number of rows
587 n: u32,
588 },
589
590 /// CURRENT ROW
591 CurrentRow,
592
593 /// N FOLLOWING
594 NFollowing {
595 /// Number of rows
596 n: u32,
597 },
598
599 /// UNBOUNDED FOLLOWING
600 UnboundedFollowing,
601}
602
603/// Frame exclusion mode (PostgreSQL)
604#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
605#[serde(rename_all = "snake_case")]
606#[non_exhaustive]
607pub enum FrameExclusion {
608 /// EXCLUDE CURRENT ROW
609 CurrentRow,
610
611 /// EXCLUDE GROUP
612 Group,
613
614 /// EXCLUDE TIES
615 Ties,
616
617 /// EXCLUDE NO OTHERS
618 NoOthers,
619}
620
621mod codegen;
622mod planner;
623pub use self::{codegen::WindowPlanner, planner::WindowFunctionPlanner};
624
625#[cfg(test)]
626mod tests;