Skip to main content

fraiseql_core/compiler/window_functions/
mod.rs

1//! Window Function Planning Module
2//!
3//! Generates execution plans for SQL window functions.
4//!
5//! # Architecture
6//!
7//! ```text
8//! WindowRequest (high-level, semantic)
9//!      ↓
10//! WindowPlanner::plan() (validates against FactTableMetadata)
11//!      ↓
12//! WindowExecutionPlan (low-level, SQL expressions)
13//!      ↓
14//! WindowSqlGenerator (database-specific SQL)
15//! ```
16//!
17//! # Window Functions
18//!
19//! Window functions perform calculations across sets of table rows that are related
20//! to the current row, without collapsing them into a single output row like GROUP BY.
21//!
22//! ## Function Types
23//!
24//! ### Ranking Functions
25//! - `ROW_NUMBER()` - Sequential number within partition
26//! - `RANK()` - Rank with gaps for ties
27//! - `DENSE_RANK()` - Rank without gaps
28//! - `NTILE(n)` - Divide rows into n groups
29//! - `PERCENT_RANK()` - Relative rank (0 to 1)
30//! - `CUME_DIST()` - Cumulative distribution
31//!
32//! ### Value Functions
33//! - `LAG(field, offset)` - Value from previous row
34//! - `LEAD(field, offset)` - Value from next row
35//! - `FIRST_VALUE(field)` - First value in window
36//! - `LAST_VALUE(field)` - Last value in window
37//! - `NTH_VALUE(field, n)` - Nth value in window
38//!
39//! ### Aggregate as Window
40//! - `SUM(field) OVER (...)` - Running total
41//! - `AVG(field) OVER (...)` - Moving average
42//! - `COUNT(*) OVER (...)` - Running count
43//!
44//! # High-Level Example (`WindowRequest`)
45//!
46//! ```text
47//! // Illustrative output structure only — not directly runnable.
48//! // Requires: FactTableMetadata from compiled schema.
49//! let request = WindowRequest {
50//!     table_name: "tf_sales",
51//!     select: [Measure("revenue"), Dimension("category")],
52//!     windows: [{
53//!         function: RowNumber,
54//!         alias: "rank",
55//!         partition_by: [Dimension("category")],
56//!         order_by: [("revenue", Desc)],
57//!     }],
58//!     where_clause: None,
59//!     limit: Some(100),
60//! };
61//!
62//! let plan = WindowPlanner::plan(request, &metadata)?;
63//! ```
64//!
65//! # SQL Example (`WindowExecutionPlan` output)
66//!
67//! ```sql
68//! -- Running total
69//! SELECT
70//!     occurred_at,
71//!     revenue,
72//!     SUM(revenue) OVER (
73//!         ORDER BY occurred_at
74//!         ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
75//!     ) as running_total
76//! FROM tf_sales;
77//!
78//! -- Ranking
79//! SELECT
80//!     category,
81//!     revenue,
82//!     ROW_NUMBER() OVER (
83//!         PARTITION BY category
84//!         ORDER BY revenue DESC
85//!     ) as rank
86//! FROM tf_sales;
87//! ```
88
89use serde::{Deserialize, Serialize};
90
91use crate::{
92    compiler::{
93        aggregation::{OrderByClause, OrderDirection},
94        fact_table::FactTableMetadata,
95    },
96    db::where_clause::WhereClause,
97    error::{FraiseQLError, Result},
98};
99
100// =============================================================================
101// High-Level Types (WindowRequest) - Semantic names, validated against metadata
102// =============================================================================
103
104/// High-level window query request using semantic field names.
105///
106/// This is the user-facing API that uses measure names and dimension paths
107/// instead of raw SQL expressions. It gets validated and converted to
108/// `WindowExecutionPlan` by `WindowPlanner::plan()`.
109///
110/// # Example
111///
112/// ```rust,ignore
113/// let request = WindowRequest {
114///     table_name: "tf_sales".to_string(),
115///     select: vec![
116///         WindowSelectColumn::Measure { name: "revenue".to_string(), alias: "revenue".to_string() },
117///         WindowSelectColumn::Dimension { path: "category".to_string(), alias: "category".to_string() },
118///     ],
119///     windows: vec![WindowFunctionRequest {
120///         function: WindowFunctionSpec::RunningSum { measure: "revenue".to_string() },
121///         alias: "running_total".to_string(),
122///         partition_by: vec![],
123///         order_by: vec![WindowOrderBy { field: "occurred_at".to_string(), direction: OrderDirection::Asc }],
124///         frame: Some(WindowFrame { ... }),
125///     }],
126///     where_clause: None,
127///     order_by: vec![],
128///     limit: Some(100),
129///     offset: None,
130/// };
131/// ```
132#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
133pub struct WindowRequest {
134    /// Fact table name (e.g., "`tf_sales`")
135    pub table_name: String,
136
137    /// Columns to select (measures, dimensions, filters)
138    pub select: Vec<WindowSelectColumn>,
139
140    /// Window function specifications
141    pub windows: Vec<WindowFunctionRequest>,
142
143    /// WHERE clause filters (applied before window computation)
144    pub where_clause: Option<WhereClause>,
145
146    /// Final ORDER BY (after window computation)
147    pub order_by: Vec<WindowOrderBy>,
148
149    /// Result limit
150    pub limit: Option<u32>,
151
152    /// Result offset
153    pub offset: Option<u32>,
154}
155
156/// Column selection for window query (semantic names).
157#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
158#[serde(tag = "type", rename_all = "snake_case")]
159#[non_exhaustive]
160pub enum WindowSelectColumn {
161    /// Select a measure column (e.g., "revenue")
162    Measure {
163        /// Measure name from `FactTableMetadata`
164        name:  String,
165        /// Result alias
166        alias: String,
167    },
168
169    /// Select a dimension from JSONB (e.g., "category")
170    Dimension {
171        /// Dimension path in JSONB
172        path:  String,
173        /// Result alias
174        alias: String,
175    },
176
177    /// Select a denormalized filter column (e.g., "`customer_id`", "`occurred_at`")
178    Filter {
179        /// Filter column name
180        name:  String,
181        /// Result alias
182        alias: String,
183    },
184}
185
186impl WindowSelectColumn {
187    /// Get the result alias for this selection.
188    #[must_use]
189    pub fn alias(&self) -> &str {
190        match self {
191            Self::Measure { alias, .. }
192            | Self::Dimension { alias, .. }
193            | Self::Filter { alias, .. } => alias,
194        }
195    }
196}
197
198/// Window function request (high-level, semantic).
199#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
200pub struct WindowFunctionRequest {
201    /// Window function type and parameters
202    pub function: WindowFunctionSpec,
203
204    /// Result column alias
205    pub alias: String,
206
207    /// PARTITION BY columns (semantic names)
208    pub partition_by: Vec<PartitionByColumn>,
209
210    /// ORDER BY within window
211    pub order_by: Vec<WindowOrderBy>,
212
213    /// Window frame specification
214    pub frame: Option<WindowFrame>,
215}
216
217/// Window function specification using semantic field names.
218///
219/// Unlike `WindowFunctionType` which uses raw SQL expressions,
220/// this uses measure/dimension names that get validated against metadata.
221#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
222#[serde(tag = "type", rename_all = "snake_case")]
223#[non_exhaustive]
224pub enum WindowFunctionSpec {
225    // =========================================================================
226    // Ranking Functions (no field reference needed)
227    // =========================================================================
228    /// `ROW_NUMBER()` - Sequential number within partition
229    RowNumber,
230
231    /// `RANK()` - Rank with gaps for ties
232    Rank,
233
234    /// `DENSE_RANK()` - Rank without gaps
235    DenseRank,
236
237    /// NTILE(n) - Divide rows into n groups
238    Ntile {
239        /// Number of groups
240        n: u32,
241    },
242
243    /// `PERCENT_RANK()` - Relative rank (0 to 1)
244    PercentRank,
245
246    /// `CUME_DIST()` - Cumulative distribution
247    CumeDist,
248
249    // =========================================================================
250    // Value Functions (reference measures or dimensions)
251    // =========================================================================
252    /// LAG(field, offset, default) - Value from previous row
253    Lag {
254        /// Measure or dimension name
255        field:   String,
256        /// Row offset (default: 1)
257        offset:  i32,
258        /// Default value when no previous row
259        default: Option<serde_json::Value>,
260    },
261
262    /// LEAD(field, offset, default) - Value from next row
263    Lead {
264        /// Measure or dimension name
265        field:   String,
266        /// Row offset (default: 1)
267        offset:  i32,
268        /// Default value when no next row
269        default: Option<serde_json::Value>,
270    },
271
272    /// `FIRST_VALUE(field)` - First value in window frame
273    FirstValue {
274        /// Measure or dimension name
275        field: String,
276    },
277
278    /// `LAST_VALUE(field)` - Last value in window frame
279    LastValue {
280        /// Measure or dimension name
281        field: String,
282    },
283
284    /// `NTH_VALUE(field`, n) - Nth value in window frame
285    NthValue {
286        /// Measure or dimension name
287        field: String,
288        /// Position (1-indexed)
289        n:     u32,
290    },
291
292    // =========================================================================
293    // Aggregate as Window Functions (reference measures)
294    // =========================================================================
295    /// SUM(measure) OVER (...) - Running total
296    RunningSum {
297        /// Measure name
298        measure: String,
299    },
300
301    /// AVG(measure) OVER (...) - Moving average
302    RunningAvg {
303        /// Measure name
304        measure: String,
305    },
306
307    /// COUNT(*) OVER (...) - Running count
308    RunningCount,
309
310    /// COUNT(field) OVER (...) - Running count of non-null values
311    RunningCountField {
312        /// Measure or dimension name
313        field: String,
314    },
315
316    /// MIN(measure) OVER (...) - Running minimum
317    RunningMin {
318        /// Measure name
319        measure: String,
320    },
321
322    /// MAX(measure) OVER (...) - Running maximum
323    RunningMax {
324        /// Measure name
325        measure: String,
326    },
327
328    /// STDDEV(measure) OVER (...) - Running standard deviation
329    RunningStddev {
330        /// Measure name
331        measure: String,
332    },
333
334    /// VARIANCE(measure) OVER (...) - Running variance
335    RunningVariance {
336        /// Measure name
337        measure: String,
338    },
339}
340
341/// PARTITION BY column specification (semantic).
342#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
343#[serde(tag = "type", rename_all = "snake_case")]
344#[non_exhaustive]
345pub enum PartitionByColumn {
346    /// Partition by dimension from JSONB
347    Dimension {
348        /// Dimension path
349        path: String,
350    },
351
352    /// Partition by denormalized filter column
353    Filter {
354        /// Filter column name
355        name: String,
356    },
357
358    /// Partition by measure (rare but valid)
359    Measure {
360        /// Measure name
361        name: String,
362    },
363}
364
365/// ORDER BY clause for window functions (semantic field names).
366#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
367pub struct WindowOrderBy {
368    /// Field name (measure, dimension, or filter)
369    pub field: String,
370
371    /// Sort direction
372    pub direction: OrderDirection,
373}
374
375// =============================================================================
376// Low-Level Types (WindowExecutionPlan) - SQL expressions, ready for execution
377// =============================================================================
378
379/// Window function execution plan
380#[derive(Debug, Clone, Serialize, Deserialize)]
381pub struct WindowExecutionPlan {
382    /// Source table name
383    pub table: String,
384
385    /// Regular SELECT columns (non-window)
386    pub select: Vec<SelectColumn>,
387
388    /// Window function expressions
389    pub windows: Vec<WindowFunction>,
390
391    /// WHERE clause filter
392    pub where_clause: Option<WhereClause>,
393
394    /// Final ORDER BY (after window computation)
395    pub order_by: Vec<OrderByClause>,
396
397    /// Result limit
398    pub limit: Option<u32>,
399
400    /// Result offset
401    pub offset: Option<u32>,
402}
403
404/// Regular SELECT column
405#[derive(Debug, Clone, Serialize, Deserialize)]
406pub struct SelectColumn {
407    /// Column expression (e.g., "revenue", "data->>'category'")
408    pub expression: String,
409
410    /// Result alias
411    pub alias: String,
412}
413
414/// Window function specification
415#[derive(Debug, Clone, Serialize, Deserialize)]
416pub struct WindowFunction {
417    /// Window function type
418    pub function: WindowFunctionType,
419
420    /// Result column alias
421    pub alias: String,
422
423    /// PARTITION BY columns
424    pub partition_by: Vec<String>,
425
426    /// ORDER BY within window
427    pub order_by: Vec<OrderByClause>,
428
429    /// Window frame specification
430    pub frame: Option<WindowFrame>,
431}
432
433/// Window function types
434#[derive(Debug, Clone, Serialize, Deserialize)]
435#[serde(tag = "type", rename_all = "snake_case")]
436#[non_exhaustive]
437pub enum WindowFunctionType {
438    // Ranking functions
439    /// `ROW_NUMBER()` - Sequential number within partition
440    RowNumber,
441
442    /// `RANK()` - Rank with gaps for ties
443    Rank,
444
445    /// `DENSE_RANK()` - Rank without gaps
446    DenseRank,
447
448    /// NTILE(n) - Divide rows into n groups
449    Ntile {
450        /// Number of groups
451        n: u32,
452    },
453
454    /// `PERCENT_RANK()` - Relative rank (0 to 1)
455    PercentRank,
456
457    /// `CUME_DIST()` - Cumulative distribution
458    CumeDist,
459
460    // Value functions
461    /// LAG(field, offset, default) - Value from previous row
462    Lag {
463        /// Field name
464        field:   String,
465        /// Row offset
466        offset:  i32,
467        /// Default value
468        default: Option<serde_json::Value>,
469    },
470
471    /// LEAD(field, offset, default) - Value from next row
472    Lead {
473        /// Field name
474        field:   String,
475        /// Row offset
476        offset:  i32,
477        /// Default value
478        default: Option<serde_json::Value>,
479    },
480
481    /// `FIRST_VALUE(field)` - First value in window
482    FirstValue {
483        /// Field name
484        field: String,
485    },
486
487    /// `LAST_VALUE(field)` - Last value in window
488    LastValue {
489        /// Field name
490        field: String,
491    },
492
493    /// `NTH_VALUE(field`, n) - Nth value in window
494    NthValue {
495        /// Field name
496        field: String,
497        /// Position
498        n:     u32,
499    },
500
501    // Aggregate as window functions
502    /// SUM(field) OVER (...) - Running total
503    Sum {
504        /// Field name
505        field: String,
506    },
507
508    /// AVG(field) OVER (...) - Moving average
509    Avg {
510        /// Field name
511        field: String,
512    },
513
514    /// COUNT(*) OVER (...) - Running count
515    Count {
516        /// Field name
517        field: Option<String>,
518    },
519
520    /// MIN(field) OVER (...) - Running minimum
521    Min {
522        /// Field name
523        field: String,
524    },
525
526    /// MAX(field) OVER (...) - Running maximum
527    Max {
528        /// Field name
529        field: String,
530    },
531
532    /// STDDEV(field) OVER (...) - Running standard deviation
533    Stddev {
534        /// Field name
535        field: String,
536    },
537
538    /// VARIANCE(field) OVER (...) - Running variance
539    Variance {
540        /// Field name
541        field: String,
542    },
543}
544
545/// Window frame specification
546#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
547pub struct WindowFrame {
548    /// Frame type (ROWS, RANGE, GROUPS)
549    pub frame_type: FrameType,
550
551    /// Frame start boundary
552    pub start: FrameBoundary,
553
554    /// Frame end boundary
555    pub end: FrameBoundary,
556
557    /// Frame exclusion (PostgreSQL only)
558    pub exclusion: Option<FrameExclusion>,
559}
560
561/// Window frame type
562#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
563#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
564#[non_exhaustive]
565pub enum FrameType {
566    /// ROWS frame - Physical rows
567    Rows,
568
569    /// RANGE frame - Logical range based on ORDER BY
570    Range,
571
572    /// GROUPS frame - Peer groups (PostgreSQL only)
573    Groups,
574}
575
576/// Window frame boundary
577#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
578#[serde(tag = "type", rename_all = "snake_case")]
579#[non_exhaustive]
580pub enum FrameBoundary {
581    /// UNBOUNDED PRECEDING
582    UnboundedPreceding,
583
584    /// N PRECEDING
585    NPreceding {
586        /// Number of rows
587        n: u32,
588    },
589
590    /// CURRENT ROW
591    CurrentRow,
592
593    /// N FOLLOWING
594    NFollowing {
595        /// Number of rows
596        n: u32,
597    },
598
599    /// UNBOUNDED FOLLOWING
600    UnboundedFollowing,
601}
602
603/// Frame exclusion mode (PostgreSQL)
604#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
605#[serde(rename_all = "snake_case")]
606#[non_exhaustive]
607pub enum FrameExclusion {
608    /// EXCLUDE CURRENT ROW
609    CurrentRow,
610
611    /// EXCLUDE GROUP
612    Group,
613
614    /// EXCLUDE TIES
615    Ties,
616
617    /// EXCLUDE NO OTHERS
618    NoOthers,
619}
620
621mod codegen;
622mod planner;
623pub use self::{codegen::WindowPlanner, planner::WindowFunctionPlanner};
624
625#[cfg(test)]
626mod tests;