use std::{collections::HashMap, ops::RangeInclusive, sync::Arc};
use chrono::{Duration, TimeZone};
use chronoutil::RelativeDuration;
use thiserror::Error;
use hamelin_eval::{
eval, next_truncation_boundary, reverse_eval, truncate_timestamp,
value::{TimestampValue, Value},
Constraint, Environment, EvalError,
};
use hamelin_lib::{
err::TranslationError,
func::def::FunctionDef,
tree::{
ast::{
expression::{ExpressionKind, TruncUnit},
identifier::{Identifier, ParsedSimpleIdentifier, SimpleIdentifier},
},
typed_ast::{
clause::Projections,
command::{TypedCommandKind, TypedFromCommand, TypedUnionCommand},
expression::{TypedExpression, TypedExpressionKind},
pattern::TypedPattern,
pipeline::{TypedPipeline, ValidPipeline},
query::TypedStatement,
},
},
types::{Type, CALENDAR_INTERVAL, INTERVAL},
};
/// Inclusive UTC time range used throughout incremental analysis.
type TimeRange = RangeInclusive<chrono::DateTime<chrono::Utc>>;
/// Result of incremental-range analysis: which output rows must be replaced
/// and which input rows must be read to recompute them.
#[derive(Debug, Clone)]
pub struct IncrementalRange {
    /// Time range of output rows that must be replaced in the target.
    pub replace_range: TimeRange,
    /// Time range of input rows that must be scanned to produce `replace_range`.
    pub query_range: TimeRange,
}
/// Outcome of probing a pipeline/query for incremental-refresh strategies.
#[derive(Debug, Clone)]
pub struct StrategyDetectionResult {
    /// Strategies the pipeline/query is eligible for.
    pub supported: Vec<IncrementalStrategyKind>,
    /// Human-readable reasons for each strategy that was rejected.
    pub rejections: Vec<String>,
}
/// Errors raised while analyzing a query for incremental-refresh eligibility.
/// The `#[error]` attributes double as the user-facing description of each case.
#[derive(Debug, Error)]
pub enum IncrementalAnalysisError {
    #[error("Query had compilation errors: {0}")]
    TreeHadError(TranslationError),
    #[error("Query uses non-deterministic function: {0}")]
    NonDeterministicFunction(String),
    // The offending table identifier is carried for programmatic use even
    // though it is not interpolated into the message.
    #[error("Stale range not provided for required table")]
    DependencyNotProvided(Identifier),
    #[error("Query has no start time")]
    NoStart,
    #[error("Query has no end time")]
    NoEnd,
    #[error("Could not compute incremental field value")]
    ExpressionEvaluationError(EvalError),
    #[error("Incremental field value is not a timestamp")]
    ValueNotTimestamp,
    #[error("WINDOW .. WITHIN has unbound range")]
    UnboundRange,
    #[error("WITHIN value is not an interval")]
    ValueNotInterval,
    #[error("Command not supported for incremental refresh: {0}")]
    CommandNotSupported(String),
    #[error("This query does not compute the timestamp field from the timestamp field")]
    TimestampLineageError,
    #[error("AGG command must group by the timestamp field for incremental refresh")]
    AggWithoutTimestampGroupBy,
    #[error("Pipeline is empty")]
    EmptyPipeline,
    #[error("DML statements cannot use incremental refresh")]
    DmlNotSupported,
    // The name of the WITH clause whose analysis failed.
    #[error("WITH clause had errors during incremental analysis")]
    WithClauseError(Identifier),
}
/// Searches `expr` for a call to a non-deterministic function and returns the
/// offending function's definition, or `None` when every call is deterministic.
fn contains_non_deterministic_functions(expr: &TypedExpression) -> Option<Arc<dyn FunctionDef>> {
    let hit = expr.find(&mut |candidate| {
        matches!(
            &candidate.kind,
            TypedExpressionKind::Apply(apply) if !apply.function_def.is_deterministic()
        )
    })?;
    // The predicate above only matches `Apply` nodes, so this extraction
    // succeeds for any hit it produced.
    if let TypedExpressionKind::Apply(apply) = &hit.kind {
        Some(apply.function_def.clone())
    } else {
        None
    }
}
/// Errors with `NonDeterministicFunction` if `expr` calls any
/// non-deterministic function; succeeds otherwise.
fn check_expression_for_non_deterministic(
    expr: &TypedExpression,
) -> Result<(), IncrementalAnalysisError> {
    match contains_non_deterministic_functions(expr) {
        Some(func) => Err(IncrementalAnalysisError::NonDeterministicFunction(
            func.to_string(),
        )),
        None => Ok(()),
    }
}
/// Checks every projection assignment for non-deterministic function calls,
/// failing on the first one found.
fn check_projections_for_non_deterministic(
    projections: &Projections,
) -> Result<(), IncrementalAnalysisError> {
    for assignment in &projections.assignments {
        check_expression_for_non_deterministic(&assignment.expression)?;
    }
    Ok(())
}
/// Checks every sort key expression for non-deterministic function calls,
/// failing on the first one found.
fn check_sort_for_non_deterministic(
    sort_expressions: &[hamelin_lib::tree::typed_ast::command::TypedSortExpression],
) -> Result<(), IncrementalAnalysisError> {
    for sort_expression in sort_expressions {
        check_expression_for_non_deterministic(&sort_expression.expression)?;
    }
    Ok(())
}
/// Seeds `current_replace_range` from the stale ranges of the referenced
/// tables: the union (earliest start, latest end) across all provided ranges.
///
/// Errors if a referenced table has no entry in `stale_ranges`, or if every
/// provided entry is `None` (no start/end can be derived).
fn initialize_replace_range(
    refs: &[Identifier],
    stale_ranges: &HashMap<Identifier, Option<TimeRange>>,
    current_replace_range: &mut Option<TimeRange>,
) -> Result<(), IncrementalAnalysisError> {
    let mut local_ranges = Vec::with_capacity(refs.len());
    for id in refs {
        let range = stale_ranges
            .get(id)
            .cloned()
            .ok_or_else(|| IncrementalAnalysisError::DependencyNotProvided(id.clone()))?;
        local_ranges.push(range);
    }
    // `flatten` skips tables whose stale range is `None` (i.e. not stale).
    let min = *local_ranges
        .iter()
        .flatten()
        .map(|range| range.start())
        .min()
        .ok_or(IncrementalAnalysisError::NoStart)?;
    let max = *local_ranges
        .iter()
        .flatten()
        .map(|range| range.end())
        .max()
        .ok_or(IncrementalAnalysisError::NoEnd)?;
    *current_replace_range = Some(min..=max);
    Ok(())
}
fn is_interval_or_range_of_interval_type(expr: &TypedExpression) -> bool {
match expr.resolved_type.as_ref() {
t if *t == INTERVAL || *t == CALENDAR_INTERVAL => true,
Type::Range(range) if *range.of == INTERVAL || *range.of == CALENDAR_INTERVAL => true,
_ => false,
}
}
/// The incremental refresh strategies this module can validate a query for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IncrementalStrategyKind {
    // Append-only propagation of new rows through the pipeline.
    CascadedAppend,
    // Recompute and replace a bounded time range of output rows.
    TimeRangeRefresh,
}
/// Maps `input_range` through the projection of the incremental field, if any.
///
/// When the projection re-computes the incremental field, evaluates the
/// projection expression at both endpoints of `input_range` (binding the field
/// to each endpoint in turn) to derive the transformed range. When the field is
/// not projected, the range passes through unchanged.
///
/// # Errors
/// - `ExpressionEvaluationError` when the projection cannot be evaluated.
/// - `ValueNotTimestamp` when the projected value is not a timestamp.
fn compute_replace_range_for_projection(
    input_range: &TimeRange,
    incremental_field_name: &SimpleIdentifier,
    projections: &Projections,
) -> Result<TimeRange, IncrementalAnalysisError> {
    let incremental_field_projection = projections.lookup(&incremental_field_name.clone().into());
    if let Some(ifp) = incremental_field_projection {
        let mut env = Environment::new();
        // Evaluate the projection with the field bound to the range start.
        env.bind(
            incremental_field_name.clone().into(),
            TimestampValue::utc(*input_range.start()).into(),
        );
        let new_start_value = eval(ifp.expression.as_ref(), &env)
            .map_err(IncrementalAnalysisError::ExpressionEvaluationError)?
            .try_unwrap_timestamp()
            .map_err(|_| IncrementalAnalysisError::ValueNotTimestamp)?;
        // ... and again with the field bound to the range end.
        env.bind(
            incremental_field_name.clone().into(),
            TimestampValue::utc(*input_range.end()).into(),
        );
        let new_end_value = eval(ifp.expression.as_ref(), &env)
            .map_err(IncrementalAnalysisError::ExpressionEvaluationError)?
            .try_unwrap_timestamp()
            .map_err(|_| IncrementalAnalysisError::ValueNotTimestamp)?;
        Ok(*new_start_value.instant()..=*new_end_value.instant())
    } else {
        // Incremental field untouched by this projection: range is unchanged.
        Ok(input_range.clone())
    }
}
/// Inverse of [`compute_replace_range_for_projection`]: given a desired output
/// range for the incremental field, reverse-evaluates the field's projection to
/// find the input range that produces it. When the field is not projected, the
/// range passes through unchanged.
///
/// # Errors
/// `TimestampLineageError` when the projection cannot be inverted to a closed
/// timestamp range (i.e. the output timestamp is not derived from the input
/// timestamp in an invertible way).
fn reverse_eval_timestamp_projection(
    output_range: &TimeRange,
    incremental_field_name: &SimpleIdentifier,
    projections: &Projections,
) -> Result<TimeRange, IncrementalAnalysisError> {
    let timestamp_id: Identifier = incremental_field_name.clone().into();
    // Fix: identifier was garbled by HTML-entity decoding (`&times...` -> `×...`).
    let incremental_field_projection = projections.lookup(&timestamp_id);
    if let Some(ifp) = incremental_field_projection {
        let output_constraint = Constraint::Range {
            min: Some(TimestampValue::utc(*output_range.start()).into()),
            max: Some(TimestampValue::utc(*output_range.end()).into()),
        };
        let env = Environment::new();
        match reverse_eval(ifp.expression.as_ref(), output_constraint, &env) {
            Ok(Some(Constraint::Range {
                min: Some(Value::Timestamp(start)),
                max: Some(Value::Timestamp(end)),
            })) => Ok(*start.instant()..=*end.instant()),
            // Any other shape (open-ended, non-timestamp, or failure) means we
            // cannot trace the timestamp lineage backwards.
            _ => Err(IncrementalAnalysisError::TimestampLineageError),
        }
    } else {
        Ok(output_range.clone())
    }
}
fn compute_replace_range_forward_pass(
pipeline: &TypedPipeline,
stale_ranges: &HashMap<Identifier, Option<TimeRange>>,
incremental_field_name: &SimpleIdentifier,
allow_lookups: bool,
) -> Result<TimeRange, IncrementalAnalysisError> {
let command_list = match &pipeline.kind {
Ok(ValidPipeline { commands, .. }) => commands,
Err(translation_error) => {
return Err(IncrementalAnalysisError::TreeHadError(
translation_error.as_ref().clone(),
))
}
};
let mut current_replace_range: Option<TimeRange> = None;
for command in command_list {
match &command.kind {
TypedCommandKind::Join(_)
| TypedCommandKind::Append(_)
| TypedCommandKind::Limit(_) => {
return Err(IncrementalAnalysisError::CommandNotSupported(
command.ast.kind.command_name().to_string(),
))
}
TypedCommandKind::Lookup(typed_lookup_command) => {
if !allow_lookups {
return Err(IncrementalAnalysisError::CommandNotSupported(
command.ast.kind.command_name().to_string(),
));
}
if let Some(condition) = &typed_lookup_command.condition {
check_expression_for_non_deterministic(condition)?;
}
continue;
}
TypedCommandKind::Error(translation_error) => {
return Err(IncrementalAnalysisError::TreeHadError(
translation_error.as_ref().clone(),
))
}
TypedCommandKind::Where(typed_where_command) => {
check_expression_for_non_deterministic(&typed_where_command.predicate)?;
continue;
}
TypedCommandKind::Within(typed_within_command) => {
if is_interval_or_range_of_interval_type(&typed_within_command.duration) {
return Err(IncrementalAnalysisError::NonDeterministicFunction(
"WITHIN".to_string(),
));
}
check_expression_for_non_deterministic(&typed_within_command.duration)?;
continue;
}
TypedCommandKind::Sort(_) => {
continue;
}
TypedCommandKind::Drop(typed_drop_command) => {
let timestamp_id: Identifier = incremental_field_name.clone().into();
if typed_drop_command.dropped_fields.contains(×tamp_id) {
return Err(IncrementalAnalysisError::TimestampLineageError);
}
continue;
}
TypedCommandKind::From(TypedFromCommand { clauses })
| TypedCommandKind::Union(TypedUnionCommand { clauses }) => {
let refs: Result<Vec<_>, _> =
clauses
.iter()
.filter_map(|clause| clause.reference().ok())
.map(|reference| {
reference.identifier.clone().valid().map_err(|e| {
IncrementalAnalysisError::TreeHadError(e.as_ref().clone())
})
})
.collect();
initialize_replace_range(&refs?, stale_ranges, &mut current_replace_range)?;
}
TypedCommandKind::Window(typed_window_command) => {
check_projections_for_non_deterministic(&typed_window_command.projections)?;
check_projections_for_non_deterministic(&typed_window_command.group_by)?;
if let Some(within) = &typed_window_command.within {
check_expression_for_non_deterministic(within)?;
}
if let Some(last_range) = ¤t_replace_range {
let new_range = compute_replace_range_for_projection(
last_range,
incremental_field_name,
&typed_window_command.projections,
)?;
current_replace_range = Some(new_range);
}
}
TypedCommandKind::Agg(typed_agg_command) => {
check_projections_for_non_deterministic(&typed_agg_command.group_by)?;
check_projections_for_non_deterministic(&typed_agg_command.aggregates)?;
check_sort_for_non_deterministic(&typed_agg_command.sort_by)?;
if let Some(last_range) = ¤t_replace_range {
let timestamp_id: Identifier = incremental_field_name.clone().into();
if typed_agg_command.group_by.lookup(×tamp_id).is_none() {
return Err(IncrementalAnalysisError::AggWithoutTimestampGroupBy);
}
let new_range = compute_replace_range_for_projection(
last_range,
incremental_field_name,
&typed_agg_command.group_by,
)?;
current_replace_range = Some(new_range);
}
}
TypedCommandKind::Let(typed_let_command) => {
check_projections_for_non_deterministic(&typed_let_command.projections)?;
if let Some(last_range) = ¤t_replace_range {
let new_range = compute_replace_range_for_projection(
last_range,
incremental_field_name,
&typed_let_command.projections,
)?;
current_replace_range = Some(new_range);
}
}
TypedCommandKind::Match(typed_match_command) => {
check_projections_for_non_deterministic(&typed_match_command.group_by)?;
check_projections_for_non_deterministic(&typed_match_command.agg)?;
check_sort_for_non_deterministic(&typed_match_command.sort)?;
if let Some(within) = &typed_match_command.within {
check_expression_for_non_deterministic(within)?;
if typed_match_command.sort.len() != 1 {
return Err(IncrementalAnalysisError::CommandNotSupported(
"MATCH with WITHIN requires exactly one sort expression for incremental refresh".to_string(),
));
}
let is_timestamp_sort = matches!(
&typed_match_command.sort[0].expression.ast.kind,
ExpressionKind::FieldReference(col_ref)
if matches!(&col_ref.field_name, ParsedSimpleIdentifier::Valid(id) if id == "timestamp")
);
if !is_timestamp_sort {
return Err(IncrementalAnalysisError::CommandNotSupported(
"MATCH with WITHIN requires SORT by 'timestamp' field for incremental refresh".to_string(),
));
}
}
let refs = collect_match_pattern_references(&typed_match_command.patterns)?;
initialize_replace_range(&refs, stale_ranges, &mut current_replace_range)?;
}
TypedCommandKind::Select(typed_select_command) => {
check_projections_for_non_deterministic(&typed_select_command.projections)?;
if let Some(last_range) = ¤t_replace_range {
let timestamp_id: Identifier = incremental_field_name.clone().into();
if typed_select_command
.projections
.lookup(×tamp_id)
.is_none()
{
return Err(IncrementalAnalysisError::TimestampLineageError);
}
let new_range = compute_replace_range_for_projection(
last_range,
incremental_field_name,
&typed_select_command.projections,
)?;
current_replace_range = Some(new_range);
}
}
TypedCommandKind::Explode(typed_explode_command) => {
let timestamp_id: Identifier = incremental_field_name.clone().into();
for item in &typed_explode_command.items {
check_expression_for_non_deterministic(&item.assignment.expression)?;
if let Ok(identifier) = item.assignment.identifier.valid_ref() {
if *identifier == timestamp_id {
return Err(IncrementalAnalysisError::TimestampLineageError);
}
}
}
continue;
}
TypedCommandKind::Parse(typed_parse_command) => {
if let Some(source) = &typed_parse_command.source {
check_expression_for_non_deterministic(source)?;
}
let timestamp_id: Identifier = incremental_field_name.clone().into();
if typed_parse_command
.identifiers
.iter()
.any(|id| id.valid_ref().ok() == Some(×tamp_id))
{
return Err(IncrementalAnalysisError::TimestampLineageError);
}
continue;
}
TypedCommandKind::Unnest(typed_unnest_command) => {
check_expression_for_non_deterministic(&typed_unnest_command.expression)?;
let timestamp_id: Identifier = incremental_field_name.clone().into();
if command.output_schema.lookup(×tamp_id).is_some() {
return Err(IncrementalAnalysisError::TimestampLineageError);
}
continue;
}
TypedCommandKind::Nest(typed_nest_command) => {
let timestamp_id: Identifier = incremental_field_name.clone().into();
if let Ok(identifier) = typed_nest_command.identifier.valid_ref() {
if *identifier == timestamp_id {
return Err(IncrementalAnalysisError::TimestampLineageError);
}
}
continue;
}
}
}
current_replace_range.ok_or(IncrementalAnalysisError::EmptyPipeline)
}
/// Widens `range` so both endpoints land on partition boundaries of `unit`:
/// the start is floored to its containing boundary and the end is pushed
/// forward to the next boundary. Truncation failures leave the corresponding
/// endpoint unchanged.
fn snap_to_partition_boundaries(range: &TimeRange, unit: &TruncUnit) -> TimeRange {
    let (start, end) = (*range.start(), *range.end());
    // Floor the start onto its partition boundary.
    let snapped_start = match truncate_timestamp(&TimestampValue::utc(start), unit) {
        Ok(truncated) => *truncated.instant(),
        Err(_) => start,
    };
    // Ceil the end: truncate first, then advance to the following boundary.
    let truncated_end =
        truncate_timestamp(&TimestampValue::utc(end), unit).unwrap_or_else(|_| TimestampValue::utc(end));
    let snapped_end = next_truncation_boundary(&truncated_end, unit)
        .map(|boundary| *boundary.instant())
        .unwrap_or(end);
    snapped_start..=snapped_end
}
/// Backward pass over a pipeline: starting from `final_replace_range`, walks
/// the commands in reverse, reverse-evaluating timestamp projections and
/// widening the range to cover WINDOW/MATCH `WITHIN` look-back/look-ahead.
/// The result is the union of all per-command input ranges — the span of
/// source rows that must be read to recompute the replace range.
///
/// Fixes applied: occurrences of `&current_output_range` were corrupted to
/// `¤t_output_range` by an HTML-entity decoding pass and are restored.
fn compute_query_range_backward_pass(
    pipeline: &TypedPipeline,
    final_replace_range: &TimeRange,
    incremental_field_name: &SimpleIdentifier,
) -> Result<TimeRange, IncrementalAnalysisError> {
    let command_list = match &pipeline.kind {
        Ok(ValidPipeline { commands, .. }) => commands,
        Err(e) => return Err(IncrementalAnalysisError::TreeHadError(e.as_ref().clone())),
    };
    let mut current_output_range = final_replace_range.clone();
    let mut query_ranges: Vec<TimeRange> = Vec::new();
    for command in command_list.iter().rev() {
        match &command.kind {
            TypedCommandKind::Let(typed_let_command) => {
                let input_range = reverse_eval_timestamp_projection(
                    &current_output_range,
                    incremental_field_name,
                    &typed_let_command.projections,
                )?;
                query_ranges.push(input_range.clone());
                current_output_range = input_range;
            }
            TypedCommandKind::Select(typed_select_command) => {
                let input_range = reverse_eval_timestamp_projection(
                    &current_output_range,
                    incremental_field_name,
                    &typed_select_command.projections,
                )?;
                query_ranges.push(input_range.clone());
                current_output_range = input_range;
            }
            TypedCommandKind::Window(typed_window_command) => {
                let input_range = reverse_eval_timestamp_projection(
                    &current_output_range,
                    incremental_field_name,
                    &typed_window_command.projections,
                )?;
                let mut query_range_lower = *input_range.start();
                let mut query_range_upper = *input_range.end();
                if let Some(within) = &typed_window_command.within {
                    if let TypedExpressionKind::Error(err) = &within.kind {
                        return Err(IncrementalAnalysisError::TreeHadError(
                            err.error.as_ref().clone(),
                        ));
                    }
                    let env = Environment::new();
                    // Widen the query range by the WITHIN horizon: a positive
                    // interval looks forward, a negative one looks backward.
                    if let Ok(evaluated_within) = eval(within.as_ref(), &env) {
                        match evaluated_within {
                            Value::Interval(time_delta) => {
                                if time_delta > Duration::zero() {
                                    query_range_upper = query_range_upper + time_delta;
                                } else {
                                    query_range_lower = query_range_lower + time_delta;
                                }
                            }
                            Value::CalendarInterval(months) => {
                                let calendar_interval = RelativeDuration::months(months);
                                if months > 0 {
                                    query_range_upper = query_range_upper + calendar_interval;
                                } else {
                                    query_range_lower = query_range_lower + calendar_interval;
                                }
                            }
                            Value::Range(range_value) => {
                                // A bounded range widens both ends at once.
                                if let (Some(l), Some(u)) = (&range_value.lower, &range_value.upper)
                                {
                                    match (l, u) {
                                        (Value::Interval(l), Value::Interval(u)) => {
                                            query_range_lower = query_range_lower + *l;
                                            query_range_upper = query_range_upper + *u;
                                        }
                                        (
                                            Value::CalendarInterval(l),
                                            Value::CalendarInterval(u),
                                        ) => {
                                            query_range_lower =
                                                query_range_lower + RelativeDuration::months(*l);
                                            query_range_upper =
                                                query_range_upper + RelativeDuration::months(*u);
                                        }
                                        _ => {}
                                    }
                                }
                            }
                            _ => {}
                        }
                    }
                }
                query_ranges.push(query_range_lower..=query_range_upper);
                current_output_range = input_range;
            }
            TypedCommandKind::Match(typed_match_command) => {
                let input_range = current_output_range.clone();
                let mut query_range_lower = *input_range.start();
                let query_range_upper = *input_range.end();
                if let Some(within) = &typed_match_command.within {
                    if let TypedExpressionKind::Error(err) = &within.kind {
                        return Err(IncrementalAnalysisError::TreeHadError(
                            err.error.as_ref().clone(),
                        ));
                    }
                    let env = Environment::new();
                    // MATCH WITHIN only ever extends the look-back window.
                    if let Ok(evaluated_within) = eval(within, &env) {
                        match evaluated_within {
                            Value::Interval(time_delta) => {
                                query_range_lower = query_range_lower - time_delta;
                            }
                            Value::CalendarInterval(months) => {
                                query_range_lower =
                                    query_range_lower + RelativeDuration::months(-months);
                            }
                            _ => {}
                        }
                    }
                }
                query_ranges.push(query_range_lower..=query_range_upper);
                // MATCH is a source command: nothing precedes it logically.
                break;
            }
            TypedCommandKind::Agg(typed_agg_command) => {
                let input_range = reverse_eval_timestamp_projection(
                    &current_output_range,
                    incremental_field_name,
                    &typed_agg_command.group_by,
                )?;
                query_ranges.push(input_range.clone());
                current_output_range = input_range;
            }
            // Row-filtering/ordering commands pass the range through unchanged.
            TypedCommandKind::Where(_)
            | TypedCommandKind::Sort(_)
            | TypedCommandKind::Limit(_)
            | TypedCommandKind::Within(_)
            | TypedCommandKind::Drop(_)
            | TypedCommandKind::Lookup(_) => {
                query_ranges.push(current_output_range.clone());
            }
            TypedCommandKind::From(_) | TypedCommandKind::Union(_) => {
                query_ranges.push(current_output_range.clone());
                // Reached the pipeline source; stop walking backwards.
                break;
            }
            _ => {
                query_ranges.push(current_output_range.clone());
            }
        }
    }
    let query_range_start = query_ranges
        .iter()
        .map(|r| r.start())
        .min()
        .ok_or(IncrementalAnalysisError::EmptyPipeline)?;
    let query_range_end = query_ranges
        .iter()
        .map(|r| r.end())
        .max()
        .ok_or(IncrementalAnalysisError::EmptyPipeline)?;
    Ok(*query_range_start..=*query_range_end)
}
/// Computes the replace and query ranges for a single pipeline: a forward pass
/// derives the replace range from `stale_ranges`, the range is optionally
/// snapped to `partition_unit` boundaries, and a backward pass derives the
/// input query range from the (snapped) replace range.
///
/// # Errors
/// Propagates any [`IncrementalAnalysisError`] from either pass.
pub fn compute_incremental_ranges_for_pipeline(
    pipeline: &TypedPipeline,
    stale_ranges: HashMap<Identifier, Option<TimeRange>>,
    incremental_field_name: &SimpleIdentifier,
    partition_unit: Option<TruncUnit>,
    allow_lookups: bool,
) -> Result<IncrementalRange, IncrementalAnalysisError> {
    // `pipeline` is already a reference — pass it directly (was `&pipeline`,
    // a needless double borrow).
    let replace_range = compute_replace_range_forward_pass(
        pipeline,
        &stale_ranges,
        incremental_field_name,
        allow_lookups,
    )?;
    let replace_range = if let Some(unit) = partition_unit {
        snap_to_partition_boundaries(&replace_range, &unit)
    } else {
        replace_range
    };
    let query_range =
        compute_query_range_backward_pass(pipeline, &replace_range, incremental_field_name)?;
    Ok(IncrementalRange {
        replace_range,
        query_range,
    })
}
pub fn is_cascaded_append_eligible_for_pipeline(
pipeline: Arc<TypedPipeline>,
allow_lookups: bool,
) -> Result<(), IncrementalAnalysisError> {
let command_list = match &pipeline.kind {
Ok(ValidPipeline { commands, .. }) => commands,
Err(e) => return Err(IncrementalAnalysisError::TreeHadError(e.as_ref().clone())),
};
for command in command_list {
match &command.kind {
TypedCommandKind::From(_) | TypedCommandKind::Union(_) | TypedCommandKind::Sort(_) => {
continue;
}
TypedCommandKind::Where(typed_where_command) => {
check_expression_for_non_deterministic(&typed_where_command.predicate)?;
continue;
}
TypedCommandKind::Let(typed_let_command) => {
check_projections_for_non_deterministic(&typed_let_command.projections)?;
continue;
}
TypedCommandKind::Select(typed_select_command) => {
check_projections_for_non_deterministic(&typed_select_command.projections)?;
continue;
}
TypedCommandKind::Explode(typed_explode_command) => {
for item in &typed_explode_command.items {
check_expression_for_non_deterministic(&item.assignment.expression)?;
}
continue;
}
TypedCommandKind::Unnest(typed_unnest_command) => {
check_expression_for_non_deterministic(&typed_unnest_command.expression)?;
continue;
}
TypedCommandKind::Within(typed_within_command) => {
if is_interval_or_range_of_interval_type(&typed_within_command.duration) {
return Err(IncrementalAnalysisError::NonDeterministicFunction(
"WITHIN".to_string(),
));
}
check_expression_for_non_deterministic(&typed_within_command.duration)?;
continue;
}
TypedCommandKind::Drop(_) => {
continue;
}
TypedCommandKind::Lookup(typed_lookup_command) => {
if !allow_lookups {
return Err(IncrementalAnalysisError::CommandNotSupported(
command.ast.kind.command_name().to_string(),
));
}
if let Some(condition) = &typed_lookup_command.condition {
check_expression_for_non_deterministic(condition)?;
}
continue;
}
TypedCommandKind::Window(_)
| TypedCommandKind::Agg(_)
| TypedCommandKind::Join(_)
| TypedCommandKind::Match(_)
| TypedCommandKind::Append(_)
| TypedCommandKind::Nest(_)
| TypedCommandKind::Parse(_)
| TypedCommandKind::Limit(_) => {
return Err(IncrementalAnalysisError::CommandNotSupported(
command.ast.kind.command_name().to_string(),
))
}
TypedCommandKind::Error(translation_error) => {
return Err(IncrementalAnalysisError::TreeHadError(
translation_error.as_ref().clone(),
))
}
}
}
Ok(())
}
/// Recursively collects the table identifiers referenced by MATCH patterns.
/// Patterns without a resolvable table reference and error patterns are
/// skipped; an invalid identifier aborts with `TreeHadError`.
fn collect_match_pattern_references(
    patterns: &[TypedPattern],
) -> Result<Vec<Identifier>, IncrementalAnalysisError> {
    let mut collected = Vec::new();
    for pattern in patterns {
        match pattern {
            TypedPattern::Quantified(quantified) => {
                if let Ok(table_ref) = quantified.typed_from.reference() {
                    let id = table_ref
                        .identifier
                        .clone()
                        .valid()
                        .map_err(|e| IncrementalAnalysisError::TreeHadError(e.as_ref().clone()))?;
                    collected.push(id);
                }
            }
            TypedPattern::Nested(nested) => {
                // Recurse into nested pattern groups.
                collected.extend(collect_match_pattern_references(&nested.patterns)?);
            }
            TypedPattern::Error(_) => {}
        }
    }
    Ok(collected)
}
/// Best-effort extraction of the table identifiers a pipeline reads from,
/// based solely on its first command (FROM/UNION clauses or MATCH patterns).
/// Invalid pipelines and unresolvable references yield an empty list.
fn extract_table_references(pipeline: &TypedPipeline) -> Vec<Identifier> {
    let commands = match &pipeline.kind {
        Ok(ValidPipeline { commands, .. }) => commands,
        Err(_) => return Vec::new(),
    };
    match commands.first().map(|command| &command.kind) {
        Some(TypedCommandKind::From(TypedFromCommand { clauses }))
        | Some(TypedCommandKind::Union(TypedUnionCommand { clauses })) => {
            let mut refs = Vec::new();
            for clause in clauses {
                if let Ok(reference) = clause.reference() {
                    if let Ok(id) = reference.identifier.clone().valid() {
                        refs.push(id);
                    }
                }
            }
            refs
        }
        Some(TypedCommandKind::Match(match_command)) => {
            collect_match_pattern_references(&match_command.patterns).unwrap_or_default()
        }
        _ => Vec::new(),
    }
}
/// Probes a single pipeline for each incremental strategy, collecting the
/// supported kinds and a human-readable rejection reason for each failure.
/// TimeRangeRefresh is probed with a synthetic one-day stale range per table.
pub fn detect_supported_strategies_for_pipeline(
    pipeline: Arc<TypedPipeline>,
    allow_lookups: bool,
) -> StrategyDetectionResult {
    let mut result = StrategyDetectionResult {
        supported: Vec::new(),
        rejections: Vec::new(),
    };
    if let Err(e) = is_cascaded_append_eligible_for_pipeline(Arc::clone(&pipeline), allow_lookups) {
        result.rejections.push(format!(
            "Cannot compute incrementally with cascaded append: {e}"
        ));
    } else {
        result.supported.push(IncrementalStrategyKind::CascadedAppend);
    }
    // Probe with a fabricated 2024-01-01 .. 2024-01-02 stale range; only
    // analyzability matters here, not the actual dates.
    let test_range = chrono::Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap()
        ..=chrono::Utc.with_ymd_and_hms(2024, 1, 2, 0, 0, 0).unwrap();
    let bogus_stale_ranges: HashMap<Identifier, Option<TimeRange>> =
        extract_table_references(&pipeline)
            .into_iter()
            .map(|id| (id, Some(test_range.clone())))
            .collect();
    let incremental_field = SimpleIdentifier::new("timestamp");
    match compute_incremental_ranges_for_pipeline(
        &pipeline,
        bogus_stale_ranges,
        &incremental_field,
        None,
        allow_lookups,
    ) {
        Ok(_) => result
            .supported
            .push(IncrementalStrategyKind::TimeRangeRefresh),
        Err(e) => result.rejections.push(format!(
            "Cannot compute incrementally with time range refresh: {e}"
        )),
    }
    result
}
/// Computes incremental ranges for a full statement, including WITH clauses.
///
/// CTEs are analyzed in declaration order; each CTE's replace range is fed as
/// a stale range into later CTEs and the main pipeline. The final query range
/// is the union of the main pipeline's and every CTE's query range; only the
/// (optionally partition-snapped) main replace range is returned.
///
/// # Errors
/// `DmlNotSupported` for statements with side effects, plus any analysis error
/// from the forward/backward passes.
pub fn compute_incremental_ranges_for_query(
    typed_statement: &TypedStatement,
    initial_stale_ranges: HashMap<Identifier, Option<TimeRange>>,
    incremental_field_name: &SimpleIdentifier,
    partition_unit: Option<TruncUnit>,
    allow_lookups: bool,
) -> Result<IncrementalRange, IncrementalAnalysisError> {
    // Idiom fix: was `!typed_statement.side_effect.is_none()`.
    if typed_statement.side_effect.is_some() {
        return Err(IncrementalAnalysisError::DmlNotSupported);
    }
    let main_pipeline = &typed_statement.pipeline;
    let mut cte_replace_ranges: HashMap<Identifier, TimeRange> = HashMap::new();
    for with_clause in &typed_statement.with_clauses {
        // Each CTE sees the initial stale ranges plus all earlier CTE results.
        let mut cte_stale_ranges = initial_stale_ranges.clone();
        for (name, range) in &cte_replace_ranges {
            cte_stale_ranges.insert(name.clone(), Some(range.clone()));
        }
        let cte_replace_range = compute_replace_range_forward_pass(
            &with_clause.pipeline,
            &cte_stale_ranges,
            incremental_field_name,
            allow_lookups,
        )?;
        let cte_name = with_clause
            .name
            .clone()
            .valid()
            .map_err(|e| IncrementalAnalysisError::TreeHadError(e.as_ref().clone()))?;
        cte_replace_ranges.insert(cte_name, cte_replace_range);
    }
    let mut main_stale_ranges = initial_stale_ranges.clone();
    for (name, range) in &cte_replace_ranges {
        main_stale_ranges.insert(name.clone(), Some(range.clone()));
    }
    let main_replace_range = compute_replace_range_forward_pass(
        main_pipeline,
        &main_stale_ranges,
        incremental_field_name,
        allow_lookups,
    )?;
    let final_replace_range = if let Some(unit) = partition_unit {
        snap_to_partition_boundaries(&main_replace_range, &unit)
    } else {
        main_replace_range
    };
    let main_query_range = compute_query_range_backward_pass(
        main_pipeline,
        &final_replace_range,
        incremental_field_name,
    )?;
    let mut all_query_starts = vec![*main_query_range.start()];
    let mut all_query_ends = vec![*main_query_range.end()];
    for with_clause in &typed_statement.with_clauses {
        let cte_name = with_clause
            .name
            .clone()
            .valid()
            .map_err(|e| IncrementalAnalysisError::TreeHadError(e.as_ref().clone()))?;
        // Robustness fix: document the invariant instead of a bare unwrap.
        let cte_replace_range = cte_replace_ranges
            .get(&cte_name)
            .expect("every WITH clause was inserted into cte_replace_ranges in the first loop");
        let cte_query_range = compute_query_range_backward_pass(
            &with_clause.pipeline,
            cte_replace_range,
            incremental_field_name,
        )?;
        all_query_starts.push(*cte_query_range.start());
        all_query_ends.push(*cte_query_range.end());
    }
    let aggregated_query_start = all_query_starts
        .into_iter()
        .min()
        .ok_or(IncrementalAnalysisError::EmptyPipeline)?;
    let aggregated_query_end = all_query_ends
        .into_iter()
        .max()
        .ok_or(IncrementalAnalysisError::EmptyPipeline)?;
    Ok(IncrementalRange {
        query_range: aggregated_query_start..=aggregated_query_end,
        replace_range: final_replace_range,
    })
}
/// Checks cascaded-append eligibility for a full statement: every WITH-clause
/// pipeline and the main pipeline must be eligible, and the statement must not
/// be DML.
pub fn is_cascaded_append_eligible_for_query(
    typed_statement: &TypedStatement,
    allow_lookups: bool,
) -> Result<(), IncrementalAnalysisError> {
    for with_clause in &typed_statement.with_clauses {
        is_cascaded_append_eligible_for_pipeline(with_clause.pipeline.clone(), allow_lookups)?;
    }
    // Idiom fix: was `!typed_statement.side_effect.is_none()`.
    if typed_statement.side_effect.is_some() {
        return Err(IncrementalAnalysisError::DmlNotSupported);
    }
    is_cascaded_append_eligible_for_pipeline(typed_statement.pipeline.clone(), allow_lookups)
}
/// Probes a full statement (WITH clauses included) for each incremental
/// strategy, collecting supported kinds and rejection reasons. The
/// TimeRangeRefresh probe fabricates a one-day stale range for every table
/// referenced by any pipeline in the statement.
pub fn detect_supported_strategies_for_query(
    typed_statement: &TypedStatement,
    allow_lookups: bool,
) -> StrategyDetectionResult {
    let mut result = StrategyDetectionResult {
        supported: Vec::new(),
        rejections: Vec::new(),
    };
    if let Err(e) = is_cascaded_append_eligible_for_query(typed_statement, allow_lookups) {
        result.rejections.push(format!("CascadedAppend: {e}"));
    } else {
        result.supported.push(IncrementalStrategyKind::CascadedAppend);
    }
    // Synthetic probe range; only analyzability matters, not the dates.
    let test_range = chrono::Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap()
        ..=chrono::Utc.with_ymd_and_hms(2024, 1, 2, 0, 0, 0).unwrap();
    let mut all_table_refs = Vec::new();
    for with_clause in &typed_statement.with_clauses {
        all_table_refs.extend(extract_table_references(&with_clause.pipeline));
    }
    all_table_refs.extend(extract_table_references(&typed_statement.pipeline));
    let bogus_stale_ranges: HashMap<Identifier, Option<TimeRange>> = all_table_refs
        .into_iter()
        .map(|id| (id, Some(test_range.clone())))
        .collect();
    let incremental_field = SimpleIdentifier::new("timestamp");
    match compute_incremental_ranges_for_query(
        typed_statement,
        bogus_stale_ranges,
        &incremental_field,
        None,
        allow_lookups,
    ) {
        Ok(_) => result
            .supported
            .push(IncrementalStrategyKind::TimeRangeRefresh),
        Err(e) => result.rejections.push(format!("TimeRangeRefresh: {e}")),
    }
    result
}
#[cfg(test)]
mod tests;