Skip to main content

paimon_datafusion/
relation_planner.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

//! Custom [`RelationPlanner`] for Paimon time travel via `VERSION AS OF` and `TIMESTAMP AS OF`.
19
20use std::collections::HashMap;
21use std::fmt::Debug;
22use std::sync::Arc;
23
24use datafusion::catalog::default_table_source::{provider_as_source, source_as_provider};
25use datafusion::common::TableReference;
26use datafusion::error::Result as DFResult;
27use datafusion::logical_expr::builder::LogicalPlanBuilder;
28use datafusion::logical_expr::planner::{
29    PlannedRelation, RelationPlanner, RelationPlannerContext, RelationPlanning,
30};
31use datafusion::sql::sqlparser::ast::{self, TableFactor, TableVersion};
32use paimon::spec::{SCAN_TIMESTAMP_MILLIS_OPTION, SCAN_VERSION_OPTION};
33
34use crate::table::PaimonTableProvider;
35
/// Stateless [`RelationPlanner`] that turns time travel clauses on Paimon
/// tables into Paimon table options.
///
/// Two clauses are recognised:
///
/// - `VERSION AS OF <integer or string>` becomes the `scan.version` option.
///   The value is resolved at scan time: tag name (if it exists), then
///   snapshot id, otherwise an error.
/// - `TIMESTAMP AS OF <timestamp string>` is parsed as a timestamp and becomes
///   the `scan.timestamp-millis` option.
#[derive(Debug, Default)]
pub struct PaimonRelationPlanner;

impl PaimonRelationPlanner {
    /// Creates a planner. The type carries no state, so every instance is
    /// interchangeable with the one produced by `Default`.
    pub fn new() -> Self {
        PaimonRelationPlanner
    }
}
56
57impl RelationPlanner for PaimonRelationPlanner {
58    fn plan_relation(
59        &self,
60        relation: TableFactor,
61        context: &mut dyn RelationPlannerContext,
62    ) -> DFResult<RelationPlanning> {
63        // Only handle Table factors with a version clause.
64        let TableFactor::Table {
65            ref name,
66            ref version,
67            ..
68        } = relation
69        else {
70            return Ok(RelationPlanning::Original(Box::new(relation)));
71        };
72
73        let extra_options = match version {
74            Some(TableVersion::VersionAsOf(expr)) => resolve_version_as_of(expr)?,
75            Some(TableVersion::TimestampAsOf(expr)) => resolve_timestamp_as_of(expr)?,
76            _ => return Ok(RelationPlanning::Original(Box::new(relation))),
77        };
78
79        // Resolve the table reference.
80        let table_ref = object_name_to_table_reference(name, context)?;
81        let source = context
82            .context_provider()
83            .get_table_source(table_ref.clone())?;
84        let provider = source_as_provider(&source)?;
85
86        // Check if this is a Paimon table.
87        let Some(paimon_provider) = provider.as_any().downcast_ref::<PaimonTableProvider>() else {
88            return Ok(RelationPlanning::Original(Box::new(relation)));
89        };
90
91        let new_table = paimon_provider.table().copy_with_options(extra_options);
92        let new_provider = PaimonTableProvider::try_new(new_table)?;
93        let new_source = provider_as_source(Arc::new(new_provider));
94
95        // Destructure to get alias.
96        let TableFactor::Table { alias, .. } = relation else {
97            unreachable!()
98        };
99
100        let plan = LogicalPlanBuilder::scan(table_ref, new_source, None)?.build()?;
101        Ok(RelationPlanning::Planned(Box::new(PlannedRelation::new(
102            plan, alias,
103        ))))
104    }
105}
106
107/// Convert a sqlparser `ObjectName` to a DataFusion `TableReference`.
108fn object_name_to_table_reference(
109    name: &ast::ObjectName,
110    context: &mut dyn RelationPlannerContext,
111) -> DFResult<TableReference> {
112    let idents: Vec<String> = name
113        .0
114        .iter()
115        .map(|part| {
116            let ident = part.as_ident().ok_or_else(|| {
117                datafusion::error::DataFusionError::Plan(format!(
118                    "Expected simple identifier in table reference, got: {part}"
119                ))
120            })?;
121            Ok(context.normalize_ident(ident.clone()))
122        })
123        .collect::<DFResult<_>>()?;
124    match idents.len() {
125        1 => Ok(TableReference::bare(idents[0].clone())),
126        2 => Ok(TableReference::partial(
127            idents[0].clone(),
128            idents[1].clone(),
129        )),
130        3 => Ok(TableReference::full(
131            idents[0].clone(),
132            idents[1].clone(),
133            idents[2].clone(),
134        )),
135        _ => Err(datafusion::error::DataFusionError::Plan(format!(
136            "Unsupported table reference: {name}"
137        ))),
138    }
139}
140
141/// Resolve `VERSION AS OF <expr>` into `scan.version` option.
142///
143/// The raw value (integer or string) is passed through as-is.
144/// Resolution (tag vs snapshot id) happens at scan time in `TableScan`.
145fn resolve_version_as_of(expr: &ast::Expr) -> DFResult<HashMap<String, String>> {
146    let version = match expr {
147        ast::Expr::Value(v) => match &v.value {
148            ast::Value::Number(n, _) => n.clone(),
149            ast::Value::SingleQuotedString(s) | ast::Value::DoubleQuotedString(s) => s.clone(),
150            _ => {
151                return Err(datafusion::error::DataFusionError::Plan(format!(
152                    "Unsupported VERSION AS OF expression: {expr}"
153                )))
154            }
155        },
156        _ => {
157            return Err(datafusion::error::DataFusionError::Plan(format!(
158                "Unsupported VERSION AS OF expression: {expr}. Expected an integer snapshot id or a tag name."
159            )))
160        }
161    };
162    Ok(HashMap::from([(SCAN_VERSION_OPTION.to_string(), version)]))
163}
164
165/// Resolve `TIMESTAMP AS OF <expr>` into `scan.timestamp-millis` option.
166fn resolve_timestamp_as_of(expr: &ast::Expr) -> DFResult<HashMap<String, String>> {
167    match expr {
168        ast::Expr::Value(v) => match &v.value {
169            ast::Value::SingleQuotedString(s) | ast::Value::DoubleQuotedString(s) => {
170                let millis = parse_timestamp_to_millis(s)?;
171                Ok(HashMap::from([(
172                    SCAN_TIMESTAMP_MILLIS_OPTION.to_string(),
173                    millis.to_string(),
174                )]))
175            }
176            _ => Err(datafusion::error::DataFusionError::Plan(format!(
177                "Unsupported TIMESTAMP AS OF expression: {expr}. Expected a timestamp string."
178            ))),
179        },
180        _ => Err(datafusion::error::DataFusionError::Plan(format!(
181            "Unsupported TIMESTAMP AS OF expression: {expr}. Expected a timestamp string."
182        ))),
183    }
184}
185
186/// Parse a timestamp string to milliseconds since epoch (using local timezone).
187///
188/// Matches Java Paimon's behavior which uses `TimeZone.getDefault()`.
189fn parse_timestamp_to_millis(ts: &str) -> DFResult<i64> {
190    use chrono::{Local, NaiveDateTime, TimeZone};
191
192    let naive = NaiveDateTime::parse_from_str(ts, "%Y-%m-%d %H:%M:%S").map_err(|e| {
193        datafusion::error::DataFusionError::Plan(format!(
194            "Cannot parse time travel timestamp '{ts}': {e}. Expected format: YYYY-MM-DD HH:MM:SS"
195        ))
196    })?;
197    let local = Local.from_local_datetime(&naive).single().ok_or_else(|| {
198        datafusion::error::DataFusionError::Plan(format!("Ambiguous or invalid local time: '{ts}'"))
199    })?;
200    Ok(local.timestamp_millis())
201}