lance_encoding_datafusion/
substrait.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::sync::Arc;
5
6use arrow_schema::Schema as ArrowSchema;
7use bytes::Bytes;
8use datafusion_common::ScalarValue;
9use datafusion_expr::Expr;
10use futures::FutureExt;
11use lance_core::datatypes::Schema;
12use lance_core::Result;
13use lance_datafusion::substrait::encode_substrait;
14use lance_datafusion::substrait::parse_substrait;
15use lance_encoding::decoder::FilterExpression;
16
17/// Helper trait to bridge lance-encoding and substrait
18pub trait FilterExpressionExt {
19    /// Convert a lance-encoding filter expression (which we assume is
20    /// substrait encoded) into a datafusion expr
21    fn substrait_to_df(&self, schema: Arc<ArrowSchema>) -> Result<Expr>;
22    /// Convert a datafusion filter expression into a lance-encoding
23    /// filter expression (using substrait)
24    fn df_to_substrait(expr: Expr, schema: &Schema) -> Result<Self>
25    where
26        Self: Sized;
27}
28
29impl FilterExpressionExt for FilterExpression {
30    fn substrait_to_df(&self, schema: Arc<ArrowSchema>) -> Result<Expr> {
31        if self.0.is_empty() {
32            return Ok(Expr::Literal(ScalarValue::Boolean(Some(true))));
33        }
34        let expr = parse_substrait(&self.0, schema).now_or_never().unwrap()?;
35        Ok(expr)
36    }
37
38    fn df_to_substrait(expr: Expr, schema: &Schema) -> Result<Self>
39    where
40        Self: Sized,
41    {
42        let schema = Arc::new(ArrowSchema::from(schema));
43        let bytes = Bytes::from(encode_substrait(expr, schema)?);
44        Ok(Self(bytes))
45    }
46}