use std::collections::{HashMap, HashSet};
use uni_cypher::ast::{BinaryOp, CypherLiteral, Expr, UnaryOp};
use uni_common::Value;
use uni_common::core::id::UniId;
use uni_common::core::schema::{
IndexDefinition, IndexStatus, PropertyMeta, ScalarIndexType, Schema,
};
/// Result of [`IndexAwareAnalyzer::analyze`]: how the conjuncts of a predicate
/// were distributed over the available access paths.
#[derive(Debug, Clone, Default)]
pub struct PushdownStrategy {
/// UID parsed from a `variable._uid = '<string>'` conjunct; if several such
/// conjuncts parse successfully, the last one wins.
pub uid_lookup: Option<UniId>,
/// `(column, lower, upper)` triples built from `STARTS WITH` conjuncts on a
/// column with an online BTree index; `lower` is the prefix itself and
/// `upper` is the prefix with its last character incremented.
pub btree_prefix_scans: Vec<(String, String, String)>,
/// `(column, term, path)` triples built from `CONTAINS` conjuncts on a column
/// with an online JSON full-text index. The analyzer currently always sets
/// `path` to `None`.
pub json_fts_predicates: Vec<(String, String, Option<String>)>,
/// Conjuncts that `PredicateAnalyzer::is_pushable` accepted.
pub lance_predicates: Vec<Expr>,
/// De-duplicated columns of `lance_predicates` entries (`=` or `IN`) that are
/// covered by an online Hash index.
pub hash_index_columns: Vec<String>,
/// Conjuncts that matched none of the above.
pub residual: Vec<Expr>,
}
/// Predicate analyzer that consults the schema's index definitions when
/// deciding where each conjunct of a predicate can be evaluated.
#[derive(Debug)]
pub struct IndexAwareAnalyzer<'a> {
// Borrowed schema; used for label-name lookup and for scanning `indexes`.
schema: &'a Schema,
}
impl<'a> IndexAwareAnalyzer<'a> {
/// Creates an analyzer that borrows `schema` for its whole lifetime.
pub fn new(schema: &'a Schema) -> Self {
Self { schema }
}
/// Splits `predicate` on `AND` and assigns every conjunct to the first access
/// path that accepts it, in this order: UID lookup, BTree prefix scan, JSON
/// full-text search, Lance pushdown, residual.
///
/// `variable` is the scan variable the predicate refers to and `label_id`
/// identifies the label whose indexes are consulted. For Lance-pushable
/// conjuncts the hash-indexed column (if any) is additionally recorded once in
/// `hash_index_columns`.
pub fn analyze(&self, predicate: &Expr, variable: &str, label_id: u16) -> PushdownStrategy {
    let lance_analyzer = PredicateAnalyzer::new();
    let mut plan = PushdownStrategy::default();

    for conjunct in Self::split_conjuncts(predicate) {
        if let Some(uid) = self.extract_uid_predicate(&conjunct, variable) {
            plan.uid_lookup = Some(uid);
        } else if let Some(scan) = self.extract_btree_prefix_scan(&conjunct, variable, label_id)
        {
            plan.btree_prefix_scans.push(scan);
        } else if let Some(fts) = self.extract_json_fts_predicate(&conjunct, variable, label_id)
        {
            plan.json_fts_predicates.push(fts);
        } else if !lance_analyzer.is_pushable(&conjunct, variable) {
            plan.residual.push(conjunct);
        } else {
            // Remember each hash-indexed column only once.
            match self.hash_index_column(&conjunct, variable, label_id) {
                Some(col) if !plan.hash_index_columns.contains(&col) => {
                    plan.hash_index_columns.push(col);
                }
                _ => {}
            }
            plan.lance_predicates.push(conjunct);
        }
    }

    plan
}
/// Returns the property name when `expr` is `variable.prop = …` or
/// `variable.prop IN …` and the label identified by `label_id` has an online
/// Hash scalar index covering `prop`; otherwise `None`.
fn hash_index_column(&self, expr: &Expr, variable: &str, label_id: u16) -> Option<String> {
    // Both supported shapes keep the property access on their left-hand side.
    let target = match expr {
        Expr::BinaryOp {
            left,
            op: BinaryOp::Eq,
            ..
        }
        | Expr::In { expr: left, .. } => left.as_ref(),
        _ => return None,
    };
    let Expr::Property(owner, prop) = target else {
        return None;
    };
    if !matches!(owner.as_ref(), Expr::Variable(v) if v == variable) {
        return None;
    }

    let label_name = self.schema.label_name_by_id(label_id)?;
    let has_hash_index = self.schema.indexes.iter().any(|idx| {
        matches!(
            idx,
            IndexDefinition::Scalar(cfg)
                if cfg.label == *label_name
                    && cfg.properties.contains(prop)
                    && cfg.index_type == ScalarIndexType::Hash
                    && cfg.metadata.status == IndexStatus::Online
        )
    });
    has_hash_index.then(|| prop.clone())
}
/// Recognizes `variable._uid = '<string literal>'` and returns the UID decoded
/// with `UniId::from_multibase`. Any other shape, or a string that fails to
/// decode, yields `None`.
fn extract_uid_predicate(&self, expr: &Expr, variable: &str) -> Option<UniId> {
    let Expr::BinaryOp {
        left,
        op: BinaryOp::Eq,
        right,
    } = expr
    else {
        return None;
    };
    let Expr::Property(owner, prop) = left.as_ref() else {
        return None;
    };
    let targets_uid =
        prop == "_uid" && matches!(owner.as_ref(), Expr::Variable(v) if v == variable);
    match right.as_ref() {
        Expr::Literal(CypherLiteral::String(encoded)) if targets_uid => {
            UniId::from_multibase(encoded).ok()
        }
        _ => None,
    }
}
/// Recognizes `variable.prop STARTS WITH '<non-empty literal>'` on a property
/// covered by an online BTree scalar index of the label `label_id`.
///
/// Returns `(column, lower, upper)` where `lower` is the prefix and `upper` is
/// produced by `increment_last_char`; `None` when the shape does not match, no
/// such index exists, or no upper bound can be derived.
fn extract_btree_prefix_scan(
    &self,
    expr: &Expr,
    variable: &str,
    label_id: u16,
) -> Option<(String, String, String)> {
    let Expr::BinaryOp {
        left,
        op: BinaryOp::StartsWith,
        right,
    } = expr
    else {
        return None;
    };
    let (Expr::Property(owner, prop), Expr::Literal(CypherLiteral::String(prefix))) =
        (left.as_ref(), right.as_ref())
    else {
        return None;
    };
    let on_scan_variable = matches!(owner.as_ref(), Expr::Variable(v) if v == variable);
    if !on_scan_variable || prefix.is_empty() {
        return None;
    }

    let label_name = self.schema.label_name_by_id(label_id)?;
    let has_btree_index = self.schema.indexes.iter().any(|idx| {
        matches!(
            idx,
            IndexDefinition::Scalar(cfg)
                if cfg.label == *label_name
                    && cfg.properties.contains(prop)
                    && cfg.index_type == ScalarIndexType::BTree
                    && cfg.metadata.status == IndexStatus::Online
        )
    });
    if !has_btree_index {
        return None;
    }

    let upper = increment_last_char(prefix)?;
    Some((prop.clone(), prefix.clone(), upper))
}
/// Recognizes `variable.prop CONTAINS '<string literal>'` on a column that has
/// an online JSON full-text index for the label `label_id`.
///
/// Returns `(column, term, None)`; the third element (a path) is never filled
/// in by this function.
fn extract_json_fts_predicate(
    &self,
    expr: &Expr,
    variable: &str,
    label_id: u16,
) -> Option<(String, String, Option<String>)> {
    let Expr::BinaryOp {
        left,
        op: BinaryOp::Contains,
        right,
    } = expr
    else {
        return None;
    };
    let Expr::Property(owner, prop) = left.as_ref() else {
        return None;
    };
    let Expr::Variable(v) = owner.as_ref() else {
        return None;
    };
    let Expr::Literal(CypherLiteral::String(term)) = right.as_ref() else {
        return None;
    };
    if v != variable {
        return None;
    }

    let label_name = self.schema.label_name_by_id(label_id)?;
    self.schema.indexes.iter().find_map(|idx| match idx {
        IndexDefinition::JsonFullText(cfg)
            if cfg.label == *label_name
                && cfg.column == *prop
                && cfg.metadata.status == IndexStatus::Online =>
        {
            Some((prop.clone(), term.clone(), None))
        }
        _ => None,
    })
}
/// Flattens nested `AND` nodes into an owned list of conjuncts, preserving
/// left-to-right order. A predicate without a top-level `AND` yields a single
/// element.
fn split_conjuncts(expr: &Expr) -> Vec<Expr> {
    let mut conjuncts = Vec::new();
    // Explicit work stack; the right operand is pushed first so the left one
    // is popped (and therefore emitted) first.
    let mut pending = vec![expr];
    while let Some(node) = pending.pop() {
        match node {
            Expr::BinaryOp {
                left,
                op: BinaryOp::And,
                right,
            } => {
                pending.push(right.as_ref());
                pending.push(left.as_ref());
            }
            leaf => conjuncts.push(leaf.clone()),
        }
    }
    conjuncts
}
}
/// Output of [`PredicateAnalyzer::analyze`].
#[derive(Debug)]
pub struct PredicateAnalysis {
/// Conjuncts (or OR-chains rewritten to `IN`) accepted by `is_pushable`.
pub pushable: Vec<Expr>,
/// Conjuncts that were not accepted as pushable.
pub residual: Vec<Expr>,
/// Properties of the scan variable referenced by the `residual` expressions,
/// in no particular order.
pub required_properties: Vec<String>,
}
/// Stateless analyzer that splits a predicate into pushable and residual parts.
#[derive(Debug, Default)]
pub struct PredicateAnalyzer;
impl PredicateAnalyzer {
/// Creates the analyzer; it carries no state.
pub fn new() -> Self {
Self
}
/// Splits `predicate` into pushable and residual conjuncts for
/// `scan_variable` and reports which of that variable's properties the
/// residual conjuncts reference.
pub fn analyze(&self, predicate: &Expr, scan_variable: &str) -> PredicateAnalysis {
    let (mut pushable, mut residual) = (Vec::new(), Vec::new());
    self.split_conjuncts(predicate, scan_variable, &mut pushable, &mut residual);

    PredicateAnalysis {
        // Evaluated first, while `residual` is still only borrowed.
        required_properties: self.extract_properties(&residual, scan_variable),
        pushable,
        residual,
    }
}
/// Recursively walks `expr`, appending each conjunct to `pushable` or
/// `residual`.
///
/// An OR-chain of equalities on one property is first tried as an `IN`
/// expression; if that rewritten form is pushable it is stored instead of the
/// original. `AND` nodes are descended into; everything else is classified
/// as a whole via `is_pushable`.
fn split_conjuncts(
    &self,
    expr: &Expr,
    variable: &str,
    pushable: &mut Vec<Expr>,
    residual: &mut Vec<Expr>,
) {
    if let Some(in_expr) = try_or_to_in(expr, variable) {
        if self.is_pushable(&in_expr, variable) {
            pushable.push(in_expr);
            return;
        }
    }

    if let Expr::BinaryOp {
        left,
        op: BinaryOp::And,
        right,
    } = expr
    {
        for side in [left, right] {
            self.split_conjuncts(side, variable, pushable, residual);
        }
        return;
    }

    let bucket = if self.is_pushable(expr, variable) {
        pushable
    } else {
        residual
    };
    bucket.push(expr.clone());
}
/// Reports whether `expr` has a shape that can be handed to storage as a
/// filter on `variable`.
///
/// Accepted shapes:
/// * `variable.prop IN <list | parameter>`
/// * `variable.prop <cmp> <literal | parameter | list>` for
///   `=`, `!=`, `<`, `<=`, `>`, `>=`
/// * `variable.prop CONTAINS / STARTS WITH / ENDS WITH <string literal>`
/// * `NOT <pushable>`
/// * `variable.prop IS NULL` / `IS NOT NULL`
///
/// Lists are only accepted when every element is itself a literal, a
/// parameter, or a list of those. A list holding anything else (for example a
/// property of another variable, as produced by rewriting
/// `n.x = 1 OR n.x = m.y` to `IN`) cannot be rendered as a constant, so the
/// predicate must stay with the residual filter instead of being claimed as
/// pushable.
pub fn is_pushable(&self, expr: &Expr, variable: &str) -> bool {
    // True when `candidate` is `variable.<some property>`.
    fn is_scan_property(candidate: &Expr, variable: &str) -> bool {
        matches!(
            candidate,
            Expr::Property(owner, _)
                if matches!(owner.as_ref(), Expr::Variable(v) if v == variable)
        )
    }

    // True for a literal, a parameter, or a (possibly nested) list of those.
    fn is_constant(candidate: &Expr) -> bool {
        match candidate {
            Expr::Literal(_) | Expr::Parameter(_) => true,
            Expr::List(items) => items.iter().all(is_constant),
            _ => false,
        }
    }

    match expr {
        Expr::In {
            expr: left,
            list: right,
        } => {
            let right_valid = matches!(right.as_ref(), Expr::List(_) | Expr::Parameter(_))
                && is_constant(right);
            is_scan_property(left, variable) && right_valid
        }
        Expr::BinaryOp { left, op, right } => {
            let right_valid = match op {
                // String matching is only supported against a string literal.
                BinaryOp::Contains | BinaryOp::StartsWith | BinaryOp::EndsWith => {
                    matches!(right.as_ref(), Expr::Literal(CypherLiteral::String(_)))
                }
                BinaryOp::Eq
                | BinaryOp::NotEq
                | BinaryOp::Lt
                | BinaryOp::LtEq
                | BinaryOp::Gt
                | BinaryOp::GtEq => is_constant(right),
                _ => return false,
            };
            is_scan_property(left, variable) && right_valid
        }
        Expr::UnaryOp {
            op: UnaryOp::Not,
            expr: inner,
        } => self.is_pushable(inner, variable),
        Expr::IsNull(inner) | Expr::IsNotNull(inner) => is_scan_property(inner, variable),
        _ => false,
    }
}
/// Gathers the distinct property names of `variable` referenced anywhere in
/// `exprs`. The order of the returned names is unspecified (it follows the
/// hash set's iteration order).
fn extract_properties(&self, exprs: &[Expr], variable: &str) -> Vec<String> {
    let mut seen = HashSet::new();
    exprs
        .iter()
        .for_each(|expr| collect_properties(expr, variable, &mut seen));

    let mut names = Vec::with_capacity(seen.len());
    names.extend(seen);
    names
}
}
/// If `expr` is an OR-chain made up exclusively of single-label checks on
/// `variable`, returns the labels in left-to-right order.
///
/// Returns `None` when any branch has a different shape or fewer than two
/// labels were collected.
pub fn try_label_or_to_union(expr: &Expr, variable: &str) -> Option<Vec<String>> {
    let mut found = Vec::new();
    let only_label_checks = collect_or_branches(expr, variable, &mut found, &label_leaf);
    (only_label_checks && found.len() >= 2).then_some(found)
}
/// Walks an OR-tree left to right and applies `leaf` to every non-`OR` node.
///
/// Returns `true` only if `leaf` accepted every branch; evaluation stops at
/// the first rejected branch, so `out` may hold partial results on `false`.
fn collect_or_branches<F>(expr: &Expr, variable: &str, out: &mut Vec<String>, leaf: &F) -> bool
where
    F: Fn(&Expr, &str, &mut Vec<String>) -> bool,
{
    if let Expr::BinaryOp {
        left,
        op: BinaryOp::Or,
        right,
    } = expr
    {
        if !collect_or_branches(left, variable, out, leaf) {
            return false;
        }
        return collect_or_branches(right, variable, out, leaf);
    }
    leaf(expr, variable, out)
}
/// Leaf matcher for `try_label_or_to_union`: accepts a label check on
/// `variable` that names exactly one label and appends that label to `out`.
fn label_leaf(expr: &Expr, variable: &str, out: &mut Vec<String>) -> bool {
    match expr {
        Expr::LabelCheck {
            expr: target,
            labels,
        } if labels.len() == 1 => match target.as_ref() {
            Expr::Variable(v) if v == variable => {
                out.push(labels[0].clone());
                true
            }
            _ => false,
        },
        _ => false,
    }
}
/// If `expr` is an OR-chain made up exclusively of `type(variable) = '<str>'`
/// comparisons (either operand order), returns the type names in
/// left-to-right order.
///
/// Returns `None` when any branch has a different shape or fewer than two
/// names were collected.
pub fn try_type_or_to_union(expr: &Expr, variable: &str) -> Option<Vec<String>> {
    let mut found = Vec::new();
    let only_type_checks = collect_or_branches(expr, variable, &mut found, &type_eq_leaf);
    (only_type_checks && found.len() >= 2).then_some(found)
}
/// Leaf matcher for `try_type_or_to_union`: accepts an equality whose one
/// side is `type(variable)` and whose other side is a string literal, trying
/// the operands in both orders.
fn type_eq_leaf(expr: &Expr, variable: &str, out: &mut Vec<String>) -> bool {
    match expr {
        Expr::BinaryOp {
            left,
            op: BinaryOp::Eq,
            right,
        } => {
            if is_type_eq_string(left, right, variable, out) {
                true
            } else {
                is_type_eq_string(right, left, variable, out)
            }
        }
        _ => false,
    }
}
/// Checks that `fn_side` is a one-argument call to `type` (name compared
/// ASCII case-insensitively) on `variable` and that `str_side` is a string
/// literal; on success the literal is appended to `out`.
fn is_type_eq_string(
    fn_side: &Expr,
    str_side: &Expr,
    variable: &str,
    out: &mut Vec<String>,
) -> bool {
    let Expr::FunctionCall { name, args, .. } = fn_side else {
        return false;
    };
    if !name.eq_ignore_ascii_case("type") || args.len() != 1 {
        return false;
    }
    let Expr::Variable(v) = &args[0] else {
        return false;
    };
    if v != variable {
        return false;
    }
    match str_side {
        Expr::Literal(CypherLiteral::String(s)) => {
            out.push(s.clone());
            true
        }
        _ => false,
    }
}
/// Rewrites `v.p = a OR v.p = b OR …` into `v.p IN [a, b, …]`.
///
/// Returns `None` unless `expr` is an `OR` whose branches are all equalities
/// on the same property of `variable` and at least two values were gathered.
fn try_or_to_in(expr: &Expr, variable: &str) -> Option<Expr> {
    let is_or = matches!(
        expr,
        Expr::BinaryOp {
            op: BinaryOp::Or,
            ..
        }
    );
    if !is_or {
        return None;
    }

    let mut property: Option<String> = None;
    let mut values: Vec<Expr> = Vec::new();
    if !collect_or_equals(expr, variable, &mut property, &mut values) {
        return None;
    }
    let prop = property?;
    if values.len() < 2 {
        return None;
    }

    let target = Expr::Property(Box::new(Expr::Variable(variable.to_string())), prop);
    Some(Expr::In {
        expr: Box::new(target),
        list: Box::new(Expr::List(values)),
    })
}
/// Helper for `try_or_to_in`: walks an OR-tree and collects the right-hand
/// sides of `variable.prop = <rhs>` branches into `values`.
///
/// The first matching branch fixes `property`; every later branch must use
/// the same property. Returns `false` as soon as a branch does not fit, in
/// which case `property` and `values` may hold partial results.
fn collect_or_equals(
    expr: &Expr,
    variable: &str,
    property: &mut Option<String>,
    values: &mut Vec<Expr>,
) -> bool {
    let (left, op, right) = match expr {
        Expr::BinaryOp { left, op, right } => (left, op, right),
        _ => return false,
    };

    match op {
        BinaryOp::Or => {
            collect_or_equals(left, variable, property, values)
                && collect_or_equals(right, variable, property, values)
        }
        BinaryOp::Eq => {
            let Expr::Property(owner, prop) = left.as_ref() else {
                return false;
            };
            if !matches!(owner.as_ref(), Expr::Variable(v) if v == variable) {
                return false;
            }
            match property {
                Some(existing) if *existing != *prop => return false,
                Some(_) => {}
                None => *property = Some(prop.clone()),
            }
            values.push(right.as_ref().clone());
            true
        }
        _ => false,
    }
}
/// Recursively records in `props` every property name accessed on `variable`
/// inside `expr`.
///
/// Descends through binary and unary operators, `IS [NOT] NULL`, `IN`
/// (both the tested expression and the list), list and map literals, function
/// call arguments and array indexing. Other expression kinds are not
/// descended into.
fn collect_properties(expr: &Expr, variable: &str, props: &mut HashSet<String>) {
    match expr {
        Expr::Property(box_expr, prop) => {
            if matches!(box_expr.as_ref(), Expr::Variable(v) if v == variable) {
                props.insert(prop.clone());
            }
        }
        Expr::BinaryOp { left, right, .. } => {
            collect_properties(left, variable, props);
            collect_properties(right, variable, props);
        }
        Expr::UnaryOp { expr, .. } => {
            collect_properties(expr, variable, props);
        }
        Expr::IsNull(expr) | Expr::IsNotNull(expr) => {
            collect_properties(expr, variable, props);
        }
        // A residual `IN` (e.g. `n.x IN n.tags`) reads properties on both
        // sides; without this arm they would be missing from the projection.
        Expr::In { expr: target, list } => {
            collect_properties(target, variable, props);
            collect_properties(list, variable, props);
        }
        Expr::List(items) => {
            for item in items {
                collect_properties(item, variable, props);
            }
        }
        Expr::Map(items) => {
            for (_, item) in items {
                collect_properties(item, variable, props);
            }
        }
        Expr::FunctionCall { args, .. } => {
            for arg in args {
                collect_properties(arg, variable, props);
            }
        }
        Expr::ArrayIndex {
            array: arr,
            index: idx,
        } => {
            collect_properties(arr, variable, props);
            collect_properties(idx, variable, props);
        }
        _ => {}
    }
}
/// Returns `s` with its final character replaced by the next Unicode scalar
/// value, for use as the exclusive upper bound of a prefix range scan.
///
/// Returns `None` for an empty string or when the final character is
/// `char::MAX` (there is no successor). A final U+D7FF is bumped to U+E000,
/// skipping the surrogate range, which contains no valid `char`.
///
/// NOTE(review): the bound is only correct if the index orders strings by
/// code point / UTF-8 bytes — confirm against the index implementation.
fn increment_last_char(s: &str) -> Option<String> {
    let last = s.chars().next_back()?;
    let successor = match last {
        // U+D800..=U+DFFF are surrogates; the next scalar value is U+E000.
        '\u{D7FF}' => '\u{E000}',
        c => char::from_u32(c as u32 + 1)?,
    };

    let head = &s[..s.len() - last.len_utf8()];
    let mut bumped = String::with_capacity(head.len() + successor.len_utf8());
    bumped.push_str(head);
    bumped.push(successor);
    Some(bumped)
}
/// Flattens nested `AND` nodes into borrowed leaves, preserving left-to-right
/// order. The returned references point into `expr` itself.
fn flatten_ands(expr: &Expr) -> Vec<&Expr> {
    let mut leaves = Vec::new();
    // Right operand is pushed first so the left one is processed first.
    let mut pending = vec![expr];
    while let Some(node) = pending.pop() {
        if let Expr::BinaryOp {
            left,
            op: BinaryOp::And,
            right,
        } = node
        {
            pending.push(right.as_ref());
            pending.push(left.as_ref());
        } else {
            leaves.push(node);
        }
    }
    leaves
}
/// Namespace type for turning predicate expressions into a SQL-style filter
/// string; all functionality lives in associated functions.
#[derive(Debug)]
pub struct LanceFilterGenerator;
impl LanceFilterGenerator {
/// Returns `true` if `s` contains a character that has special meaning inside
/// a SQL `LIKE` pattern: the wildcards `%` and `_`, or a backslash.
///
/// Backslash is included because `escape_like_pattern` in this type treats
/// it as the LIKE escape character; a literal containing one must not be
/// spliced into a pattern unescaped.
fn contains_sql_wildcards(s: &str) -> bool {
    s.chars().any(|c| matches!(c, '%' | '_' | '\\'))
}
/// Escapes `s` for use inside a quoted `LIKE` pattern: backslash, `%` and `_`
/// are prefixed with a backslash and single quotes are doubled.
#[expect(
    dead_code,
    reason = "Reserved for future use when Lance supports ESCAPE"
)]
fn escape_like_pattern(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '%' => escaped.push_str("\\%"),
            '_' => escaped.push_str("\\_"),
            '\'' => escaped.push_str("''"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Renders `predicates` (implicitly AND-ed) as a single filter string.
///
/// * `variable` — the scan variable whose properties map to columns.
/// * `schema_props` — when provided, non-underscore properties missing from
///   this map are not translated.
///
/// Each predicate is flattened on `AND`. When a column has at least two range
/// comparisons and the last-seen lower and upper bounds are both inclusive
/// (`>=` / `<=`), that pair is merged into one
/// `"col" >= lo AND "col" <= hi` clause. Merged clauses come first, in the
/// order their columns first appear in the input, followed by every remaining
/// conjunct in input order. Conjuncts that cannot be translated are skipped.
///
/// Returns `None` if `predicates` is empty or nothing could be translated.
pub fn generate(
    predicates: &[Expr],
    variable: &str,
    schema_props: Option<&HashMap<String, PropertyMeta>>,
) -> Option<String> {
    if predicates.is_empty() {
        return None;
    }
    let flattened: Vec<&Expr> = predicates.iter().flat_map(|p| flatten_ands(p)).collect();

    // Group range comparisons per column. `column_order` remembers first-seen
    // order so the output does not depend on HashMap iteration order.
    let mut column_order: Vec<String> = Vec::new();
    let mut by_column: HashMap<String, Vec<&Expr>> = HashMap::new();
    for &expr in &flattened {
        if let Some(col) = Self::extract_column_from_range(expr, variable, schema_props) {
            if !by_column.contains_key(&col) {
                column_order.push(col.clone());
            }
            by_column.entry(col).or_default().push(expr);
        }
    }

    let mut filters: Vec<String> = Vec::new();
    // Conjuncts already covered by a merged range, identified by address
    // (the references all point into `predicates`).
    let mut used_expressions: HashSet<*const Expr> = HashSet::new();

    for col in &column_order {
        let Some(exprs) = by_column.get(col) else {
            continue;
        };
        if exprs.len() < 2 {
            continue;
        }

        // (inclusive?, bound value, originating conjunct); a later bound of
        // the same direction overwrites an earlier one.
        let mut lower: Option<(bool, &Expr, &Expr)> = None;
        let mut upper: Option<(bool, &Expr, &Expr)> = None;
        for &expr in exprs {
            if let Expr::BinaryOp { op, right, .. } = expr {
                let bound: &Expr = right;
                match op {
                    BinaryOp::Gt => lower = Some((false, bound, expr)),
                    BinaryOp::GtEq => lower = Some((true, bound, expr)),
                    BinaryOp::Lt => upper = Some((false, bound, expr)),
                    BinaryOp::LtEq => upper = Some((true, bound, expr)),
                    _ => {}
                }
            }
        }

        // Only a doubly-inclusive pair is merged.
        if let (Some((true, l_val, l_expr)), Some((true, u_val, u_expr))) = (lower, upper) {
            if let (Some(l_str), Some(u_str)) =
                (Self::value_to_lance(l_val), Self::value_to_lance(u_val))
            {
                // NOTE(review): the column is double-quoted here but emitted
                // bare by `expr_to_lance` — confirm which form is intended.
                filters.push(format!(
                    "\"{}\" >= {} AND \"{}\" <= {}",
                    col, l_str, col, u_str
                ));
                used_expressions.insert(l_expr as *const Expr);
                used_expressions.insert(u_expr as *const Expr);
            }
        }
    }

    for expr in flattened {
        if used_expressions.contains(&(expr as *const Expr)) {
            continue;
        }
        if let Some(s) = Self::expr_to_lance(expr, variable, schema_props) {
            filters.push(s);
        }
    }

    if filters.is_empty() {
        None
    } else {
        Some(filters.join(" AND "))
    }
}
/// Returns the column on the left-hand side of a `<`, `<=`, `>` or `>=`
/// comparison, as resolved by `extract_column`; `None` for anything else.
fn extract_column_from_range(
    expr: &Expr,
    variable: &str,
    schema_props: Option<&HashMap<String, PropertyMeta>>,
) -> Option<String> {
    let Expr::BinaryOp { left, op, .. } = expr else {
        return None;
    };
    match op {
        BinaryOp::Gt | BinaryOp::GtEq | BinaryOp::Lt | BinaryOp::LtEq => {
            Self::extract_column(left, variable, schema_props)
        }
        _ => None,
    }
}
/// Translates one predicate into filter text, or `None` if any part of it
/// cannot be expressed.
///
/// Handles `IN`, comparison operators, `CONTAINS` / `STARTS WITH` /
/// `ENDS WITH` (as `LIKE`, refused when the literal contains LIKE
/// metacharacters), `NOT`, `IS NULL` and `IS NOT NULL`.
fn expr_to_lance(
    expr: &Expr,
    variable: &str,
    schema_props: Option<&HashMap<String, PropertyMeta>>,
) -> Option<String> {
    match expr {
        Expr::In {
            expr: left,
            list: right,
        } => {
            let column = Self::extract_column(left, variable, schema_props)?;
            Self::value_to_lance(right).map(|value| format!("{} IN {}", column, value))
        }
        Expr::BinaryOp { left, op, right } => {
            let column = Self::extract_column(left, variable, schema_props)?;
            let is_string_match = matches!(
                op,
                BinaryOp::Contains | BinaryOp::StartsWith | BinaryOp::EndsWith
            );
            if !is_string_match {
                let op_str = Self::op_to_lance(op)?;
                let value = Self::value_to_lance(right)?;
                return Some(format!("{} {} {}", column, op_str, value));
            }

            let raw_value = Self::get_string_value(right)?;
            if Self::contains_sql_wildcards(&raw_value) {
                // The literal would change the meaning of the LIKE pattern.
                return None;
            }
            let escaped = raw_value.replace('\'', "''");
            Some(match op {
                BinaryOp::Contains => format!("{} LIKE '%{}%'", column, escaped),
                BinaryOp::StartsWith => format!("{} LIKE '{}%'", column, escaped),
                // Only `EndsWith` remains after the `is_string_match` check.
                _ => format!("{} LIKE '%{}'", column, escaped),
            })
        }
        Expr::UnaryOp {
            op: UnaryOp::Not,
            expr: negated,
        } => Self::expr_to_lance(negated, variable, schema_props)
            .map(|inner| format!("NOT ({})", inner)),
        Expr::IsNull(inner) => Self::extract_column(inner, variable, schema_props)
            .map(|column| format!("{} IS NULL", column)),
        Expr::IsNotNull(inner) => Self::extract_column(inner, variable, schema_props)
            .map(|column| format!("{} IS NOT NULL", column)),
        _ => None,
    }
}
/// Resolves `variable.prop` to the column name `prop`.
///
/// Names starting with `_` are always accepted. Other names are accepted
/// when `schema_props` is `None` or contains the name. Any other expression
/// shape, or a property of a different variable, yields `None`.
fn extract_column(
    expr: &Expr,
    variable: &str,
    schema_props: Option<&HashMap<String, PropertyMeta>>,
) -> Option<String> {
    let Expr::Property(owner, prop) = expr else {
        return None;
    };
    if !matches!(owner.as_ref(), Expr::Variable(var) if var == variable) {
        return None;
    }

    let underscore_prefixed = prop.starts_with('_');
    let known_to_schema = schema_props.map_or(true, |props| props.contains_key(prop.as_str()));
    (underscore_prefixed || known_to_schema).then(|| prop.clone())
}
/// Maps a comparison operator to its SQL spelling; `None` for every other
/// operator.
fn op_to_lance(op: &BinaryOp) -> Option<&'static str> {
    let symbol = match op {
        BinaryOp::Eq => "=",
        BinaryOp::NotEq => "!=",
        BinaryOp::Lt => "<",
        BinaryOp::LtEq => "<=",
        BinaryOp::Gt => ">",
        BinaryOp::GtEq => ">=",
        _ => return None,
    };
    Some(symbol)
}
/// Renders a constant expression as filter text.
///
/// * strings — passed through `normalize_datetime_str` (the original text is
///   kept when that returns `None`), single quotes doubled, wrapped in quotes;
/// * integers, booleans — their `to_string()` form;
/// * floats — their `to_string()` form, but only when finite: `NaN` and
///   infinities would print as `NaN` / `inf`, which are not numeric literals;
/// * null — `NULL`;
/// * lists — `(a, b, …)`, failing if any element fails.
///
/// Parameters and every other expression kind yield `None`.
fn value_to_lance(expr: &Expr) -> Option<String> {
    match expr {
        Expr::Literal(CypherLiteral::String(s)) => {
            let s = super::df_expr::normalize_datetime_str(s).unwrap_or_else(|| s.clone());
            Some(format!("'{}'", s.replace("'", "''")))
        }
        Expr::Literal(CypherLiteral::Integer(i)) => Some(i.to_string()),
        Expr::Literal(CypherLiteral::Float(f)) => f.is_finite().then(|| f.to_string()),
        Expr::Literal(CypherLiteral::Bool(b)) => Some(b.to_string()),
        Expr::Literal(CypherLiteral::Null) => Some("NULL".to_string()),
        Expr::List(items) => {
            let values: Option<Vec<String>> = items.iter().map(Self::value_to_lance).collect();
            values.map(|v| format!("({})", v.join(", ")))
        }
        // Parameters must be substituted before rendering; never inline them.
        Expr::Parameter(_) => None,
        _ => None,
    }
}
/// Returns a copy of the text when `expr` is a string literal, else `None`.
fn get_string_value(expr: &Expr) -> Option<String> {
    if let Expr::Literal(CypherLiteral::String(text)) = expr {
        Some(text.clone())
    } else {
        None
    }
}
}
/// Returns the property name a predicate tests, when the predicate is a
/// binary operation, `IN`, `IS NULL` or `IS NOT NULL` whose subject (the
/// left-hand side, or the inner expression for the null checks) is
/// `variable.prop`.
pub fn predicate_target_column(expr: &Expr, variable: &str) -> Option<String> {
    let subject: &Expr = match expr {
        Expr::BinaryOp { left: side, .. }
        | Expr::In { expr: side, .. }
        | Expr::IsNull(side)
        | Expr::IsNotNull(side) => side.as_ref(),
        _ => return None,
    };

    match subject {
        Expr::Property(owner, prop)
            if matches!(owner.as_ref(), Expr::Variable(v) if v == variable) =>
        {
            Some(prop.clone())
        }
        _ => None,
    }
}
/// Converts a runtime value into the equivalent literal expression.
///
/// Null, bool, int, float and string map to literals; lists map to
/// `Expr::List` and fail if any element fails. Every other value kind yields
/// `None`.
fn value_to_expr(v: &Value) -> Option<Expr> {
    let literal = match v {
        Value::Null => CypherLiteral::Null,
        Value::Bool(b) => CypherLiteral::Bool(*b),
        Value::Int(i) => CypherLiteral::Integer(*i),
        Value::Float(f) => CypherLiteral::Float(*f),
        Value::String(s) => CypherLiteral::String(s.clone()),
        Value::List(items) => {
            return items
                .iter()
                .map(value_to_expr)
                .collect::<Option<Vec<Expr>>>()
                .map(Expr::List);
        }
        _ => return None,
    };
    Some(Expr::Literal(literal))
}
/// Returns a copy of `expr` in which every parameter reference is replaced by
/// the literal form of its value from `params`.
///
/// Recurses through binary and unary operators, `IN`, `IS [NOT] NULL` and
/// list literals; any other node is cloned unchanged. Returns `None` if a
/// parameter is missing from `params` or its value has no literal form.
pub fn substitute_params(expr: &Expr, params: &HashMap<String, Value>) -> Option<Expr> {
    // Substitutes inside a child node and re-boxes the result.
    fn boxed(child: &Expr, params: &HashMap<String, Value>) -> Option<Box<Expr>> {
        substitute_params(child, params).map(Box::new)
    }

    let rewritten = match expr {
        Expr::Parameter(name) => {
            let value = params.get(name)?;
            value_to_expr(value)?
        }
        Expr::BinaryOp { left, op, right } => {
            let left = boxed(left, params)?;
            let right = boxed(right, params)?;
            Expr::BinaryOp {
                left,
                op: *op,
                right,
            }
        }
        Expr::UnaryOp { op, expr: inner } => Expr::UnaryOp {
            op: *op,
            expr: boxed(inner, params)?,
        },
        Expr::In { expr: left, list } => {
            let target = boxed(left, params)?;
            let list = boxed(list, params)?;
            Expr::In { expr: target, list }
        }
        Expr::IsNull(inner) => Expr::IsNull(boxed(inner, params)?),
        Expr::IsNotNull(inner) => Expr::IsNotNull(boxed(inner, params)?),
        Expr::List(items) => {
            let mut replaced = Vec::with_capacity(items.len());
            for item in items {
                replaced.push(substitute_params(item, params)?);
            }
            Expr::List(replaced)
        }
        other => other.clone(),
    };
    Some(rewritten)
}
#[cfg(test)]
mod security_tests {
    use super::*;

    /// Wraps `s` in a string-literal expression.
    fn text(s: &str) -> Expr {
        Expr::Literal(CypherLiteral::String(s.to_string()))
    }

    /// Builds the predicate `n.name <op> <rhs>`.
    fn name_predicate(op: BinaryOp, rhs: Expr) -> Expr {
        Expr::BinaryOp {
            left: Box::new(Expr::Property(
                Box::new(Expr::Variable("n".to_string())),
                "name".to_string(),
            )),
            op,
            right: Box::new(rhs),
        }
    }

    /// Runs the generator on one predicate over variable `n`, without schema.
    fn lance_filter(predicate: Expr) -> Option<String> {
        LanceFilterGenerator::generate(&[predicate], "n", None)
    }

    mod wildcard_protection {
        use super::*;

        #[test]
        fn test_contains_sql_wildcards_detects_percent() {
            for sample in ["admin%", "%admin", "ad%min"] {
                assert!(LanceFilterGenerator::contains_sql_wildcards(sample));
            }
        }

        #[test]
        fn test_contains_sql_wildcards_detects_underscore() {
            for sample in ["a_min", "_admin", "admin_"] {
                assert!(LanceFilterGenerator::contains_sql_wildcards(sample));
            }
        }

        #[test]
        fn test_contains_sql_wildcards_safe_strings() {
            for sample in ["admin", "John Smith", "test@example.com"] {
                assert!(!LanceFilterGenerator::contains_sql_wildcards(sample));
            }
        }

        #[test]
        fn test_wildcard_in_contains_not_pushed_down() {
            let filter = lance_filter(name_predicate(BinaryOp::Contains, text("admin%")));
            assert!(
                filter.is_none(),
                "CONTAINS with wildcard should not be pushed to storage"
            );
        }

        #[test]
        fn test_underscore_in_startswith_not_pushed_down() {
            let filter = lance_filter(name_predicate(BinaryOp::StartsWith, text("user_")));
            assert!(
                filter.is_none(),
                "STARTSWITH with underscore should not be pushed to storage"
            );
        }

        #[test]
        fn test_safe_contains_is_pushed_down() {
            let filter = lance_filter(name_predicate(BinaryOp::Contains, text("admin")));
            assert!(filter.is_some(), "Safe CONTAINS should be pushed down");
            assert!(
                filter.as_ref().unwrap().contains("LIKE '%admin%'"),
                "Generated filter: {:?}",
                filter
            );
        }

        #[test]
        fn test_single_quotes_escaped_in_safe_string() {
            let filter = lance_filter(name_predicate(BinaryOp::Contains, text("O'Brien"))).unwrap();
            assert!(
                filter.contains("O''Brien"),
                "Single quotes should be doubled: {}",
                filter
            );
        }
    }

    mod parameter_safety {
        use super::*;

        #[test]
        fn test_parameters_not_pushed_down() {
            let rhs = Expr::Parameter("userInput".to_string());
            let filter = lance_filter(name_predicate(BinaryOp::Eq, rhs));
            assert!(
                filter.is_none(),
                "Parameterized predicates should not be pushed to storage"
            );
        }
    }
}