mod lexer;
mod parser;
mod runtime;
use arrow::{array::ListArray, datatypes::DataType};
use vec1::Vec1;
use parser::{Expr, Segment};
/// A selector backed by a parsed [`Expr`].
///
/// Instances are created by parsing a query string through the
/// [`std::str::FromStr`] implementation, or internally from a path of
/// segments. The wrapped expression is private to this module.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Selector(Expr);
impl std::fmt::Display for Selector {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl Selector {
pub fn execute_per_row(&self, source: &ListArray) -> Result<ListArray, Error> {
runtime::execute_per_row(&self.0, source).map_err(Into::into)
}
}
impl std::str::FromStr for Selector {
type Err = Error;
fn from_str(query: &str) -> Result<Self, Self::Err> {
let lexer = lexer::Lexer::new(query);
let tokens = lexer.scan_tokens()?;
let parser = parser::Parser::new(tokens.into_iter());
let expr = parser.parse()?;
Ok(Self(expr))
}
}
impl crate::Transform for Selector {
type Source = ListArray;
type Target = ListArray;
fn transform(&self, source: &Self::Source) -> Result<Self::Target, crate::Error> {
self.execute_per_row(source).map_err(Into::into)
}
}
impl crate::Transform for &Selector {
type Source = ListArray;
type Target = ListArray;
fn transform(&self, source: &Self::Source) -> Result<Self::Target, crate::Error> {
self.execute_per_row(source).map_err(Into::into)
}
}
/// Errors produced while building or executing a [`Selector`].
///
/// Every variant is `transparent`: its `Display` output and error source are
/// forwarded from the wrapped error. The `#[from]` attributes generate the
/// `From` conversions used with `?` in this module.
#[derive(Debug, thiserror::Error, Clone)]
pub enum Error {
/// The lexer reported an error.
#[error(transparent)]
Lex(#[from] lexer::Error),
/// The parser reported an error.
#[error(transparent)]
Parse(#[from] parser::Error),
/// A crate-level error, produced when executing the selector.
#[error(transparent)]
Runtime(#[from] crate::Error),
}
/// Collects selectors for every nested field of a struct type whose data type
/// satisfies `predicate`.
///
/// The struct is walked breadth-first. Each field contributes a
/// `Segment::Field` with its name to the path. A `DataType::List` field also
/// contributes a `Segment::Each`, and is then judged by the data type of its
/// element rather than by the list type itself. Struct types (directly or as a
/// list element) are descended into and never tested against `predicate`;
/// every other type is a leaf that is kept only when `predicate` accepts it.
/// List-like variants other than `DataType::List` are treated as plain leaves.
///
/// Returns the matching `(selector, data type)` pairs in traversal order, or
/// `None` when `datatype` is not a `DataType::Struct` or nothing matched.
pub fn extract_nested_fields<P>(
    datatype: &DataType,
    predicate: P,
) -> Option<Vec1<(Selector, DataType)>>
where
    P: Fn(&DataType) -> bool,
{
    let root_fields = match datatype {
        DataType::Struct(fields) => fields,
        _ => return None,
    };

    let mut matches = Vec::new();
    // FIFO work list of (path so far, struct fields still to visit).
    let mut pending = std::collections::VecDeque::from([(Vec::new(), root_fields)]);

    while let Some((prefix, children)) = pending.pop_front() {
        for child in children {
            let mut segments = prefix.clone();
            segments.push(Segment::Field(child.name().clone()));

            // For a list, step into its elements and inspect the element type.
            let leaf_type = if let DataType::List(element) = child.data_type() {
                segments.push(Segment::Each);
                element.data_type()
            } else {
                child.data_type()
            };

            if let DataType::Struct(grandchildren) = leaf_type {
                pending.push_back((segments, grandchildren));
            } else if predicate(leaf_type) {
                matches.push((Selector(Expr::Path(segments)), leaf_type.clone()));
            }
        }
    }

    Vec1::try_from_vec(matches).ok()
}