use log::{debug, info};
use serde_json::Value;
/// Errors that can occur while parsing a filter or the metadata it is
/// applied to.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// propagated with `?` into `Box<dyn std::error::Error>` and printed in
/// user-facing messages.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Md2fsError {
    /// The input string could not be parsed as JSON.
    SerdeJsonError,
    /// The input was valid JSON but could not be interpreted as a filter
    /// (for example it is not an object, or holds an unsupported value).
    ParseError,
}

impl std::fmt::Display for Md2fsError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Md2fsError::SerdeJsonError => "input is not valid JSON",
            Md2fsError::ParseError => "JSON could not be interpreted as a metadata filter",
        };
        f.write_str(message)
    }
}

impl std::error::Error for Md2fsError {}
/// Comparison operators that a filter clause can carry.
#[derive(Debug, Eq, PartialEq)]
enum FilterOperations {
    /// `eq`: equality.
    EqualTo,
    /// `gte`: greater than or equal to.
    GreaterThanEqualTo,
    /// `gt`: strictly greater than.
    GreaterThan,
    /// `lt`: strictly less than.
    LessThan,
    /// `lte`: less than or equal to.
    LessThanEqualTo,
    /// `in`: membership in a list of values.
    In,
    /// No operator was supplied with the filter.
    Noop,
}

impl FilterOperations {
    /// Translates an operator name into its enum variant.
    ///
    /// A single leading `$` is ignored, so `"$gte"` and `"gte"` are
    /// equivalent. Any name that is not one of `gt`, `gte`, `lt`, `lte` or
    /// `in` (including `eq` itself and the empty string) yields
    /// [`FilterOperations::EqualTo`].
    fn get_enum(s: &str) -> FilterOperations {
        let name = match s.strip_prefix('$') {
            Some(rest) => rest,
            None => s,
        };
        match name {
            "gt" => Self::GreaterThan,
            "gte" => Self::GreaterThanEqualTo,
            "lt" => Self::LessThan,
            "lte" => Self::LessThanEqualTo,
            "in" => Self::In,
            // "eq" and every unrecognised name resolve to equality.
            _ => Self::EqualTo,
        }
    }
}
/// A parsed filter, tagged by the type of value it compares against.
///
/// This type is `pub` because it is the return type of the public
/// [`Filter::create_filter`] method; a private type there is a
/// private-type-in-public-interface problem for the compiler.
#[derive(Debug)]
pub enum MetadataFilterResult {
    /// Filter whose value is an unsigned 64-bit integer.
    U64Filter(MetadataFilter<u64>),
    /// Filter whose value is a single string.
    StringFilter(MetadataFilter<String>),
    /// Filter whose value is a list of strings (used by the `in` operator).
    StringVecFilter(MetadataFilter<Vec<String>>),
}
/// A single filter condition: the metadata key it applies to, the value to
/// compare against, and the comparison operator.
#[derive(Debug)]
pub struct MetadataFilter<T> {
/// Name of the metadata field the filter applies to.
key: String,
/// Value the metadata field is compared against. The filters built in this
/// file use `u64`, `String` or `Vec<String>` for `T`.
value: T,
/// Comparison operator (eq, gt, gte, in, lt, lte), or `Noop` when the filter
/// was written without an operator.
filter: FilterOperations,
}
/// An empty filter: empty key, `T`'s default value and no operator.
impl<T> Default for MetadataFilter<T>
where
    T: Default,
{
    /// Builds a filter with an empty key, `T::default()` as its value and
    /// [`FilterOperations::Noop`] as its operator.
    fn default() -> Self {
        Self {
            key: String::new(),
            value: T::default(),
            filter: FilterOperations::Noop,
        }
    }
}
/// Construction and comparison operations for a metadata filter.
///
/// The comparison methods take both `self` and `m` by value, so each call
/// consumes the two filters involved.
pub trait Filter<T> {
/// Parses `raw`, a JSON string, into a typed filter, or returns an
/// [`Md2fsError`] when it cannot be interpreted.
fn create_filter(raw: &str) -> Result<MetadataFilterResult, Md2fsError>;
/// Equality comparison between `self` and `m`.
fn eq(self, m: MetadataFilter<T>) -> bool;
/// Greater-than comparison. The implementation for `MetadataFilter` in this
/// file tests `m.value > self.value`.
fn gt(self, m: MetadataFilter<T>) -> bool;
/// Greater-than-or-equal comparison. The implementation for `MetadataFilter`
/// in this file tests `m.value >= self.value`.
fn gte(self, m: MetadataFilter<T>) -> bool;
/// Less-than comparison. The implementation for `MetadataFilter` in this
/// file tests `m.value < self.value`.
fn lt(self, m: MetadataFilter<T>) -> bool;
/// Less-than-or-equal comparison. The implementation for `MetadataFilter`
/// in this file tests `m.value <= self.value`.
fn lte(self, m: MetadataFilter<T>) -> bool;
}
impl<T> Filter<T> for MetadataFilter<T>
where
T: PartialEq + PartialOrd + Default,
{
/// Create a filter on a valid string value
fn create_filter(raw: &str) -> Result<MetadataFilterResult, Md2fsError> {
let v: Result<Value, serde_json::Error> = serde_json::from_str(raw);
if v.is_err() {
debug!("invalid json string");
return Err(Md2fsError::SerdeJsonError);
}
let u_v: Value = v.map_err(|_| Md2fsError::ParseError)?;
let vo = u_v.as_object();
if vo.is_none() {
debug!("could not parse string");
return Err(Md2fsError::ParseError);
}
let key = match vo {
Some(v) => v.keys().next().unwrap_or(&String::new()).to_string(),
_ => String::new(),
};
let vo2 = match vo {
Some(v) => v[&key].as_object(),
_ => None,
};
if vo2.is_none() {
info!("no op key found, processing as metadata");
let p_value = &u_v[&key];
if p_value.is_string() {
let value: String = p_value.as_str().unwrap_or_default().to_string();
return Ok(MetadataFilterResult::StringFilter(MetadataFilter {
key,
filter: FilterOperations::Noop,
value,
}));
} else {
let value: u64 = p_value.as_u64().unwrap_or_default();
return Ok(MetadataFilterResult::U64Filter(MetadataFilter {
key,
filter: FilterOperations::Noop,
value,
}));
}
}
let op = match vo2 {
Some(v) => v.keys().next().unwrap_or(&String::new()).to_string(),
_ => String::new(),
};
let value = match vo2 {
Some(v) => &v[&op],
_ => &Value::Null,
};
let filter: FilterOperations = FilterOperations::get_enum(&op);
if filter == FilterOperations::In {
if let Some(arr) = value.as_array() {
let str_vec: Vec<String> = arr
.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect();
return Ok(MetadataFilterResult::StringVecFilter(MetadataFilter {
key,
filter,
value: str_vec,
}));
}
}
if value.is_string() {
let value = value.as_str().unwrap_or_default().to_string();
return Ok(MetadataFilterResult::StringFilter(MetadataFilter {
key,
filter,
value,
}));
}
if value.is_number() {
let value = value.as_u64().unwrap_or_default();
return Ok(MetadataFilterResult::U64Filter(MetadataFilter {
key,
filter,
value,
}));
}
Err(Md2fsError::ParseError)
}
fn eq(self, m: MetadataFilter<T>) -> bool {
self.key == m.key && self.value == m.value
}
fn gt(self, m: MetadataFilter<T>) -> bool {
self.key == m.key && m.value > self.value
}
fn gte(self, m: MetadataFilter<T>) -> bool {
self.key == m.key && m.value >= self.value
}
fn lt(self, m: MetadataFilter<T>) -> bool {
self.key == m.key && m.value < self.value
}
fn lte(self, m: MetadataFilter<T>) -> bool {
self.key == m.key && m.value <= self.value
}
}
fn process_filter(raw_f: &str, raw_m: &str) -> Result<bool, Md2fsError> {
// 1. Parse the filter JSON to get key, op, and filter value.
let filter_result = MetadataFilter::<String>::create_filter(raw_f)?;
// 2. Parse the metadata JSON into a generic Value object.
let meta_json: Value = serde_json::from_str(raw_m).map_err(|_| Md2fsError::SerdeJsonError)?;
let meta_obj = match meta_json.as_object() {
Some(obj) => obj,
None => return Ok(false), // Metadata is not a valid JSON object.
};
// 3. Match on the filter type and perform the check.
match filter_result {
MetadataFilterResult::StringVecFilter(f_vec) => {
if let Some(meta_val) = meta_obj.get(&f_vec.key) {
if let Some(m_str) = meta_val.as_str() {
if f_vec.filter == FilterOperations::In {
return Ok(f_vec.value.contains(&m_str.to_string()));
}
}
}
Ok(false)
}
MetadataFilterResult::StringFilter(f_str) => {
if let Some(meta_val) = meta_obj.get(&f_str.key) {
if let Some(m_str) = meta_val.as_str() {
if f_str.filter == FilterOperations::EqualTo || f_str.filter == FilterOperations::Noop {
return Ok(f_str.value == m_str);
}
}
}
Ok(false)
}
MetadataFilterResult::U64Filter(f_u64) => {
if let Some(meta_val) = meta_obj.get(&f_u64.key) {
if let Some(m_u64) = meta_val.as_u64() {
return Ok(match f_u64.filter {
FilterOperations::EqualTo | FilterOperations::Noop => m_u64 == f_u64.value,
FilterOperations::GreaterThan => m_u64 > f_u64.value,
FilterOperations::GreaterThanEqualTo => m_u64 >= f_u64.value,
FilterOperations::LessThan => m_u64 < f_u64.value,
FilterOperations::LessThanEqualTo => m_u64 <= f_u64.value,
_ => false,
});
}
}
Ok(false)
}
}
}
/// Checks a set of JSON filters against a set of JSON metadata documents,
/// the equivalent of an SQL `where` clause.
///
/// Each entry of `raw_f` is a filter and each entry of `raw_m` is a metadata
/// document. Returns `Ok(true)` when every filter matches at least one of the
/// metadata documents, and `Ok(false)` as soon as one filter matches none of
/// them. An empty `raw_f` always yields `Ok(true)`.
///
/// # Errors
///
/// Returns the first error produced while evaluating a filter against a
/// metadata document.
pub fn filter_where(raw_f: &[String], raw_m: &[String]) -> Result<bool, Md2fsError> {
    'filters: for filter in raw_f {
        for meta_part in raw_m {
            if process_filter(filter, meta_part)? {
                // This filter is satisfied; go on to the next one.
                continue 'filters;
            }
        }
        // No metadata document satisfied this filter, so the whole check fails.
        return Ok(false);
    }
    // Every filter (possibly none) found a match.
    Ok(true)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Filter requiring `Rating >= 4`.
    const RATING_GTE_4: &str = r#"{"Rating": {"$gte": 4}}"#;
    /// Filter requiring `genre` to be "music" or "history".
    const GENRE_IN_MUSIC_HISTORY: &str = r#"{"genre": {"$in": ["music", "history"]}}"#;
    /// Filter requiring `year == 2020`.
    const YEAR_EQ_2020: &str = r#"{"year": {"$eq": 2020}}"#;

    /// Evaluates one filter against one metadata document, panicking on error.
    fn single(filter: &str, meta: &str) -> bool {
        process_filter(filter, meta).unwrap()
    }

    /// Evaluates `filter_where` over string literals, panicking on error.
    fn many(filters: &[&str], metadata: &[&str]) -> bool {
        let filters: Vec<String> = filters.iter().map(|f| f.to_string()).collect();
        let metadata: Vec<String> = metadata.iter().map(|m| m.to_string()).collect();
        filter_where(&filters, &metadata).unwrap()
    }

    #[test]
    fn test_gte_pass() {
        assert!(single(RATING_GTE_4, r#"{"Rating": 5}"#));
    }

    #[test]
    fn test_gte_fail() {
        assert!(!single(RATING_GTE_4, r#"{"Rating": 3}"#));
    }

    #[test]
    fn test_gte_equal_pass() {
        assert!(single(RATING_GTE_4, r#"{"Rating": 4}"#));
    }

    #[test]
    fn test_lte_pass() {
        assert!(single(r#"{"Rating": {"$lte": 4}}"#, r#"{"Rating": 3}"#));
    }

    #[test]
    fn test_in_pass() {
        assert!(single(GENRE_IN_MUSIC_HISTORY, r#"{"genre": "history"}"#));
    }

    #[test]
    fn test_in_fail() {
        assert!(!single(GENRE_IN_MUSIC_HISTORY, r#"{"genre": "sci-fi"}"#));
    }

    #[test]
    fn test_key_mismatch() {
        assert!(!single(RATING_GTE_4, r#"{"Score": 5}"#));
    }

    #[test]
    fn test_type_mismatch() {
        assert!(!single(RATING_GTE_4, r#"{"Rating": "good"}"#));
    }

    #[test]
    fn test_filter_where_pass() {
        assert!(many(
            &[RATING_GTE_4, YEAR_EQ_2020],
            &[r#"{"Rating": 5, "year": 2020}"#],
        ));
    }

    #[test]
    fn test_filter_where_fail() {
        assert!(!many(
            &[RATING_GTE_4, YEAR_EQ_2020],
            &[r#"{"Rating": 3, "year": 2020}"#],
        ));
    }

    #[test]
    fn test_filter_where_no_filters() {
        assert!(many(&[], &[r#"{"Rating": 5}"#]));
    }

    #[test]
    fn test_filter_where_no_matching_meta() {
        assert!(!many(
            &[r#"{"genre": {"$in": ["sci-fi"]}}"#],
            &[r#"{"genre": "history"}"#, r#"{"genre": "music"}"#],
        ));
    }
}