#![cfg(all(test, feature = "persistence"))]
#![allow(deprecated)]
use crate::distance::DistanceMetric;
use crate::point::Point;
use crate::velesql::Parser;
use crate::Collection;
use std::collections::HashMap;
use std::path::PathBuf;
fn create_test_collection() -> (Collection, tempfile::TempDir) {
let temp_dir = tempfile::tempdir().unwrap();
let path = PathBuf::from(temp_dir.path());
let collection = Collection::create(path, 4, DistanceMetric::Cosine).unwrap();
(collection, temp_dir)
}
#[test]
fn test_parser_groupby_single_column() {
let query = Parser::parse("SELECT category, COUNT(*) FROM items GROUP BY category").unwrap();
assert!(query.select.group_by.is_some());
let group_by = query.select.group_by.as_ref().unwrap();
assert_eq!(group_by.columns.len(), 1);
assert_eq!(group_by.columns[0], "category");
}
#[test]
fn test_parser_groupby_multiple_columns() {
let query =
Parser::parse("SELECT category, status, COUNT(*) FROM items GROUP BY category, status")
.unwrap();
assert!(query.select.group_by.is_some());
let group_by = query.select.group_by.as_ref().unwrap();
assert_eq!(group_by.columns.len(), 2);
assert_eq!(group_by.columns[0], "category");
assert_eq!(group_by.columns[1], "status");
}
#[test]
fn test_parser_groupby_with_aggregations() {
let query = Parser::parse(
"SELECT category, COUNT(*), SUM(price), AVG(rating) FROM items GROUP BY category",
)
.unwrap();
assert!(query.select.group_by.is_some());
match &query.select.columns {
crate::velesql::SelectColumns::Mixed {
columns,
aggregations,
..
} => {
assert_eq!(columns.len(), 1); assert_eq!(aggregations.len(), 3); }
crate::velesql::SelectColumns::Aggregations(aggs) => {
assert!(aggs.len() >= 3); }
_ => panic!("Expected Mixed or Aggregations in SELECT"),
}
}
#[test]
fn test_executor_groupby_count() {
let (collection, _tmp) = create_test_collection();
let points = vec![
Point {
id: 1,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "tech"})),
sparse_vectors: None,
},
Point {
id: 2,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "tech"})),
sparse_vectors: None,
},
Point {
id: 3,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "tech"})),
sparse_vectors: None,
},
Point {
id: 4,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "science"})),
sparse_vectors: None,
},
Point {
id: 5,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "science"})),
sparse_vectors: None,
},
Point {
id: 6,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "history"})),
sparse_vectors: None,
},
];
collection.upsert(points).unwrap();
let query = Parser::parse("SELECT category, COUNT(*) FROM items GROUP BY category").unwrap();
let params = HashMap::new();
let result = collection.execute_aggregate(&query, ¶ms).unwrap();
let groups = result.as_array().expect("Result should be array of groups");
assert_eq!(groups.len(), 3);
let tech_group = groups
.iter()
.find(|g| g.get("category") == Some(&serde_json::json!("tech")));
assert!(tech_group.is_some());
assert_eq!(
tech_group
.unwrap()
.get("count")
.and_then(serde_json::Value::as_u64),
Some(3)
);
let science_group = groups
.iter()
.find(|g| g.get("category") == Some(&serde_json::json!("science")));
assert!(science_group.is_some());
assert_eq!(
science_group
.unwrap()
.get("count")
.and_then(serde_json::Value::as_u64),
Some(2)
);
}
#[test]
fn test_executor_groupby_multiple_aggregations() {
let (collection, _tmp) = create_test_collection();
let points = vec![
Point {
id: 1,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "tech", "price": 100})),
sparse_vectors: None,
},
Point {
id: 2,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "tech", "price": 200})),
sparse_vectors: None,
},
Point {
id: 3,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "science", "price": 150})),
sparse_vectors: None,
},
];
collection.upsert(points).unwrap();
let query = Parser::parse("SELECT category, COUNT(*), SUM(price) FROM items GROUP BY category")
.unwrap();
let params = HashMap::new();
let result = collection.execute_aggregate(&query, ¶ms).unwrap();
let groups = result.as_array().expect("Result should be array");
let tech_group = groups
.iter()
.find(|g| g.get("category") == Some(&serde_json::json!("tech")))
.unwrap();
assert_eq!(
tech_group.get("count").and_then(serde_json::Value::as_u64),
Some(2)
);
assert_eq!(
tech_group
.get("sum_price")
.and_then(serde_json::Value::as_f64),
Some(300.0)
);
let science_group = groups
.iter()
.find(|g| g.get("category") == Some(&serde_json::json!("science")))
.unwrap();
assert_eq!(
science_group
.get("count")
.and_then(serde_json::Value::as_u64),
Some(1)
);
assert_eq!(
science_group
.get("sum_price")
.and_then(serde_json::Value::as_f64),
Some(150.0)
);
}
#[test]
fn test_executor_groupby_with_avg() {
let (collection, _tmp) = create_test_collection();
let points = vec![
Point {
id: 1,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "A", "rating": 4})),
sparse_vectors: None,
},
Point {
id: 2,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "A", "rating": 6})),
sparse_vectors: None,
},
Point {
id: 3,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "B", "rating": 3})),
sparse_vectors: None,
},
];
collection.upsert(points).unwrap();
let query = Parser::parse("SELECT category, AVG(rating) FROM items GROUP BY category").unwrap();
let params = HashMap::new();
let result = collection.execute_aggregate(&query, ¶ms).unwrap();
let groups = result.as_array().expect("Result should be array");
let a_group = groups
.iter()
.find(|g| g.get("category") == Some(&serde_json::json!("A")))
.unwrap();
assert_eq!(
a_group
.get("avg_rating")
.and_then(serde_json::Value::as_f64),
Some(5.0)
); }
#[test]
fn test_executor_groupby_multiple_columns() {
let (collection, _tmp) = create_test_collection();
let points = vec![
Point {
id: 1,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "tech", "status": "active"})),
sparse_vectors: None,
},
Point {
id: 2,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "tech", "status": "active"})),
sparse_vectors: None,
},
Point {
id: 3,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "tech", "status": "inactive"})),
sparse_vectors: None,
},
Point {
id: 4,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": "science", "status": "active"})),
sparse_vectors: None,
},
];
collection.upsert(points).unwrap();
let query =
Parser::parse("SELECT category, status, COUNT(*) FROM items GROUP BY category, status")
.unwrap();
let params = HashMap::new();
let result = collection.execute_aggregate(&query, ¶ms).unwrap();
let groups = result.as_array().expect("Result should be array");
assert_eq!(groups.len(), 3); }
#[test]
fn test_groupby_limit_protection() {
let (collection, _tmp) = create_test_collection();
let points: Vec<Point> = (0..100u64)
.map(|i| Point {
id: i,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"category": format!("cat_{}", i)})),
sparse_vectors: None,
})
.collect();
collection.upsert(points).unwrap();
let query = Parser::parse("SELECT category, COUNT(*) FROM items GROUP BY category").unwrap();
let params = HashMap::new();
let result = collection.execute_aggregate(&query, ¶ms);
assert!(result.is_ok());
}
#[test]
fn test_parser_groupby_nested_field() {
let query =
Parser::parse("SELECT metadata.source, COUNT(*) FROM events GROUP BY metadata.source")
.unwrap();
let group_by = query
.select
.group_by
.as_ref()
.expect("GROUP BY should exist");
assert_eq!(group_by.columns.len(), 1);
assert_eq!(group_by.columns[0], "metadata.source");
}
#[test]
fn test_parser_groupby_multiple_nested_fields() {
let query = Parser::parse(
"SELECT metadata.source, metadata.campaign, COUNT(*) FROM events GROUP BY metadata.source, metadata.campaign"
).unwrap();
let group_by = query
.select
.group_by
.as_ref()
.expect("GROUP BY should exist");
assert_eq!(group_by.columns.len(), 2);
assert_eq!(group_by.columns[0], "metadata.source");
assert_eq!(group_by.columns[1], "metadata.campaign");
}
#[test]
fn test_parser_groupby_deep_nested_field() {
let query = Parser::parse(
"SELECT profile.address.city, COUNT(*) FROM users GROUP BY profile.address.city",
)
.unwrap();
let group_by = query
.select
.group_by
.as_ref()
.expect("GROUP BY should exist");
assert_eq!(group_by.columns.len(), 1);
assert_eq!(group_by.columns[0], "profile.address.city");
}
#[test]
fn test_executor_groupby_nested_field() {
let (collection, _tmp) = create_test_collection();
let points = vec![
Point {
id: 1,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({
"type": "click",
"metadata": {"source": "web", "campaign": "summer"}
})),
sparse_vectors: None,
},
Point {
id: 2,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({
"type": "view",
"metadata": {"source": "mobile", "campaign": "summer"}
})),
sparse_vectors: None,
},
Point {
id: 3,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({
"type": "click",
"metadata": {"source": "web", "campaign": "winter"}
})),
sparse_vectors: None,
},
Point {
id: 4,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({
"type": "view",
"metadata": {"source": "web", "campaign": "summer"}
})),
sparse_vectors: None,
},
];
collection.upsert(points).unwrap();
let query = Parser::parse(
"SELECT metadata.source, COUNT(*) as cnt FROM events GROUP BY metadata.source",
)
.unwrap();
let params = HashMap::new();
let result = collection.execute_aggregate(&query, ¶ms).unwrap();
let groups = result.as_array().expect("Result should be array");
assert_eq!(groups.len(), 2);
let web_group = groups
.iter()
.find(|g| g.get("metadata.source") == Some(&serde_json::json!("web")))
.expect("web group should exist");
assert_eq!(
web_group.get("cnt").and_then(serde_json::Value::as_u64),
Some(3)
);
let mobile_group = groups
.iter()
.find(|g| g.get("metadata.source") == Some(&serde_json::json!("mobile")))
.expect("mobile group should exist");
assert_eq!(
mobile_group.get("cnt").and_then(serde_json::Value::as_u64),
Some(1)
);
}
#[test]
fn test_executor_groupby_deep_nested_field() {
let (collection, _tmp) = create_test_collection();
let points = vec![
Point {
id: 1,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({
"profile": {"address": {"city": "Paris", "country": "FR"}}
})),
sparse_vectors: None,
},
Point {
id: 2,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({
"profile": {"address": {"city": "Lyon", "country": "FR"}}
})),
sparse_vectors: None,
},
Point {
id: 3,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({
"profile": {"address": {"city": "Paris", "country": "FR"}}
})),
sparse_vectors: None,
},
];
collection.upsert(points).unwrap();
let query = Parser::parse(
"SELECT profile.address.city, COUNT(*) FROM users GROUP BY profile.address.city",
)
.unwrap();
let params = HashMap::new();
let result = collection.execute_aggregate(&query, ¶ms).unwrap();
let groups = result.as_array().expect("Result should be array");
assert_eq!(groups.len(), 2);
let paris_group = groups
.iter()
.find(|g| g.get("profile.address.city") == Some(&serde_json::json!("Paris")))
.expect("Paris group should exist");
assert_eq!(
paris_group.get("count").and_then(serde_json::Value::as_u64),
Some(2)
);
}
#[test]
fn test_executor_groupby_nested_null_handling() {
let (collection, _tmp) = create_test_collection();
let points = vec![
Point {
id: 1,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"metadata": {"source": "web"}})),
sparse_vectors: None,
},
Point {
id: 2,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({"metadata": {}})), sparse_vectors: None,
},
Point {
id: 3,
vector: vec![0.1; 4],
payload: Some(serde_json::json!({})), sparse_vectors: None,
},
];
collection.upsert(points).unwrap();
let query =
Parser::parse("SELECT metadata.source, COUNT(*) FROM events GROUP BY metadata.source")
.unwrap();
let params = HashMap::new();
let result = collection.execute_aggregate(&query, ¶ms).unwrap();
let groups = result.as_array().expect("Result should be array");
assert_eq!(groups.len(), 2);
let null_group = groups
.iter()
.find(|g| g.get("metadata.source") == Some(&serde_json::Value::Null))
.expect("null group should exist");
assert_eq!(
null_group.get("count").and_then(serde_json::Value::as_u64),
Some(2)
);
}