use crate::Result;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
/// Inverted index over the JSON documents stored in one column of one table.
///
/// Three independent lookup tables are kept, each mapping a string term to the
/// set of row ids whose document produced that term:
/// object key names, structural paths, and rendered scalar values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GinIndex {
/// Name of this index.
pub name: String,
/// Name of the table the index belongs to.
pub table_name: String,
/// Name of the indexed column.
pub column_name: String,
// Object key name (at any nesting depth) -> ids of rows containing that key.
index: HashMap<String, HashSet<u64>>,
// Structural path (`a.b` for object members, `a[0]` for array elements)
// -> ids of rows containing that path.
path_index: HashMap<String, HashSet<u64>>,
// Text rendering of a non-container JSON value -> ids of rows containing it.
value_index: HashMap<String, HashSet<u64>>,
/// Number of distinct keys in the key table; refreshed by `insert` and `delete`.
pub total_keys: usize,
/// Number of distinct paths in the path table; refreshed by `insert` and `delete`.
pub total_paths: usize,
/// Running row count maintained by `insert` and `delete`.
pub indexed_rows: usize,
}
impl GinIndex {
    /// Creates an empty index called `name` for column `column_name` of `table_name`.
    ///
    /// All lookup tables start empty and every counter starts at zero.
    pub fn new(name: String, table_name: String, column_name: String) -> Self {
        Self {
            name,
            table_name,
            column_name,
            index: HashMap::new(),
            path_index: HashMap::new(),
            value_index: HashMap::new(),
            total_keys: 0,
            total_paths: 0,
            indexed_rows: 0,
        }
    }

    /// Indexes `json_value` under `row_id`.
    ///
    /// Every object key, structural path and scalar value found in the document
    /// is associated with `row_id`. `indexed_rows` is incremented only when the
    /// call actually added `row_id` to at least one posting set, so re-inserting
    /// a row that is already fully indexed does not inflate the count. Documents
    /// that yield no terms at all (for example `{}` or `[]`) cannot be detected
    /// that way and are always counted.
    ///
    /// # Errors
    ///
    /// Currently always returns `Ok(())`.
    pub fn insert(&mut self, row_id: u64, json_value: &serde_json::Value) -> Result<()> {
        let mut keys = HashSet::new();
        let mut paths = Vec::new();
        let mut values = HashSet::new();
        Self::extract_keys_paths_values(json_value, "", &mut keys, &mut paths, &mut values);

        // Must be evaluated before the collections are consumed below.
        let has_no_terms = keys.is_empty() && paths.is_empty() && values.is_empty();

        // Each call runs unconditionally; only the resulting flags are combined.
        let key_added = Self::add_postings(&mut self.index, keys, row_id);
        let path_added = Self::add_postings(&mut self.path_index, paths, row_id);
        let value_added = Self::add_postings(&mut self.value_index, values, row_id);

        if has_no_terms || key_added || path_added || value_added {
            self.indexed_rows += 1;
        }
        self.total_keys = self.index.len();
        self.total_paths = self.path_index.len();
        Ok(())
    }

    /// Removes `row_id` from every term produced by `json_value`.
    ///
    /// Only terms extracted from `json_value` are visited, so the document passed
    /// here should be the one that was passed to [`GinIndex::insert`]; terms that
    /// appear only in a different document stay in the index. Terms whose posting
    /// set becomes empty are dropped. `indexed_rows` is decremented only when
    /// `row_id` was actually removed from at least one posting set, so deleting an
    /// unknown row, or deleting the same row twice, leaves the count untouched.
    /// Documents that yield no terms at all are always counted, mirroring
    /// `insert`.
    ///
    /// # Errors
    ///
    /// Currently always returns `Ok(())`.
    pub fn delete(&mut self, row_id: u64, json_value: &serde_json::Value) -> Result<()> {
        let mut keys = HashSet::new();
        let mut paths = Vec::new();
        let mut values = HashSet::new();
        Self::extract_keys_paths_values(json_value, "", &mut keys, &mut paths, &mut values);

        // Must be evaluated before the collections are consumed below.
        let has_no_terms = keys.is_empty() && paths.is_empty() && values.is_empty();

        // Each call runs unconditionally; only the resulting flags are combined.
        let key_removed = Self::remove_postings(&mut self.index, keys, row_id);
        let path_removed = Self::remove_postings(&mut self.path_index, paths, row_id);
        let value_removed = Self::remove_postings(&mut self.value_index, values, row_id);

        if has_no_terms || key_removed || path_removed || value_removed {
            self.indexed_rows = self.indexed_rows.saturating_sub(1);
        }
        self.total_keys = self.index.len();
        self.total_paths = self.path_index.len();
        Ok(())
    }

    /// Returns the ids of all rows whose document contains `key` at any depth,
    /// or `None` when the key is not indexed. The order of the ids is unspecified.
    pub fn search_key(&self, key: &str) -> Option<Vec<u64>> {
        self.index.get(key).map(|rows| rows.iter().copied().collect())
    }

    /// Returns the ids of all rows containing at least one of `keys`, without
    /// duplicates, or `None` when no row matches. The order is unspecified.
    pub fn search_any_key(&self, keys: &[String]) -> Option<Vec<u64>> {
        let matches: HashSet<u64> = keys
            .iter()
            .filter_map(|key| self.index.get(key))
            .flatten()
            .copied()
            .collect();
        if matches.is_empty() {
            None
        } else {
            Some(matches.into_iter().collect())
        }
    }

    /// Returns the ids of all rows containing every one of `keys`, or `None`
    /// when `keys` is empty, when any key is not indexed, or when no row has
    /// them all. The order of the ids is unspecified.
    pub fn search_all_keys(&self, keys: &[String]) -> Option<Vec<u64>> {
        // A single unknown key makes the intersection empty: bail out early.
        let posting_sets = keys
            .iter()
            .map(|key| self.index.get(key))
            .collect::<Option<Vec<_>>>()?;
        // Drive the intersection from the smallest set so the fewest candidates
        // are probed; yields `None` when `keys` is empty.
        let smallest = posting_sets.iter().min_by_key(|rows| rows.len())?;
        let matches: Vec<u64> = smallest
            .iter()
            .copied()
            .filter(|row_id| posting_sets.iter().all(|rows| rows.contains(row_id)))
            .collect();
        if matches.is_empty() {
            None
        } else {
            Some(matches)
        }
    }

    /// Returns the ids of all rows whose document contains the structural path
    /// `path` (`a.b` for object members, `a[0]` for array elements), or `None`
    /// when the path is not indexed. The order of the ids is unspecified.
    pub fn search_path(&self, path: &str) -> Option<Vec<u64>> {
        self.path_index
            .get(path)
            .map(|rows| rows.iter().copied().collect())
    }

    /// Returns the ids of all rows whose document contains `value` as a scalar
    /// anywhere in its structure, or `None` when it is not indexed.
    ///
    /// Only non-container values are ever indexed, so looking up an object or an
    /// array finds nothing unless its text rendering happens to equal an indexed
    /// scalar's.
    pub fn search_value(&self, value: &serde_json::Value) -> Option<Vec<u64>> {
        let term = Self::hash_value(value);
        self.value_index
            .get(&term)
            .map(|rows| rows.iter().copied().collect())
    }

    /// Adds `row_id` to the posting set of every term in `terms`, creating sets
    /// as needed. Returns `true` when at least one set did not already hold it.
    fn add_postings(
        postings: &mut HashMap<String, HashSet<u64>>,
        terms: impl IntoIterator<Item = String>,
        row_id: u64,
    ) -> bool {
        let mut changed = false;
        for term in terms {
            changed |= postings.entry(term).or_default().insert(row_id);
        }
        changed
    }

    /// Removes `row_id` from the posting set of every term in `terms`, dropping
    /// sets that become empty. Returns `true` when at least one set held it.
    fn remove_postings(
        postings: &mut HashMap<String, HashSet<u64>>,
        terms: impl IntoIterator<Item = String>,
        row_id: u64,
    ) -> bool {
        let mut changed = false;
        for term in terms {
            if let Some(rows) = postings.get_mut(&term) {
                changed |= rows.remove(&row_id);
                if rows.is_empty() {
                    postings.remove(&term);
                }
            }
        }
        changed
    }

    /// Walks `value` recursively and collects its index terms.
    ///
    /// * every object key is added to `keys` and, joined to `current_path` with
    ///   a `.`, to `paths`;
    /// * every array element contributes `current_path[idx]` to `paths`;
    /// * every other value is rendered with [`GinIndex::hash_value`] and added
    ///   to `values`.
    ///
    /// `current_path` is the path of `value` itself; it is `""` at the root.
    fn extract_keys_paths_values(
        value: &serde_json::Value,
        current_path: &str,
        keys: &mut HashSet<String>,
        paths: &mut Vec<String>,
        values: &mut HashSet<String>,
    ) {
        match value {
            serde_json::Value::Object(members) => {
                for (key, member) in members {
                    keys.insert(key.clone());
                    let path = if current_path.is_empty() {
                        key.clone()
                    } else {
                        format!("{}.{}", current_path, key)
                    };
                    paths.push(path.clone());
                    Self::extract_keys_paths_values(member, &path, keys, paths, values);
                }
            }
            serde_json::Value::Array(elements) => {
                for (idx, element) in elements.iter().enumerate() {
                    let path = if current_path.is_empty() {
                        format!("[{}]", idx)
                    } else {
                        format!("{}[{}]", current_path, idx)
                    };
                    paths.push(path.clone());
                    Self::extract_keys_paths_values(element, &path, keys, paths, values);
                }
            }
            // Every remaining variant is a non-container value.
            _ => {
                values.insert(Self::hash_value(value));
            }
        }
    }

    /// Produces the lookup term for `value`.
    ///
    /// Despite the name this is not a digest: it is the value's `to_string()`
    /// rendering, used verbatim as the map key.
    fn hash_value(value: &serde_json::Value) -> String {
        value.to_string()
    }

    /// Returns a snapshot of the index's identifying names and size counters.
    pub fn statistics(&self) -> GinIndexStats {
        GinIndexStats {
            name: self.name.clone(),
            table_name: self.table_name.clone(),
            column_name: self.column_name.clone(),
            total_keys: self.total_keys,
            total_paths: self.total_paths,
            total_values: self.value_index.len(),
            indexed_rows: self.indexed_rows,
        }
    }
}
/// Point-in-time summary of a `GinIndex`, as returned by `GinIndex::statistics`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GinIndexStats {
/// Name of the index.
pub name: String,
/// Name of the table the index belongs to.
pub table_name: String,
/// Name of the indexed column.
pub column_name: String,
/// Copy of the index's `total_keys` counter.
pub total_keys: usize,
/// Copy of the index's `total_paths` counter.
pub total_paths: usize,
/// Number of distinct entries in the index's value table.
pub total_values: usize,
/// Copy of the index's `indexed_rows` counter.
pub indexed_rows: usize,
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds the empty index every test starts from.
    fn empty_index() -> GinIndex {
        GinIndex::new(
            String::from("test_idx"),
            String::from("test_table"),
            String::from("data"),
        )
    }

    /// A freshly created index carries its name and zeroed counters.
    #[test]
    fn test_gin_index_creation() {
        let idx = empty_index();

        assert_eq!(idx.name, "test_idx");
        assert_eq!(idx.total_keys, 0);
        assert_eq!(idx.indexed_rows, 0);
    }

    /// Inserting one document counts one row and at least its three keys.
    #[test]
    fn test_gin_index_insert() {
        let mut idx = empty_index();
        let doc = json!({
            "name": "Alice",
            "age": 30,
            "tags": ["rust", "database"]
        });

        idx.insert(1, &doc).expect("Failed to insert");

        assert_eq!(idx.indexed_rows, 1);
        assert!(idx.total_keys >= 3);
    }

    /// A key lookup returns exactly the rows whose document has that key.
    #[test]
    fn test_gin_index_key_search() {
        let mut idx = empty_index();
        idx.insert(1, &json!({"name": "Alice", "city": "NYC"})).unwrap();
        idx.insert(2, &json!({"name": "Bob", "country": "USA"})).unwrap();

        let shared = idx.search_key("name").unwrap();
        assert_eq!(shared.len(), 2);

        let only_first = idx.search_key("city").unwrap();
        assert_eq!(only_first, vec![1]);
    }

    /// An any-key lookup unions the rows of all requested keys.
    #[test]
    fn test_gin_index_any_key_search() {
        let mut idx = empty_index();
        idx.insert(1, &json!({"name": "Alice", "city": "NYC"})).unwrap();
        idx.insert(2, &json!({"name": "Bob", "country": "USA"})).unwrap();

        let wanted = vec![String::from("city"), String::from("country")];
        let hits = idx.search_any_key(&wanted).unwrap();

        assert_eq!(hits.len(), 2);
    }

    /// An all-keys lookup keeps only rows that have every requested key.
    #[test]
    fn test_gin_index_all_keys_search() {
        let mut idx = empty_index();
        idx.insert(1, &json!({"name": "Alice", "city": "NYC", "age": 30}))
            .unwrap();
        idx.insert(2, &json!({"name": "Bob", "country": "USA"})).unwrap();

        let wanted = vec![String::from("name"), String::from("city")];
        let hits = idx.search_all_keys(&wanted).unwrap();

        assert_eq!(hits, vec![1]);
    }

    /// Deleting the only row empties the counters again.
    #[test]
    fn test_gin_index_delete() {
        let mut idx = empty_index();
        let doc = json!({"name": "Alice", "age": 30});

        idx.insert(1, &doc).unwrap();
        assert_eq!(idx.indexed_rows, 1);

        idx.delete(1, &doc).unwrap();
        assert_eq!(idx.indexed_rows, 0);
        assert_eq!(idx.total_keys, 0);
    }

    /// Keys of nested objects are indexed regardless of depth.
    #[test]
    fn test_gin_index_nested_json() {
        let mut idx = empty_index();
        let doc = json!({
            "user": {
                "name": "Alice",
                "address": {
                    "city": "NYC"
                }
            }
        });

        idx.insert(1, &doc).unwrap();

        for key in ["user", "name", "address", "city"] {
            assert!(idx.search_key(key).is_some());
        }
    }
}