use std::collections::{HashMap, HashSet};
use std::convert::TryInto;
use std::fs;
use serde_json::Value;
use crate::{error::DbError, pager::Pager};
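/// A minimal page-based JSON key-value store.
///
/// On-disk layout: page 0 holds the serialized primary index
/// (key -> page id); every record occupies one whole page, encoded as a
/// 4-byte little-endian length prefix followed by the JSON payload.
/// `secondary_indexes` maps field name -> field value -> keys and is
/// rebuilt from the data pages on `open`; an empty `indexed_fields` set
/// means every field is indexed.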
pub struct YourDb {
pager: Pager,
    index: HashMap<String, u32>,
    secondary_indexes: HashMap<String, HashMap<Value, HashSet<String>>>,
    next_page_id: u32,
    indexed_fields: HashSet<String>,
}
impl YourDb {
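    /// Opens the database at `path`. The primary index is loaded from
    /// page 0, falling back to an empty index if that page is missing or
    /// corrupt, and the secondary indexes are rebuilt by scanning every
    /// data page the index references.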
pub fn open(path: &str) -> Result<Self, DbError> {
let pager = Pager::new(path)?;
let index: HashMap<String, u32> = match pager.get_page(0) {
            Ok(page0) => match Self::read_json_from_page(&page0) {
Ok(val) => serde_json::from_value(val).unwrap_or_default(),
Err(_) => HashMap::new(),
},
Err(_) => HashMap::new(),
};
let next_page_id = index.values().copied().max().unwrap_or(0).saturating_add(1);
let mut secondary_indexes: HashMap<String, HashMap<Value, HashSet<String>>> = HashMap::new();
for (key, &page_id) in &index {
if let Ok(data) = pager.get_page(page_id) {
match Self::read_json_from_page(&data) {
Ok(val) => {
if let Some(obj) = val.as_object() {
for (field, field_value) in obj {
let entry = secondary_indexes
.entry(field.clone())
.or_default()
.entry(field_value.clone())
.or_default();
entry.insert(key.clone());
}
}
}
Err(_) => {
continue;
}
}
}
}
Ok(Self {
pager,
index,
secondary_indexes,
next_page_id,
indexed_fields: HashSet::new(),
})
}
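    /// Decodes one page: a 4-byte little-endian length prefix followed by
    /// that many bytes of JSON. Fails if the page is too short, the length
    /// runs past the page boundary, or the payload is not valid JSON.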
fn read_json_from_page(page: &[u8]) -> Result<Value, DbError> {
if page.len() < 4 {
return Err(DbError::Other("Corrupted page: too short".into()));
}
let len_bytes: [u8; 4] = page[..4].try_into().map_err(|_| DbError::Other("Failed to read length".into()))?;
let len = u32::from_le_bytes(len_bytes) as usize;
if page.len() < 4 + len {
return Err(DbError::Other("Corrupted page: length out of bounds".into()));
}
let json = serde_json::from_slice(&page[4..4 + len]).map_err(|e| DbError::Other(format!("Invalid JSON on page: {}", e)))?;
Ok(json)
}
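    /// Inserts or replaces the record for `key`. The encoded value must fit
    /// in one page (payload plus 4-byte length prefix). On overwrite, the
    /// old record's secondary-index entries are removed before the new ones
    /// are added. The primary index itself is only persisted by `flush`.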
pub fn put(&mut self, key: &str, value: &Value) -> Result<(), DbError> {
if key.is_empty() {
return Err(DbError::Other("Key must not be empty".into()));
}
let json_bytes = serde_json::to_vec(value).map_err(|e| DbError::Other(format!("Serialize error: {}", e)))?;
if (json_bytes.len() + 4) > crate::util::PAGE_SIZE {
return Err(DbError::Other("JSON too large for page".into()));
}
if let Some(existing_val) = self.get(key)? {
if let Some(obj) = existing_val.as_object() {
for (field, field_value) in obj {
if !self.indexed_fields.is_empty() && !self.indexed_fields.contains(field) {
continue;
}
if let Some(val_map) = self.secondary_indexes.get_mut(field) {
if let Some(keys) = val_map.get_mut(field_value) {
keys.remove(key);
if keys.is_empty() {
val_map.remove(field_value);
}
}
if val_map.is_empty() {
self.secondary_indexes.remove(field);
}
}
}
}
}
let mut page_data = vec![0u8; crate::util::PAGE_SIZE];
page_data[..4].copy_from_slice(&(json_bytes.len() as u32).to_le_bytes());
page_data[4..4 + json_bytes.len()].copy_from_slice(&json_bytes);
let page_id = if let Some(&existing_page_id) = self.index.get(key) {
existing_page_id
} else {
let pid = self.next_page_id;
self.next_page_id = self.next_page_id.saturating_add(1);
pid
};
self.pager.write_page(page_id, &page_data)?;
self.index.insert(key.to_string(), page_id);
if let Some(obj) = value.as_object() {
for (field, field_value) in obj {
if !self.indexed_fields.is_empty() && !self.indexed_fields.contains(field) {
continue;
}
let entry = self.secondary_indexes
.entry(field.clone())
.or_default()
.entry(field_value.clone())
.or_default();
entry.insert(key.to_string());
}
}
Ok(())
}
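    /// Looks up `key` in the primary index and decodes its page. A page
    /// that fails to decode is reported as `Ok(None)` rather than an error.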
pub fn get(&self, key: &str) -> Result<Option<Value>, DbError> {
if let Some(&page_id) = self.index.get(key) {
let data = self.pager.get_page(page_id)?;
match Self::read_json_from_page(&data) {
Ok(json) => Ok(Some(json)),
            Err(_) => Ok(None),
        }
} else {
Ok(None)
}
}
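    /// Serializes the primary index to page 0 and flushes the pager. Until
    /// this runs, keys written since the last flush exist only in memory,
    /// even though their data pages have already been handed to the pager.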
pub fn flush(&mut self) -> Result<(), DbError> {
let index_bytes = serde_json::to_vec(&self.index).map_err(|e| DbError::Other(format!("Index serialize error: {}", e)))?;
let mut page = vec![0u8; crate::util::PAGE_SIZE];
if index_bytes.len() + 4 > page.len() {
return Err(DbError::Other("Index too large for a single page".into()));
}
page[..4].copy_from_slice(&(index_bytes.len() as u32).to_le_bytes());
page[4..4 + index_bytes.len()].copy_from_slice(&index_bytes);
self.pager.write_page(0, &page)?;
self.pager.flush()
}
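    /// Removes `key` from the primary and secondary indexes. The record's
    /// page is not reclaimed here; it stays orphaned until `compact`
    /// rewrites the data pages.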
pub fn delete(&mut self, key: &str) -> Result<(), DbError> {
if let Some(existing_val) = self.get(key)? {
if let Some(obj) = existing_val.as_object() {
for (field, field_value) in obj {
if !self.indexed_fields.is_empty() && !self.indexed_fields.contains(field) {
continue;
}
if let Some(val_map) = self.secondary_indexes.get_mut(field) {
if let Some(keys) = val_map.get_mut(field_value) {
keys.remove(key);
if keys.is_empty() {
val_map.remove(field_value);
}
}
if val_map.is_empty() {
self.secondary_indexes.remove(field);
}
}
}
}
}
if self.index.remove(key).is_some() {
Ok(())
} else {
Err(DbError::Other("Key not found".into()))
}
}
pub fn get_field(&self, key: &str, field: &str) -> Result<Option<Value>, DbError> {
if let Some(val) = self.get(key)? {
Ok(val.get(field).cloned())
} else {
Ok(None)
}
}
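    /// Full scan: decodes every record and keeps those matching `predicate`.
    /// Linear in the number of keys, unlike the index-backed `query`.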
pub fn filter<F>(&self, predicate: F) -> Result<Vec<(String, Value)>, DbError>
where
F: Fn(&Value) -> bool,
{
let mut results = Vec::new();
for key in self.index.keys() {
if let Some(val) = self.get(key)? {
if predicate(&val) {
results.push((key.clone(), val));
}
}
}
Ok(results)
}
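    /// Exact-match lookup through the secondary index: returns the keys of
    /// every record whose `field` equals `value`, without touching the pager.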
pub fn query(&self, field: &str, value: impl Into<Value>) -> Result<Vec<String>, DbError> {
let val = value.into();
if let Some(val_map) = self.secondary_indexes.get(field) {
if let Some(keys) = val_map.get(&val) {
return Ok(keys.iter().cloned().collect());
}
}
Ok(vec![])
}
pub fn query_page(&self, field: &str, value: impl Into<Value>, limit: usize, offset: usize) -> Result<Vec<String>, DbError> {
let keys = self.query(field, value)?;
Ok(keys.into_iter().skip(offset).take(limit).collect())
}
pub fn export_query(&self, field: &str, value: impl Into<Value>, path: &str) -> Result<(), DbError> {
let keys = self.query(field, value)?;
let mut map = HashMap::new();
for k in keys {
if let Some(v) = self.get(&k)? {
map.insert(k, v);
}
}
fs::write(path, serde_json::to_string_pretty(&map).map_err(|e| DbError::Other(format!("{}", e)))?)?;
Ok(())
}
pub fn export_to_file(&self, path: &str) -> Result<(), DbError> {
let mut map = HashMap::new();
        for k in self.index.keys() {
if let Some(v) = self.get(k)? {
map.insert(k.clone(), v);
}
}
let json = serde_json::to_string_pretty(&map).map_err(|e| DbError::Other(format!("{}", e)))?;
fs::write(path, json)?;
Ok(())
}
pub fn show_all(&self) -> Result<(), DbError> {
for (key, &page_id) in &self.index {
let page = self.pager.get_page(page_id)?;
let json: serde_json::Value = match Self::read_json_from_page(&page) {
Ok(v) => v,
Err(_) => serde_json::json!(null),
};
println!("{} => {}", key, json);
}
Ok(())
}
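    /// Returns the keys whose numeric `field` value lies in `[min, max]`
    /// (inclusive). Walks every distinct indexed value for `field`, so the
    /// cost is linear in the number of distinct values; non-numeric bounds
    /// match nothing.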
    pub fn range_query(&self, field: &str, min: Value, max: Value) -> Result<Vec<String>, DbError> {
        if let Some(val_map) = self.secondary_indexes.get(field) {
            let (min_n, max_n) = match (min.as_f64(), max.as_f64()) {
                (Some(a), Some(b)) => (a, b),
                _ => return Ok(vec![]),
            };
            let mut results = Vec::new();
            for (val, keys) in val_map {
                if let Some(n) = val.as_f64() {
                    if n >= min_n && n <= max_n {
                        results.extend(keys.iter().cloned());
                    }
                }
            }
            return Ok(results);
        }
        Ok(vec![])
    }
    /// Updates a single field of the record stored at `key` and rewrites
    /// the record. Fails if `key` does not exist; if the stored value is
    /// not a JSON object, the record is rewritten unchanged.
    pub fn update_field(&mut self, key: &str, field: &str, new_value: Value) -> Result<(), DbError> {
        if let Some(mut val) = self.get(key)? {
            if let Some(obj) = val.as_object_mut() {
                obj.insert(field.to_string(), new_value);
            }
            // `put` re-reads the stored record, removes its old
            // secondary-index entries, and inserts the new ones, so no
            // manual index maintenance is needed here. This also keeps the
            // indexes consistent if `put` rejects an oversized value.
            self.put(key, &val)
        } else {
            Err(DbError::Other("Key not found".into()))
        }
    }
pub fn search_contains(&self, field: &str, substring: &str) -> Result<Vec<String>, DbError> {
let mut results = Vec::new();
if let Some(val_map) = self.secondary_indexes.get(field) {
for (val, keys) in val_map {
if let Some(s) = val.as_str() {
if s.contains(substring) {
results.extend(keys.iter().cloned());
}
}
}
}
Ok(results)
}
pub fn list_keys(&self) -> Vec<String> {
self.index.keys().cloned().collect()
}
pub fn count(&self) -> usize {
self.index.len()
}
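    /// `indexed_fields` is consulted throughout but nothing here populates
    /// it, so by default every field is indexed. A minimal setter sketch
    /// (a hypothetical addition, not part of the original API); it only
    /// affects records written afterwards, so run `compact` after changing
    /// it to rebuild the secondary indexes consistently.
    pub fn set_indexed_fields(&mut self, fields: impl IntoIterator<Item = String>) {
        self.indexed_fields = fields.into_iter().collect();
    }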
    /// Rewrites all live records into contiguous pages starting at page 1,
    /// rebuilding both indexes, then flushes. Reclaims pages orphaned by
    /// `delete`.
    pub fn compact(&mut self) -> Result<(), DbError> {
        // Read every live record into memory before writing anything:
        // new pages are assigned from 1 upward and can collide with old
        // page ids, so writing while old pages are still unread could
        // clobber a record that has not been read yet.
        let mut records: Vec<(String, Value)> = Vec::with_capacity(self.index.len());
        for key in self.index.keys().cloned().collect::<Vec<_>>() {
            if let Some(val) = self.get(&key)? {
                records.push((key, val));
            }
        }
        let mut new_index = HashMap::new();
        let mut new_secondary: HashMap<String, HashMap<Value, HashSet<String>>> = HashMap::new();
        let mut new_page_id: u32 = 1;
        for (key, val) in records {
            let json_bytes = serde_json::to_vec(&val).map_err(|e| DbError::Other(format!("Serialize error: {}", e)))?;
            if (json_bytes.len() + 4) > crate::util::PAGE_SIZE {
                return Err(DbError::Other(format!("Record too large during compact: key={}", key)));
            }
            let mut page_data = vec![0u8; crate::util::PAGE_SIZE];
            page_data[..4].copy_from_slice(&(json_bytes.len() as u32).to_le_bytes());
            page_data[4..4 + json_bytes.len()].copy_from_slice(&json_bytes);
            self.pager.write_page(new_page_id, &page_data)?;
            new_index.insert(key.clone(), new_page_id);
            if let Some(obj) = val.as_object() {
                for (field, field_value) in obj {
                    if !self.indexed_fields.is_empty() && !self.indexed_fields.contains(field) {
                        continue;
                    }
                    new_secondary
                        .entry(field.clone())
                        .or_default()
                        .entry(field_value.clone())
                        .or_default()
                        .insert(key.clone());
                }
            }
            new_page_id = new_page_id.saturating_add(1);
        }
        self.index = new_index;
        self.secondary_indexes = new_secondary;
        self.next_page_id = new_page_id;
        self.flush()
    }
}
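#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // Minimal usage sketch, not a definitive test: it assumes `Pager::new`
    // creates the backing file when absent, that `get_page`/`write_page`
    // behave as the code above expects, and that `DbError` derives `Debug`.
    #[test]
    fn put_get_query_roundtrip() {
        let path = std::env::temp_dir().join("yourdb_roundtrip.db");
        let _ = std::fs::remove_file(&path);
        let mut db = YourDb::open(path.to_str().unwrap()).unwrap();
        db.put("user:1", &json!({ "name": "ada", "age": 36 })).unwrap();
        db.put("user:2", &json!({ "name": "bob", "age": 36 })).unwrap();
        // Point lookup through the primary index.
        assert_eq!(db.get_field("user:1", "name").unwrap(), Some(json!("ada")));
        // Exact-match lookup through the secondary index.
        let mut hits = db.query("age", 36).unwrap();
        hits.sort();
        assert_eq!(hits, vec!["user:1".to_string(), "user:2".to_string()]);
        // Persist the primary index to page 0.
        db.flush().unwrap();
    }
}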