use super::Index;
use crate::{ApiConfig, BuildConfig, QueryConfig, chunk};
use crate::chunk::ChunkSource;
use crate::constant::{FILE_INDEX_DIR_NAME, INDEX_DIR_NAME};
use crate::error::Error;
use crate::index::{
IIStatus,
tfidf,
};
use crate::uid::{self, Uid, UidWriteMode};
use ragit_fs::{
WriteMode,
create_dir_all,
exists,
join3,
parent,
read_dir,
read_string,
remove_dir_all,
remove_file,
set_extension,
try_create_dir,
write_bytes,
};
use std::collections::hash_map::{Entry, HashMap};
use std::fmt;
pub type Path = String;
#[derive(Clone, Debug)]
pub struct RecoverResult {
removed_chunks: usize,
created_tfidfs: usize,
replaced_configs: Vec<String>,
staged_files: Vec<String>,
}
impl RecoverResult {
pub fn is_empty(&self) -> bool {
self.removed_chunks == 0
&& self.created_tfidfs == 0
&& self.replaced_configs.is_empty()
&& self.staged_files.is_empty()
}
}
impl fmt::Display for RecoverResult {
fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(
fmt,
"{} chunks removed, {} tfidfs created, {} configs replaced, {} files staged",
self.removed_chunks,
self.created_tfidfs,
self.replaced_configs.len(),
self.staged_files.len(),
)
}
}
impl Index {
pub fn recover(&mut self) -> Result<RecoverResult, Error> {
let mut processed_files: HashMap<Path, Vec<(Uid, usize)>> = HashMap::new();
let mut chunk_count = 0;
let mut result = RecoverResult {
removed_chunks: 0,
created_tfidfs: 0,
replaced_configs: vec![],
staged_files: vec![],
};
for chunk_file in self.get_all_chunk_files()? {
let chunk_ = chunk::load_from_file(&chunk_file)?;
let tfidf_file = set_extension(&chunk_file, "tfidf")?;
match &chunk_.source {
ChunkSource::File { path, .. } => {
if !self.processed_files.contains_key(path) {
remove_file(&chunk_file)?;
if exists(&tfidf_file) {
remove_file(&tfidf_file)?;
}
result.removed_chunks += 1;
continue;
}
},
}
let corrupted_tfidf_file = !exists(&tfidf_file) || tfidf::load_from_file(&tfidf_file).is_err();
if corrupted_tfidf_file {
chunk::save_to_file(
&chunk_file,
&chunk_,
self.build_config.compression_threshold,
self.build_config.compression_level,
&self.root_dir,
true, )?;
result.created_tfidfs += 1;
}
let ChunkSource::File { path, index, page: _ } = &chunk_.source;
match processed_files.entry(path.to_string()) {
Entry::Occupied(mut chunks) => {
chunks.get_mut().push((chunk_.uid, *index));
},
Entry::Vacant(e) => {
e.insert(vec![(chunk_.uid, *index)]);
},
}
chunk_count += 1;
}
let file_index_path = join3(
&self.root_dir,
&INDEX_DIR_NAME,
&FILE_INDEX_DIR_NAME,
)?;
if !exists(&file_index_path) {
create_dir_all(&file_index_path)?;
}
for dir in read_dir(&file_index_path, false)? {
remove_dir_all(&dir)?;
}
for (file, mut chunks) in processed_files.into_iter() {
chunks.sort_by_key(|(_, index)| *index);
if chunks[0].1 != 0 {
return Err(Error::BrokenIndex(format!("The first chunk of `{file}` is missing.")));
}
chunks.dedup_by_key(|(_, index)| *index);
if chunks[chunks.len() - 1].1 != chunks.len() - 1 {
return Err(Error::BrokenIndex(format!("Some chunks of `{file}` is missing.")));
}
let file_uid = self.processed_files.get(&file).unwrap();
let file_index_path = Index::get_uid_path(
&self.root_dir,
FILE_INDEX_DIR_NAME,
*file_uid,
None,
)?;
let parent_path = parent(&file_index_path)?;
if !exists(&parent_path) {
try_create_dir(&parent_path)?;
}
uid::save_to_file(
&file_index_path,
&chunks.iter().map(|(uid, _)| *uid).collect::<Vec<_>>(),
UidWriteMode::Naive,
)?;
}
if self.curr_processing_file.is_some() {
self.curr_processing_file = None;
}
self.chunk_count = chunk_count;
let reset_build_config = match read_string(&self.get_build_config_path()?) {
Ok(j) => serde_json::from_str::<BuildConfig>(&j).is_err(),
_ => true,
};
if reset_build_config {
write_bytes(
&self.get_build_config_path()?,
&serde_json::to_vec_pretty(&BuildConfig::default())?,
WriteMode::Atomic,
)?;
result.replaced_configs.push(String::from("build"));
}
let reset_query_config = match read_string(&self.get_query_config_path()?) {
Ok(j) => serde_json::from_str::<QueryConfig>(&j).is_err(),
_ => true,
};
if reset_query_config {
write_bytes(
&self.get_query_config_path()?,
&serde_json::to_vec_pretty(&QueryConfig::default())?,
WriteMode::Atomic,
)?;
result.replaced_configs.push(String::from("query"));
}
let reset_api_config = match read_string(&self.get_api_config_path()?) {
Ok(j) => serde_json::from_str::<ApiConfig>(&j).is_err(),
_ => true,
};
if reset_api_config {
write_bytes(
&self.get_api_config_path()?,
&serde_json::to_vec_pretty(&ApiConfig::default())?,
WriteMode::Atomic,
)?;
result.replaced_configs.push(String::from("api"));
}
if (result.removed_chunks > 0 || result.created_tfidfs > 0) && self.ii_status != IIStatus::None {
self.ii_status = IIStatus::Outdated;
}
self.calculate_and_save_uid()?;
self.save_to_file()?;
Ok(result)
}
}