use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fs;
use config::Config;
use config::File;
use config::FileFormat;
use serde_json::{json, Value};
use walkdir::WalkDir;
/// Inverted index: maps each word to the paths of the files it was found in.
type IndexMap = HashMap<String, Vec<String>>;
/// Settings read from the `general` table of `settings.toml` by [`Settings::build`].
///
/// Derives the common traits so values can be logged, duplicated and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Settings {
    /// Directory that `perform_indexing` walks to find the files to index.
    pub path_to_data: String,
    /// Value of the `path_to_save_index_map` key.
    ///
    /// NOTE(review): presumably the directory the index should be saved to, but
    /// no code in this file reads it — confirm against callers.
    pub path_to_save_index_map: String,
    /// Value of the `index_map_name` key.
    ///
    /// NOTE(review): presumably the file name of the saved index, but no code
    /// in this file reads it — confirm against callers.
    pub index_map_name: String,
}
impl Settings {
    /// Builds the settings from the `general` table of `settings.toml`.
    ///
    /// Failures to load the configuration are not returned as errors: the
    /// error is printed to stderr and a `Settings` whose three fields all hold
    /// a message is returned instead.
    ///
    /// * If the configuration cannot be built, every field holds the text
    ///   `settings.toml was not found.`.
    /// * If the `general` table cannot be read, every field holds the text of
    ///   that error.
    ///
    /// # Panics
    ///
    /// Panics if setting the builder's default/override values fails, or if
    /// the `general` table lacks `path_to_data`, `path_to_save_index_map` or
    /// `index_map_name`.
    pub fn build() -> Self {
        // The "default" and "override" keys are not read anywhere in this file.
        let loaded = Config::builder()
            .set_default("default", "1")
            .expect("something error")
            .add_source(File::new("settings.toml", FileFormat::Toml))
            .set_override("override", "1")
            .expect("something error")
            .build();

        let config = match loaded {
            Ok(config) => config,
            Err(err) => {
                eprintln!("{}", err);
                let fallback = "settings.toml was not found.".to_string();
                return Self {
                    path_to_data: fallback.clone(),
                    path_to_save_index_map: fallback.clone(),
                    index_map_name: fallback,
                };
            }
        };

        let general = match config.get_table("general") {
            Ok(table) => table,
            Err(err) => {
                eprintln!("{}", err);
                let message = err.to_string();
                return Self {
                    path_to_data: message.clone(),
                    path_to_save_index_map: message.clone(),
                    index_map_name: message,
                };
            }
        };

        // Looked up in declaration order so the first missing key is the one reported.
        let path_to_data = general
            .get("path_to_data")
            .expect("path_to_data is not exist")
            .to_string();
        let path_to_save_index_map = general
            .get("path_to_save_index_map")
            .expect("path_to_save_index_map is not exist")
            .to_string();
        let index_map_name = general
            .get("index_map_name")
            .expect("index_map_name is not exist")
            .to_string();

        Self {
            path_to_data,
            path_to_save_index_map,
            index_map_name,
        }
    }
}
/// Walks the directory tree rooted at `path` and returns the path of every
/// entry whose file type is a regular file.
///
/// Entries the walk fails to read are skipped without any report, so a root
/// that cannot be walked yields an empty vector. Paths are converted with
/// `to_string_lossy`, so any non-UTF-8 parts are replaced rather than rejected.
pub fn get_file_paths(path: &str) -> Vec<String> {
    let mut found = Vec::new();
    for item in WalkDir::new(path) {
        // Unreadable entries are ignored on purpose (best-effort walk).
        if let Ok(entry) = item {
            if entry.file_type().is_file() {
                found.push(entry.into_path().to_string_lossy().into_owned());
            }
        }
    }
    found
}
pub fn parse_words(mut index_map: IndexMap, contents: String, _file_path: String) -> IndexMap {
let words_data: Vec<&str> = contents.split_whitespace().collect();
for word in words_data {
match index_map.entry(word.trim().to_string()) {
Entry::Vacant(e) => {
e.insert(vec![_file_path.clone()]);
}
Entry::Occupied(mut e) => {
let is_exist_value: bool = e.get_mut().contains(&_file_path.clone());
if is_exist_value {
continue;
}
e.get_mut().push(_file_path.clone());
}
}
}
index_map
}
/// Indexes every file found under `settings.path_to_data` and writes the
/// resulting word index as JSON to `index_map.json` in the current working
/// directory.
///
/// A file that cannot be read as UTF-8 text (binary data, permission error,
/// removed while walking, ...) is reported on stderr and skipped, so a single
/// bad file does not abort the whole run.
///
/// NOTE(review): `settings.path_to_save_index_map` and
/// `settings.index_map_name` are not consulted here; the output location is
/// fixed to the same `index_map.json` that `search_information` reads.
///
/// # Panics
///
/// Panics if the index file cannot be written.
pub fn perform_indexing(settings: Settings) {
    let mut index_map: IndexMap = HashMap::new();
    for file_path in get_file_paths(&settings.path_to_data) {
        match fs::read_to_string(&file_path) {
            Ok(contents) => index_map = parse_words(index_map, contents, file_path),
            Err(err) => eprintln!("skipping {}: {}", file_path, err),
        }
    }
    // Go through `json!` (a `serde_json::Value`) as before so the serialized
    // form of the index stays the same.
    let json = json!(index_map);
    fs::write("index_map.json", json.to_string()).expect("file was not saved");
}
pub fn search_information(asked_data: String) -> Vec<Value> {
let index_map: String =
fs::read_to_string("index_map.json").expect("Should have been able to read the file");
let index_map_json: Value = serde_json::from_str(index_map.as_str()).unwrap();
let result: Vec<Value> = match index_map_json.get(asked_data) {
None => vec![serde_json::Value::String("".to_string())],
Some(value) => value
.as_array()
.expect("an error occurred during the conversion to an array")
.to_vec(),
};
result
}
/// Tests for searching, file discovery and word parsing.
///
/// They rely on fixture files at the absolute paths below and, for the
/// `search_information_*` cases, on an `index_map.json` already present in the
/// working directory.
#[cfg(test)]
mod tests {
    use super::*;

    const FILE1: &str = "/home/user/zse/zse/test_data/file1.txt";
    const FILE2: &str = "/home/user/zse/zse/test_data/file2.txt";
    const FILE3: &str = "/home/user/zse/zse/test_data/file3.txt";
    const FILE34: &str = "/home/user/zse/zse/test_data/file34.txt";

    /// Search result expected when a word is found in exactly `paths`.
    fn found_in(paths: &[&str]) -> Vec<Value> {
        paths
            .iter()
            .map(|&path| Value::String(path.to_string()))
            .collect()
    }

    /// Search result expected when a word is not in the index.
    fn not_found() -> Vec<Value> {
        vec![Value::String("".to_string())]
    }

    /// Index expected when every one of `words` occurs only in `file_path`.
    fn expected_index(words: &[&str], file_path: &str) -> IndexMap {
        words
            .iter()
            .map(|&word| (word.to_string(), vec![file_path.to_string()]))
            .collect()
    }

    /// Reads `file_path` and parses it into a fresh index.
    fn index_file(file_path: &str) -> IndexMap {
        let contents: String =
            fs::read_to_string(file_path).expect("Should have been able to read the file");
        parse_words(HashMap::new(), contents, file_path.to_string())
    }

    #[test]
    fn search_information_ru_positive() {
        assert_eq!(
            found_in(&[FILE2]),
            search_information("Данные".to_string())
        );
    }

    #[test]
    fn search_information_ru_negative() {
        assert_eq!(not_found(), search_information("Дан".to_string()));
    }

    #[test]
    fn search_information_ch_positive() {
        assert_eq!(
            found_in(&[FILE1]),
            search_information("非常秘密的信息".to_string())
        );
    }

    #[test]
    fn search_information_ch_negative() {
        assert_eq!(
            not_found(),
            search_information("非常秘密的信".to_string())
        );
    }

    #[test]
    fn search_information_en_positive() {
        assert_eq!(
            found_in(&[FILE3]),
            search_information("information".to_string())
        );
    }

    #[test]
    fn search_information_en_negative() {
        assert_eq!(
            not_found(),
            search_information("doesntexiststring".to_string())
        );
    }

    #[test]
    fn get_file_paths_positive() {
        // The expected order is the one the directory walk yields on the fixture tree.
        let expected: Vec<String> = [FILE34, FILE3, FILE1, FILE2]
            .iter()
            .map(|&path| path.to_string())
            .collect();
        assert_eq!(expected, get_file_paths("/home/user/zse/zse/test_data"));
    }

    #[test]
    fn get_file_paths_negative() {
        assert_eq!(
            Vec::<String>::new(),
            get_file_paths("/home/user/zse/zse/test_dat")
        );
    }

    #[test]
    fn parse_words_ru_positive() {
        assert_eq!(
            expected_index(&["Данные", "файла", "2"], FILE2),
            index_file(FILE2)
        );
    }

    #[test]
    fn parse_words_ru_negative() {
        assert_eq!(IndexMap::new(), index_file(FILE34));
    }

    #[test]
    fn parse_words_en_positive() {
        assert_eq!(
            expected_index(&["very", "useful", "information"], FILE3),
            index_file(FILE3)
        );
    }

    #[test]
    fn parse_words_en_negative() {
        assert_eq!(IndexMap::new(), index_file(FILE34));
    }

    #[test]
    fn parse_words_ch_positive() {
        assert_eq!(
            expected_index(&["非常秘密的信息"], FILE1),
            index_file(FILE1)
        );
    }
}