word_replace 0.0.3

word replacement tool for a text file with toml-based dictionary
extern crate toml;
extern crate regex;

use std::collections::HashMap;
use std::fs::copy as file_copy;
use std::fs::create_dir_all;
use std::fs::read_dir;
use std::fs::File;
use std::io::prelude::*;
use std::path::Path;
use self::toml::Value;
use self::regex::Regex;

pub fn read_dictionary_toml_file(filename: &Path) -> Value {
    let mut dic_file = File::open(filename).expect("could not open dic.toml");
    let mut dic_string = String::new();
    dic_file.read_to_string(&mut dic_string).expect("could not read contents from dic.toml");
    dic_string.parse::<Value>().expect("could not parse dic.toml")
}

pub fn get_dictionary(toml_value: &Value, language: &str) -> Result<HashMap<String, String>, &'static str> {
    let mut dictionary_map: HashMap<String, String> = HashMap::new();
    if let &Value::Table(ref dic_table) = toml_value {
        match dic_table.get(language) {
            Some(dic_contents) => {
                if let Value::Table(ref dic) = *dic_contents {
                    for (keyword, translate) in dic {
                        match *translate {
                            Value::String(ref x) => {
                                dictionary_map.insert(keyword.clone(), x.clone());
                            },
                            _ => (),
                        };
                    }
                }
            }
            None => {
                return Err("cannot find appropriate language from dictionary");
            }
        }
        Ok(dictionary_map)
    }
    else {
        Err("cannot find toml tables")
    }
}

#[derive(Debug,Clone)]
pub struct PostPosPair {
    pub left: String,
    pub right: String,
}

pub trait PostPosPairArrayInterface {
    fn find(&self, s: &str) -> Option<&PostPosPair>;
}

impl PostPosPairArrayInterface for Vec<PostPosPair> {
    fn find(&self, s: &str) -> Option<&PostPosPair> {
        for pair in self {
            if pair.left == s || pair.right == s {
                return Some(&pair);
            }
        }
        None
    }
}

fn is_korean_character_and_has_final_jamo(c: char) -> (bool, bool) {
    let code_point = c as u32;
    let is_korean_character = code_point >= 0xAC00 && code_point <= 0xD7A3;
    if is_korean_character {
        (true, (code_point - 0xAC00) % 28 != 0)
    }
    else {
        (false, false)
    }
}

pub fn get_postpos_pairs(toml_value: &Value, language: &str) -> Option<Vec<PostPosPair>> {
    let mut postpos_pairs = Vec::<PostPosPair>::new();
    if let &Value::Table(ref all_tables) = toml_value {
        match all_tables.get(&(String::from(language) + "-postpos")) {
            Some(postpos_contents) => {
                let postpos_table = match postpos_contents { &Value::Table(ref t) => Some(t), _ => None }.unwrap();
                let postpos_pairs_toml = match postpos_table.get("postpos").unwrap() { &Value::Array(ref t) => Some(t), _ => None}.unwrap();
                for pair in postpos_pairs_toml {
                    let p = match pair { &Value::Array(ref t) => Some(t), _ => None}.unwrap();
                    let left = match p.get(0).unwrap() { &Value::String(ref s) => Some(s), _ => None}.unwrap();
                    let right = match p.get(1).unwrap() { &Value::String(ref s) => Some(s), _ => None}.unwrap();
                    postpos_pairs.push(PostPosPair { left:left.clone(), right:right.clone(), });
                }
            }
            None => {
                return None;
            }
        }
        Some(postpos_pairs)
    }
    else {
        None
    }
}

pub fn process_file(src_filepath: &Path,
    dst_filepath: &Path,
    dictionary_map: &HashMap<String, String>,
    postpos_pairs_option: Option<&Vec<PostPosPair>>,
    show_warning: bool) {
    let re = Regex::new(r"@@[a-z|A-Z|\d]+@@").unwrap();

    let mut src_file = File::open(src_filepath).expect("failed to open file");
    let mut src_string = String::new();
    let src_read_result = src_file.read_to_string(&mut src_string);
    match src_read_result {
        Err(_) => {
            if show_warning {
                println!("{} is not a text file, so just copied.", src_filepath.to_str().unwrap());
                file_copy(src_filepath, dst_filepath).expect("failed to copy file");
                return;
            }
        }
        _ => {},
    }
    
    let mut dst_string = String::new();

    let mut last_index = 0;
    for c in re.captures_iter(&src_string) {
        let m = c.get(0).unwrap();
        let key = &src_string[m.start()+2..m.end()-2];
        let mut additional_advance = 0;
        dst_string += &src_string[last_index..m.start()];
        match dictionary_map.get(key) {
            Some(word) => {
                dst_string += word;
                if let Some(postpos_pairs) = postpos_pairs_option {
                    let is_whitespace = src_string[m.end()..].chars().next().unwrap_or(' ').is_whitespace();
                    let next = if is_whitespace { None } else { src_string[m.end()..].split_whitespace().next() };
                    match next {
                        Some(postpos) => {
                            let found_postpos_pair = postpos_pairs.find(postpos);
                            let (word_is_korean, word_has_final_jamo) =
                                is_korean_character_and_has_final_jamo(word.chars().last().unwrap());
                            let (postpos_is_korean, _) = 
                                is_korean_character_and_has_final_jamo(postpos.chars().next().unwrap());
                            if word_is_korean {
                                if let Some(found_postpos_pair) = found_postpos_pair {
                                    dst_string += if word_has_final_jamo
                                        { found_postpos_pair.right.as_str() } else { found_postpos_pair.left.as_str() };
                                    additional_advance = postpos.len();
                                }
                                else {
                                    if postpos_is_korean && show_warning {
                                        println!("warning: undefined korean postposition at {}: {}{} => {}{}",
                                            src_filepath.to_str().unwrap(), m.as_str(), postpos,
                                            word, postpos);
                                    }
                                }
                            }
                        },
                        None => {},
                    };
                }
            },
            _ => {
                dst_string += m.as_str();
            },
        };
        last_index = m.end();
        last_index += additional_advance;
    }
    dst_string += &src_string[last_index..];

    let mut dst_file = File::create(dst_filepath).expect("failed to open destination file");
    dst_file.write_all(dst_string.as_bytes()).expect("failed to write destination file");
}


pub fn process_directory(input_directory: &Path,
    output_directory: &Path,
    dictionary_map: &HashMap<String, String>,
    postpos_pairs_option: Option<&Vec<PostPosPair>>,
    show_warning: bool) {
    create_dir_all(&output_directory).expect("cannot create output directory");
    let paths = read_dir(&input_directory).expect("failed to read input directory");
    for p in paths {
        let src_path = p.expect("failed to get source file path").path();
        let dst_path = output_directory.join(src_path.strip_prefix(&input_directory).unwrap());
        if src_path.is_dir() {
            process_directory(&src_path, &dst_path, dictionary_map, postpos_pairs_option, show_warning);
        }
        else {
            if show_warning {
                println!("processing: {}", src_path.to_str().unwrap());
            }
            process_file(&src_path, &dst_path, &dictionary_map, postpos_pairs_option, show_warning);
        }
    }
}