number-based 0.2.3

number-based is an attempt of mine to make working with number bases simple.
Documentation
//! # Feature - non_standard_graphemes
//!
//! The standard graphemes that come with number-based can be changed using the feature
//! custom_graphemes. Simply add the feature to `Cargo.toml`:
//! ```toml
//! [dependencies]
//! number-based = { version = "^0.2.2", features = ["custom_graphemes"] }
//! ```
//! (Or run the package with the `--features non_standard_graphemes` flag)
//!
//! Then create a file called graphemes.json in the package root directory.
//! The file should be structured like this:
//! ```json
//! {
//!     "0": "0",
//!     "1": "1",
//!     "2": "2",
//!     "3": "3",
//!     "4": "4",
//!     "5": "5",
//!     "6": "6",
//!     "7": "7",
//!     "8": "8",
//!     "9": "9",
//!     "10": "a",
//!     "11": "b",
//!     "12": "c",
//!     "13": "d",
//!     "14": "e",
//!     "15": "f"
//! }
//! ```
//! pairing each number in base 10 with the desired grapheme.
//! In the example above, the value 14 would be represented as "e"
//! in any base greater than 14.
//!
//! There is a limit of 2^16 graphemes as a maximum, but there
//! is no requirement as to how many should be used. You could
//! specify 50 numbers or 50 000. Do note however that every
//! number up until the maximum value must be specified. In
//! the example above, we cannot leave out for instance 12,
//! as that would create a gap in the graphemes list. Also note
//! that after specifying graphemes, bases above the maximum
//! specified value cannot be used. For instance, if the
//! highest specified number is 59, creating a number in base
//! 73 is not possible.
//!
//! When specifying pairs in graphemes.json, there may not be two
//! definitions for the same number, nor may two numbers have the
//! same definition.
//!
//! This is NOT allowed:
//! ```json
//! {
//!     "0": "0",
//!     "1": "1",
//!     "2": "2",
//!     ...
//!     "458": "ʯ",
//!     ...
//!     "458": "ʠ",
//!     ...
//! }
//! ```
//! because there are conflicting definitions for the number 458.
//!
//!
//! This is NOT allowed:
//! ```json
//! {
//!     "0": "0",
//!     "1": "1",
//!     "2": "2",
//!     ...
//!     "782": "Σ",
//!     ...
//!     "971": "Σ",
//!     ...
//! }
//! ```
//! because there are two numbers with the same definition of "Σ".

use std::str;
use std::{
    collections::HashMap,
    fs,
    io::{Error, ErrorKind},
    path::Path,
};

use crate::utils;

/// Loads `graphemes.json`, splits it into key/value pairs and turns those pairs into the
/// number → grapheme table.
///
/// # Errors
///
/// Returns the first error reported by `read_file` or by `create_map`.
pub fn create_non_standard() -> Result<HashMap<u16, [u8; 4]>, Error> {
    // The pairs borrow from `raw_text`, so the text has to stay alive until the map is built.
    read_file().and_then(|mut raw_text| create_map(parse_json(&mut raw_text)))
}

/// Reads the whole of `./graphemes.json` (relative to the current working directory) into a
/// `String`.
///
/// # Errors
///
/// Returns an [`ErrorKind::NotFound`] error with an explanatory message when the path does not
/// exist; any error raised while reading the file is passed through unchanged.
fn read_file() -> Result<String, Error> {
    let location = Path::new("./graphemes.json");

    if location.exists() {
        // The read result already has this function's return type, so hand it back as is.
        return fs::read_to_string(location);
    }

    Err(Error::new(
        ErrorKind::NotFound,
        String::from("Could not find file named 'graphemes.json' in package root directory"),
    ))
}

/// Splits the text of `graphemes.json` into `[key, value]` string pairs.
///
/// This is not a general JSON parser. It strips, in place, every space, tab, carriage return,
/// line feed, brace and double quote from `json_content`, then splits what is left on `,` and
/// each fragment on `:`. Fragments that do not split into exactly two parts (for example the
/// empty fragment left by a trailing comma) are dropped.
///
/// Because of the stripping and splitting, a grapheme containing any of those characters, a
/// `,` or a `:` cannot be expressed in the file.
///
/// The returned string slices borrow from `json_content`.
fn parse_json(json_content: &mut String) -> Vec<Vec<&str>> {
    // Tabs and carriage returns are removed as well, so tab-indented files and files with
    // Windows line endings do not leave stray characters glued to the following key.
    json_content.retain(|c| !matches!(c, ' ' | '\t' | '\r' | '\n' | '{' | '}' | '"'));

    json_content
        .split(',')
        .filter_map(|entry| {
            let pair: Vec<&str> = entry.split(':').collect();
            if pair.len() == 2 {
                Some(pair)
            } else {
                None
            }
        })
        .collect()
}

/// Builds the number → grapheme table from `[key, grapheme]` string pairs.
///
/// Every grapheme is stored as a 4-byte array: its UTF-8 bytes, right-aligned and left-padded
/// with zero bytes. The order of `pairs` does not matter.
///
/// # Errors
///
/// * [`ErrorKind::Other`] if a key does not parse as `u16`, or if no pair was provided.
/// * [`ErrorKind::InvalidData`] if a pair has fewer than two elements, if a grapheme is `"-"`
///   (used as the negative sign), or if a grapheme is empty or longer than 4 bytes in UTF-8.
/// * [`ErrorKind::InvalidInput`] if a number or a grapheme is specified twice, or if a number
///   below the highest specified one has no grapheme.
fn create_map(pairs: Vec<Vec<&str>>) -> Result<HashMap<u16, [u8; 4]>, Error> {
    let mut graphemes: HashMap<u16, [u8; 4]> = HashMap::with_capacity(pairs.len());
    // Reverse index (stored bytes -> number) so that spotting a reused grapheme is a single hash
    // lookup instead of a scan over every grapheme inserted so far.
    let mut taken: HashMap<[u8; 4], u16> = HashMap::with_capacity(pairs.len());

    // Uniqueness of numbers and graphemes is checked while inserting; the "no gaps" rule is
    // checked afterwards because the entries may come in any order.
    for pair in &pairs {
        let (key, grapheme) = match pair.as_slice() {
            [key, grapheme, ..] => (*key, *grapheme),
            _ => {
                return Err(Error::new(
                    ErrorKind::InvalidData,
                    "Every entry must consist of a number and a grapheme",
                ))
            }
        };

        let number: u16 = key.parse().map_err(|_| {
            Error::new(
                ErrorKind::Other,
                format!("Could not parse provided key '{key}' to u16"),
            )
        })?;

        if grapheme == "-" {
            return Err(Error::new(
                ErrorKind::InvalidData,
                "Cannot use '-' as grapheme because it is used as a negative sign",
            ));
        }

        // The storage format has room for exactly four bytes. Rejecting other lengths here keeps
        // the padding computation below from underflowing, and refuses an invisible (empty) digit.
        let bytes = grapheme.as_bytes();
        if bytes.is_empty() || bytes.len() > 4 {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!(
                    "Grapheme '{grapheme}' for number {number} must be between 1 and 4 bytes long in UTF-8, but is {} bytes long",
                    bytes.len()
                ),
            ));
        }

        if graphemes.contains_key(&number) {
            return Err(Error::new(ErrorKind::InvalidInput, format!("The same number cannot be used twice. Found two specifications for number {number}")));
        }

        // Right-align the grapheme's bytes, filling the front with zeroes.
        let mut rep = vec![0u8; 4 - bytes.len()];
        rep.extend_from_slice(bytes);
        let byte_arr: [u8; 4] = utils::vec_to_arr(rep);

        if taken.contains_key(&byte_arr) {
            // `byte_arr` is zero padding followed by the bytes of a `&str`, so it is valid UTF-8;
            // the padding is trimmed so the message shows only the grapheme itself.
            let shown = str::from_utf8(&byte_arr)
                .unwrap_or(grapheme)
                .trim_start_matches('\0');
            return Err(Error::new(ErrorKind::InvalidInput, format!("The same grapheme cannot be used twice. Found two specifications for grapheme {shown}")));
        }

        graphemes.insert(number, byte_arr);
        taken.insert(byte_arr, number);
    }

    let max_key = graphemes.keys().copied().max().ok_or_else(|| {
        Error::new(ErrorKind::Other, "Could not parse json. No value provided")
    })?;

    // `max_key` itself is known to be present, so only the numbers below it need checking.
    if let Some(missing) = (0..max_key).find(|n| !graphemes.contains_key(n)) {
        return Err(Error::new(ErrorKind::InvalidInput, format!("All values up until maximum specified value ({max_key}) must be accounted for. Could not find specification for {missing}.")));
    }

    Ok(graphemes)
}