//! A [Base62](https://en.wikipedia.org/wiki/Base62) encoder / decoder with
//! support for leading zero bytes.
//!
//! Normally, during the conversion of base10 (decimal) to base62, the input data
//! is interpreted as one large number:
//!
//! `[0x00, 0x13, 0x37] => 0x001337 => 4919 (decimal)`
//!
//! As leading zeroes do not contribute to the value of a number
//! (`001337 = 1337`), they are ignored while converting the number to base62.
//!
//! - This is exactly what the [`encode_num`] function does.
//! - The [`encode_data`] function keeps these leading zeroes.
//!
//! This is achieved by prepending a `0x01` byte to the data before encoding,
//! thus creating a number that starts with a `1`: `001337 => 1001337` (No zeroes
//! are removed)
//!
//! The leading `0x01` is removed after the data has been decoded from base62
//! back to base10: `1001337 => 001337`
//!
//!
//!
//! ## Alphabet
//!
//! ```txt
//! 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
//! ```
//!
//! This is the same alphabet that
//! [CyberChef](https://gchq.github.io/CyberChef/#recipe=To_Base62('0-9A-Za-z'))
//! uses by default: `[0-9][A-Z][a-z]`.
//!
//! [Wikipedia/Base62](https://en.wikipedia.org/wiki/Base62) suggests another
//! alphabet (`[A-Z][a-z][0-9]`) but I found that many online converters use
//! either `[0-9][A-Z][a-z]` or `[0-9][a-z][A-Z]`. And as I love
//! [CyberChef](https://gchq.github.io/CyberChef/), I decided to use their
//! default alphabet as well. I also think that starting with numbers is more
//! natural as base62 is actually a number system like decimal (which is actually
//! base10) or hexa-decimal (base16).
//!
//!
//! ## Examples
//!
//! ### Convert Data to Base62
//!
//! This method will prepend `0x01` to the data before encoding it.
//!
//! ```rust
//! let data = vec![0x13, 0x37];
//! let encoded = bs62::encode_data(&data);
//!
//! assert_eq!(encoded, "IKN")
//! ```
//!
//!
//!
//! ### Parse Base62 to Data
//!
//! This method expects a leading `0x01` in the byte array after decoding. It
//! removes the first byte before returning the byte array.
//!
//! ```rust
//! # use std::error::Error;
//! #
//! # fn main() -> Result<(), Box<dyn Error>> {
//! #
//! let encoded = "IKN";
//! let data = bs62::decode_data(&encoded)?;
//!
//! assert_eq!(data, vec![0x13_u8, 0x37]);
//! #
//! #     Ok(())
//! # }
//! ```
//!
//!
//! ### Convert a Number to Base62
//!
//! ```rust
//! let num = 1337;
//! let encoded = bs62::encode_num(&num);
//!
//! assert_eq!(encoded, "LZ")
//! ```
//!
//!
//!
//! ### Parse Base62 to Number
//!
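//! ```rust
//! # use std::error::Error;
//! #
//! # fn main() -> Result<(), Box<dyn Error>> {
//! #
//! use num_traits::ToPrimitive;
//!
//! let big_uint = bs62::decode_num("LZ")?;
//! let num = big_uint.to_i32().ok_or("Unable to convert `BigUint` to `i32`")?;
//!
//! assert_eq!(num, 1337_i32);
//! #
//! #     Ok(())
//! # }
//! ```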

pub extern crate num_bigint;
pub extern crate num_traits;

use lazy_static::lazy_static;
use num_bigint::{BigUint, ToBigUint};
use num_traits::{ToPrimitive, Zero};
use std::{convert::TryInto, error::Error, usize};

/// The radix of the encoding, i.e. the number of distinct digits in the
/// alphabet.
const BASE: usize = 62;

/// The base62 digits ordered by their value: the character at index `i` is the
/// digit with the value `i`.
///
/// This is the same alphabet that
/// [CyberChef](https://gchq.github.io/CyberChef/#recipe=To_Base62('0-9A-Za-z'))
/// uses by default: `[0-9][A-Z][a-z]`.
///
/// [Wikipedia/Base62](https://en.wikipedia.org/wiki/Base62) suggests another
/// alphabet (`[A-Z][a-z][0-9]`) but I found that starting with numbers is more
/// natural as base62 is actually a number system like decimal (which is actually
/// base10).
const ALPHABET_STR: &str =
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

lazy_static! {
    // Lookup table from a digit's value to its character. It is built once, on
    // first use, so that a digit can be fetched by plain array indexing.
    static ref ALPHABET: [char; BASE] = {
        let symbols: Vec<char> = ALPHABET_STR.chars().collect();
        let table: Result<[char; BASE], _> = symbols.try_into();

        // Fails only if `ALPHABET_STR` does not hold exactly `BASE` characters
        table.unwrap()
    };
}

/// Encode data with support for leading zeroes by prepending a `0x01` byte to
/// the data.
///
/// This is necessary because for base62 encoding, the input data is treated as
/// one large number. And as in the decimal system, leading zeroes are ignored
/// (`0001337 = 1337`).
///
/// Empty input is valid: only the prepended `0x01` byte is encoded then.
///
/// ## Algorithm
///
/// For how the actual encoding works, see [`encode_num#algorithm`].
///
/// ## Example
///
/// ```rust
/// let data = vec![0x13, 0x37];
/// let encoded = bs62::encode_data(&data);
///
/// assert_eq!(encoded, "IKN")
/// ```
pub fn encode_data(data: &[u8]) -> String {
    // Assemble marker + payload in a single pass. Copying the payload first and
    // inserting the marker at index 0 afterwards would move every byte twice.
    let mut marked = Vec::with_capacity(data.len() + 1);

    // The marker byte keeps leading null bytes of the payload significant
    marked.push(0x01);
    marked.extend_from_slice(data);

    encode_num(&BigUint::from_bytes_be(&marked))
}

/// Convert a base10 (decimal) number to base62.
///
/// ## Algorithm
///
/// The input number is divided (with remainder) by 62 (the base). The
/// remainder is used as an index into the alphabet `[0-9][A-Z][a-z]` and the
/// char found there is appended to a list. The floored quotient then becomes
/// the new number, which is again divided (with remainder) by 62, and so on.
/// Once the number has become zero, the list holds the digits in reverse
/// order, so it is reversed and converted to a string.
///
/// The number zero is encoded as `"0"`.
///
/// ## Panics
///
/// Panics if the `num` can not be converted to [`BigUint`] (e.g. if `num` is negative).
///
/// ## Example
///
/// ```rust
/// let num = 1337;
/// let encoded = bs62::encode_num(&num);
///
/// assert_eq!(encoded, "LZ")
/// ```
pub fn encode_num<T: ToBigUint>(num: &T) -> String {
    let mut remaining = num
        .to_biguint()
        .expect("Failed to convert `num` to `BigUint`");

    // Zero never enters the loop below, so it gets its single digit here
    if remaining.is_zero() {
        return String::from("0");
    }

    let radix = BigUint::from(BASE);

    // Digits are produced least significant first
    let mut reversed = Vec::new();

    while !remaining.is_zero() {
        let index = (&remaining % &radix)
            .to_usize()
            .expect("THIS SHOULD NEVER HAPPEN: Failed converting to `usize`");
        reversed.push(ALPHABET[index]);
        remaining /= &radix;
    }

    reversed.into_iter().rev().collect()
}

/// Decode data with support for leading zeroes (that has been encoded using
/// [`encode_data`]).
///
/// To keep leading zeroes, a `0x01` byte was prepended to the data before it
/// was encoded. To undo that, the `0x01` byte is removed after decoding.
///
/// If the decoded data does not start with a `0x01` byte, an error is returned.
///
/// ## Algorithm
///
/// For how the actual decoding works, see [`decode_num#algorithm`].
///
/// ## Errors
///
/// An error variant is returned
///
/// - when the input string contains invalid chars
/// - if the data was not encoded with the magic byte `0x01` at the beginning
///   (described in [`encode_data`]). It is then considered invalid data.
///
/// ## Example
///
/// ```rust
/// # use std::error::Error;
/// #
/// # fn main() -> Result<(), Box<dyn Error>> {
/// #
/// let encoded = "IKN";
/// let data = bs62::decode_data(&encoded)?;
///
/// assert_eq!(data, vec![0x13_u8, 0x37]);
/// #
/// #     Ok(())
/// # }
/// ```
pub fn decode_data(inp: &str) -> Result<Vec<u8>, Box<dyn Error>> {
    let bytes = decode_num(inp)?.to_bytes_be();

    match bytes.split_first() {
        // Drop the artificially prepended byte and hand back the payload
        Some((&0x01, payload)) => Ok(payload.to_vec()),
        _ => Err(Box::from(
            "Encoded data is invalid: Encoded data must begin with a `0x01` magic byte.",
        )),
    }
}

/// Decode data just like [`decode_data`] but allow data that was not prepended
/// with a `0x01` byte.
///
/// **ATTENTION:** Using this method might corrupt the data! If the real data
/// started with an actual `0x01` byte and is encoded without using the
/// `encode_data` method, this first byte will be removed!
///
/// If the decoded data does not begin with a `0x01` byte, nothing is removed
/// and the data vector is returned just as is.
///
/// This method is mostly used to allow human generated base62 strings to be
/// used as seeds for RNGs.
///
/// ## Errors
///
/// An error variant is returned when the input string contains invalid chars.
///
/// ## Example
///
/// ```rust
/// # use std::error::Error;
/// #
/// # fn main() -> Result<(), Box<dyn Error>> {
/// #
/// use num_bigint::BigUint;
/// use num_traits::FromPrimitive;
///
/// let orig = 1337;
/// // Convert to byte array
/// let orig_data = BigUint::from_i32(orig)
///     .ok_or("Failed to convert `i32` to `BigUint`")?
///     .to_bytes_be();
///
/// // Encode using both methods
/// let data_encoded = bs62::encode_data(&orig_data);
/// let num_encoded = bs62::encode_num(&orig);
///
/// // The `encode_data` function produces a different encoded string because of
/// // the prepended `0x01` byte:
/// assert_eq!(data_encoded, "HOb");
/// assert_eq!(num_encoded, "LZ");
///
/// // Convert both encoded strings back using the same function
/// let data = bs62::decode_data_forgiving(&data_encoded)?;
/// let num = bs62::decode_data_forgiving(&num_encoded)?;
///
/// // They should both produce the same (original) byte array
/// assert_eq!(data, orig_data);
/// assert_eq!(num, orig_data);
/// #
/// #     Ok(())
/// # }
/// ```
pub fn decode_data_forgiving(inp: &str) -> Result<Vec<u8>, Box<dyn Error>> {
    let mut bytes = decode_num(inp)?.to_bytes_be();

    // Only a leading `0x01` is treated as the artificially prepended byte;
    // anything else is passed through untouched.
    if bytes.first() == Some(&0x01) {
        bytes.remove(0);
    }

    Ok(bytes)
}

/// Decode a base62 string to a base10 (decimal) number.
///
/// ## Algorithm
///
/// The algorithm starts by assigning a 0 to a `number` variable. Then each char
/// of the input string is converted to its according index in the alphabet. The
/// number is then multiplied by 62 (the base) and the index of the char is added
/// to the number. This is repeated until all chars have been consumed.
///
/// The returned [`num_bigint::BigUint`] can be converted to a primitive type
/// using the [`num_traits::ToPrimitive`] trait.
///
/// ## Errors
///
/// An error variant is returned when the input string contains invlid chars.
///
/// ## Example
///
/// ```rust
/// # use std::error::Error;
/// #
/// # fn main() -> Result<(), Box<dyn Error>> {
/// #
/// use num_traits::ToPrimitive;
/// let big_uint = bs62::decode_num("A")?;
/// let num = big_uint.to_i32().ok_or("Unable to convert `BigUint` to `i32`")?;
///
/// assert_eq!(num, 10_i32);
/// #
/// #     Ok(())
/// # }
/// ```
pub fn decode_num(inp: &str) -> Result<BigUint, Box<dyn Error>> {
    let base = BigUint::from(BASE);
    let mut num = BigUint::zero();

    for digit in inp.chars() {
        let rem = digit_to_num(digit)?;

        num *= &base;
        num += rem;
    }

    Ok(num)
}

/// Map a single base62 digit to its numeric value.
///
/// `'0'..='9'` yield `0..=9`, `'A'..='Z'` yield `10..=35` and `'a'..='z'`
/// yield `36..=61`.
///
/// ## Errors
///
/// Returns a message naming the character if it is not one of the 62 digits.
fn digit_to_num(digit: char) -> Result<u32, String> {
    // Each range is described by its first character and that character's value
    let (range_start, first_value) = match digit {
        '0'..='9' => ('0', 0_u32),
        'A'..='Z' => ('A', 10_u32),
        'a'..='z' => ('a', 36_u32),
        _ => return Err(format!("Invalid character '{}'", digit)),
    };

    Ok(first_value + (u32::from(digit) - u32::from(range_start)))
}

#[cfg(test)]
mod tests {
    /// Raw bytes of the long fixture shared by the encode and decode tests.
    const BLOB: &[u8] = &[
        0x49, 0x20, 0x64, 0x65, 0x64, 0x69, 0x63, 0x61, 0x74, 0x65,
        0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x64, 0x61, 0x74, 0x61,
        0x62, 0x6c, 0x6f, 0x62, 0x20, 0x74, 0x6f, 0x20, 0x6d, 0x79,
        0x20, 0x67, 0x69, 0x72, 0x6c, 0x66, 0x72, 0x69, 0x65, 0x6e,
        0x64, 0x20, 0x53, 0x61, 0x73, 0x73, 0x69, 0x2c, 0x20, 0x77,
        0x68, 0x6f, 0x6d, 0x20, 0x49, 0x20, 0x6c, 0x6f, 0x76, 0x65,
        0x20, 0x6d, 0x6f, 0x72, 0x65, 0x20, 0x74, 0x68, 0x61, 0x6e,
        0x20, 0x61, 0x6e, 0x79, 0x74, 0x68, 0x69, 0x6e, 0x67, 0x20,
        0x3c, 0x33,
    ];

    /// `BLOB` as encoded by `encode_data`.
    const BLOB_ENCODED: &str = "2dijkpweCkKuJQVIavyqgkzOPiqWWiLHD0KzzlsZDQnGjtD6s6znCYYROVDlomz7lrxiMgpUEKZ7MnmwWnlpEmUNcJ4WW4wEJKATPQ9Fg5oqnVD";

    /// Numbers paired with their base62 representation. The values sit on
    /// both sides of every alphabet range boundary and of the first carry.
    const NUMBERS: &[(i32, &str)] = &[
        (0, "0"),
        (5, "5"),
        (9, "9"),
        (10, "A"),
        (35, "Z"),
        (36, "a"),
        (61, "z"),
        (62, "10"),
        (1337, "LZ"),
    ];

    mod encode {
        use num_traits::FromPrimitive;

        use super::super::*;
        use super::{BLOB, BLOB_ENCODED, NUMBERS};

        #[test]
        fn encode_single_byte() {
            assert_eq!(encode_data(&[0xAA]), "6s")
        }

        #[test]
        fn encode_multiple_bytes() {
            assert_eq!(encode_data(BLOB), BLOB_ENCODED);
        }

        #[test]
        fn encode_leading_zeroes() {
            assert_eq!(encode_data(&[0x00, 0x00, 0x10]), "18OWW")
        }

        #[test]
        fn encode_trailing_zeroes() {
            assert_eq!(encode_data(&[0x01, 0x00, 0x00]), "18fZI")
        }

        #[test]
        fn encode_empty_data() {
            // Only the prepended `0x01` byte is left to encode
            assert_eq!(encode_data(&[]), "1")
        }

        #[test]
        fn encode_numbers() {
            // From i32
            for &(value, expected) in NUMBERS {
                assert_eq!(encode_num(&value), expected, "encoding {}", value);
            }
            // From other number formats
            assert_eq!(encode_num(&10.0), "A");
            assert_eq!(encode_num(&10_u32), "A");
            assert_eq!(encode_num(&10_usize), "A");
            assert_eq!(encode_num(&BigUint::from_i32(10).unwrap()), "A");
        }
    }

    mod decode {
        use num_traits::FromPrimitive;

        use super::super::*;
        use super::{BLOB, BLOB_ENCODED, NUMBERS};

        #[test]
        fn decode_single_byte() {
            assert_eq!(decode_data("6s").unwrap(), vec![0xAA])
        }

        #[test]
        fn decode_multiple_bytes() {
            assert_eq!(decode_data(BLOB_ENCODED).unwrap(), BLOB);
        }

        #[test]
        fn decode_leading_zeroes() {
            assert_eq!(decode_data("18OWW").unwrap(), vec![0x00, 0x00, 0x10])
        }

        #[test]
        fn decode_trailing_zeroes() {
            assert_eq!(decode_data("18fZI").unwrap(), vec![0x01, 0x00, 0x00])
        }

        #[test]
        fn decode_empty_data() {
            // "1" is just the magic byte, so no payload remains
            assert_eq!(decode_data("1").unwrap(), Vec::<u8>::new())
        }

        #[test]
        fn decode_rejects_invalid_chars() {
            assert!(decode_num("L-Z").is_err());
            assert!(decode_data("6s!").is_err());
            assert!(decode_data_forgiving("6 s").is_err());
        }

        #[test]
        fn decode_rejects_missing_magic_byte() {
            // "LZ" is 1337 = [0x05, 0x39], which lacks the leading `0x01`
            assert!(decode_data("LZ").is_err());
        }

        #[test]
        fn decode_forgiving() {
            // With the magic byte: it is stripped
            assert_eq!(
                decode_data_forgiving("HOb").unwrap(),
                vec![0x05, 0x39]
            );
            // Without the magic byte: the bytes are returned as they are
            assert_eq!(
                decode_data_forgiving("LZ").unwrap(),
                vec![0x05, 0x39]
            );
        }

        #[test]
        fn decode_numbers() {
            for &(expected, encoded) in NUMBERS {
                assert_eq!(
                    decode_num(encoded).unwrap(),
                    BigUint::from_i32(expected).unwrap(),
                    "decoding {:?}",
                    encoded
                );
            }
        }
    }
}