num2phrase 0.1.0

Convert a long number to a sequence of memorisable phrases with a combination of short numbers
Documentation
use std::collections::VecDeque;

use num_bigint::BigUint;
use num_traits::ToPrimitive;
use crate::{indexer::{Digits, BIP39}, token::{CustomToken, IntermediateFullStructure, IntermediateToken, ParseHandler, RepeatKind}};
use crate::token_kind::TokenKind;


/// Encodes `value` as a space-separated phrase that follows `structure`.
///
/// The tokens of `structure` are visited left to right and `value` is consumed
/// least-significant digit first:
///
/// * a literal is emitted verbatim and consumes nothing;
/// * a group emits each of its tokens in order;
/// * a custom token (`DIGITS`, `BIP39`) removes one digit in that token's base
///   (`value % base`, then `value /= base`) and emits the matching symbol;
/// * a repeat emits its body `min` times unconditionally (a missing `min` counts
///   as 0), then removes one bit from `value` before every further repetition:
///   `1` emits the body once more, `0` stops. No bit is removed once the upper
///   bound, if any, has been reached.
///
/// Every emitted word ends up in one flat list, so a nested group or repeat that
/// produces no words does not leave doubled, leading or trailing spaces behind.
///
/// Whatever is left of `value` after the last token is discarded.
///
/// # Panics
///
/// Panics if a token kind cannot map an in-range index to a symbol.
pub fn bigint_to_string(structure: &IntermediateFullStructure, mut value: BigUint) -> String {
	/// Removes the least-significant base-`base` digit from `value` and returns it.
	fn take_index(value: &mut BigUint, base: usize) -> usize {
		let base = BigUint::from(base as u64);
		let index = (&*value % &base)
			.to_usize()
			.expect("a remainder is always smaller than its usize base");
		*value /= &base;
		index
	}

	/// Appends the words for `tokens` to `out`, consuming digits from `value`.
	fn emit(tokens: &[IntermediateToken], value: &mut BigUint, out: &mut Vec<String>) {
		for token in tokens {
			match token {
				IntermediateToken::Literal(s) => out.push(s.to_string()),
				IntermediateToken::Group(group) => emit(&group.tokens, value, out),
				IntermediateToken::Repeat(inner, repeat_kind) => {
					let RepeatKind::Between(min, max) = repeat_kind;
					let min = min.unwrap_or(0);

					// The mandatory repetitions carry no continuation bit.
					for _ in 0..min {
						emit(std::slice::from_ref(inner), value, out);
					}

					// Optional repetitions: one bit each decides whether to go on.
					// An exhausted value yields a 0 bit, so an unbounded repeat
					// still terminates.
					let mut performed = min;
					while max.map_or(true, |limit| performed < limit) {
						if take_index(value, 2) == 0 {
							break;
						}
						emit(std::slice::from_ref(inner), value, out);
						performed += 1;
					}
				}
				IntermediateToken::Custom(custom) => {
					let word = match custom {
						CustomToken::DIGITS => {
							let index = take_index(value, Digits::max());
							Digits {}.from_idx(index).unwrap()
						}
						CustomToken::BIP39 => {
							let index = take_index(value, BIP39::max());
							BIP39 {}.from_idx(index).unwrap()
						}
					};
					out.push(word);
				}
			}
		}
	}

	let mut words: Vec<String> = Vec::new();
	emit(&structure.tokens, &mut value, &mut words);
	words.join(" ")
}

/// Flattens `schema` into the list of non-group tokens it contains, in order.
///
/// Groups are expanded recursively; any other token is cloned as-is.
///
/// # Panics
///
/// Panics if a `Repeat` token is encountered at any depth.
fn degroup(schema: &IntermediateToken) -> Vec<IntermediateToken> {
	match schema {
		IntermediateToken::Group(inner) => inner
			.tokens
			.iter()
			.flat_map(|child| degroup(child))
			.collect(),
		IntermediateToken::Repeat(_, _) => panic!("Repeat tokens should be handled outside degrouping"),
		leaf => vec![leaf.clone()],
	}
}

/// Flattens `schema` into a `ParseHandler`: the tokens before the repeat, the
/// flattened repeat body with its kind (if there is a repeat), and the tokens
/// after it.
///
/// Groups are expanded in place, so the resulting prefix and suffix contain no
/// `Group` tokens. Hitting a second repeat is treated as unreachable.
fn prepare(schema: &IntermediateFullStructure) -> ParseHandler {
    let mut prefix: Vec<IntermediateToken> = Vec::new();
    let mut suffix: Vec<IntermediateToken> = Vec::new();
    let mut repeat: Option<(Vec<IntermediateToken>, RepeatKind)> = None;
    let mut pending: VecDeque<IntermediateToken> = schema.tokens.iter().cloned().collect();

    while let Some(token) = pending.pop_front() {
        match token {
            IntermediateToken::Group(group) => {
                // Re-queue the children at the front, last child first, so they
                // are visited next and in their original order.
                for child in group.tokens.iter().rev() {
                    pending.push_front(child.clone());
                }
            }
            IntermediateToken::Repeat(body, kind) => {
                if repeat.is_some() {
                    unreachable!("Multiple repeat tokens found, which should be impossible due to parsing rules.");
                }
                repeat = Some((degroup(&body), kind));
            }
            plain => {
                // Everything seen before the repeat is prefix, everything after is suffix.
                if repeat.is_none() {
                    prefix.push(plain);
                } else {
                    suffix.push(plain);
                }
            }
        }
    }

    ParseHandler { prefix, repeat, suffix }
}

/// Returns a vector holding `k` back-to-back copies of the elements of `vec`.
///
/// `k == 0` or an empty `vec` gives an empty vector.
#[allow(dead_code)]
fn repeat_vec<T>(vec: Vec<T>, k: usize) -> Vec<T> where T: Clone {
	let mut repeated = Vec::with_capacity(vec.len() * k);
	repeated.extend((0..k).flat_map(|_| vec.iter().cloned()));
	repeated
}

fn parse_prepared(parse_handler: &ParseHandler, input: &str) -> std::result::Result<BigUint, String> {
	let mut acc = BigUint::from(0u32);
	let mut mult = BigUint::from(1u32);
	let splitted = input.split_whitespace().collect::<Vec<&str>>();
	let input_amount = splitted.len();
	let repeating_group_length = input_amount - parse_handler.prefix.len() - parse_handler.suffix.len();
	let prefix_value: Vec<&str> = splitted.iter().take(parse_handler.prefix.len()).map(|s| *s).collect();
	let suffix_value: Vec<&str> = splitted.iter().rev().take(parse_handler.suffix.len()).map(|s| *s).collect::<Vec<&str>>().into_iter().rev().collect();
	let repeat_value: Vec<&str> = splitted.iter().skip(parse_handler.prefix.len()).take(repeating_group_length).map(|s| *s).collect();

	if repeating_group_length > 0 && (parse_handler.repeat.is_none() || parse_handler.repeat.as_ref().unwrap().0.is_empty()) {
		return Err(format!("The provided token do not match the sequence constraint")); // No repeated
	}

	if repeating_group_length > 0 && repeating_group_length % parse_handler.repeat.as_ref().unwrap().0.len() != 0 {
		return Err(format!("The provided token amount doesn't find into repeat group correctly"));
	}

	if parse_handler.repeat.is_some() && parse_handler.repeat.as_ref().unwrap().0.len() == 0 {
		unreachable!("Repeat length exists but no tokens are included");
	}

	if let None = parse_handler.repeat {
		if repeating_group_length != 0 {
			return Err(format!("The provided token do not match the sequence constraint"));
		}
	}

	fn handle(str_value: &str, token: &IntermediateToken) -> std::result::Result<(BigUint, BigUint), String> {
		match token {
            IntermediateToken::Literal(s) => {
                if s == str_value {
                    return Ok((BigUint::from(0u8), BigUint::from(1u8)));
                } else {
                    return Err(format!("Expected literal '{}', found '{}'", s, str_value).to_string());
                }
            },
            IntermediateToken::Custom(custom) => {
				let value: usize;
				let max: usize;
                match custom {
					CustomToken::BIP39 => {
						max = BIP39::max();
						value = match (BIP39 {}.from_name(str_value)) {
							Some(i) => i,
							None => return Err(format!("Value '{}' not found in BIP39 wordlist", str_value).to_string()),
						};
						return Ok((BigUint::from(value as u64), BigUint::from(max as u64)));
					},
					CustomToken::DIGITS => {
						max = Digits::max();
						value = match (Digits {}.from_name(str_value)) {
							Some(i) => i,
							None => return Err(format!("Value '{}' not a valid digit", str_value).to_string()),
						};
						return Ok((BigUint::from(value as u64), BigUint::from(max as u64)));
					},
				}
            },
			IntermediateToken::Group(_) | IntermediateToken::Repeat(_, _) => unreachable!("Removed during pre-processing")
        }
	}

	// 1) Parse prefix left-to-right (as emitted)
	for (str_value, token) in prefix_value.into_iter().zip(parse_handler.prefix.iter()) {
		let (v, b) = handle(str_value, token)?;
		acc = acc + &mult * v;
		mult = mult * b;
	}

	// 2) Parse repeat block: first min groups, then extras with 1-bits before each group, finally terminating 0-bit if not at max
	if parse_handler.repeat.is_some() {
		let repeat_vec_i = parse_handler.repeat.clone().unwrap().0.clone();
		let repeat_kind = parse_handler.repeat.clone().unwrap().1.clone();
		let group_len = repeat_vec_i.len();
		let repeat_amount = if group_len == 0 { 0 } else { repeating_group_length / group_len };

		if let RepeatKind::Between(Some(v), _) = repeat_kind {
			if repeat_amount < v {
				return Err(format!("The provided token amount doesn't find into repeat group correctly"));
			}
		}

		if let RepeatKind::Between(_, Some(v)) = repeat_kind {
			if repeat_amount > v {
				return Err(format!("The provided token amount doesn't find into repeat group correctly"));
			}
		}

		let min = match repeat_kind { RepeatKind::Between(Some(v), _) => v, _ => 0 };
		let max = match repeat_kind { RepeatKind::Between(_, v) => v };
		let extras = repeat_amount.saturating_sub(min);

		// First, process the mandatory min groups left-to-right
		for g in 0..min {
			let start = g * group_len;
			let end = start + group_len;
			let group_slice = &repeat_value[start..end];
			for (str_value, token) in group_slice.iter().zip(repeat_vec_i.iter()) {
				let (v, b) = handle(str_value, token)?;
				acc = acc + &mult * v;
				mult = mult * b;
			}
		}

		// Then, for each extra, insert 1-bit then process the group
		for e in 0..extras {
			// continuation bit '1'
			acc = acc + &mult * BigUint::from(1u8);
			mult = mult * BigUint::from(2u8);

			let g = min + e;
			let start = g * group_len;
			let end = start + group_len;
			let group_slice = &repeat_value[start..end];
			for (str_value, token) in group_slice.iter().zip(repeat_vec_i.iter()) {
				let (v, b) = handle(str_value, token)?;
				acc = acc + &mult * v;
				mult = mult * b;
			}
		}

		// Finally, terminating '0' bit unless we are exactly at max
		if max != Some(repeat_amount) {
			mult = mult * BigUint::from(2u8);
		}
	}

	// 3) Parse suffix left-to-right
	for (str_value, token) in suffix_value.into_iter().zip(parse_handler.suffix.iter()) {
		let (v, b) = handle(str_value, token)?;
		acc = acc + &mult * v;
		mult = mult * b;
	}

	Ok(acc)
}

/// Decodes the phrase `input` into a number according to `structure`.
///
/// The structure is first flattened with `prepare`, then the words of `input`
/// are decoded by `parse_prepared`.
///
/// # Errors
///
/// Returns the error message produced by `parse_prepared` when `input` does not
/// fit the structure.
pub fn string_to_bigint(structure: &IntermediateFullStructure, input: &str) -> std::result::Result<BigUint, String> {
	parse_prepared(&prepare(structure), input)
}