/*!
# The core module of delimited literals
 [`crate::delimited`] defines the [`XRegex`] struct, whose `kind` field tells 
 whether it represents a single [`Regex`] or a [`ReSequence`]. Its associated 
 functions `try_from` and `from_str` call the `parse` and `compile` functions 
 in sequence, converting any delimited regular expression literal 
 into the compiled struct via an intermediate `BTreeMap<u32,Meta>`.
*/

#![allow(unused)]

pub use regex_automata::{meta::{Regex,BuildError},Span,Match,PatternID,Input,Anchored};
pub use crate::error::Error; 
use crate::util;

use core::cell::Cell;// the wrapper structure as a mutable pointer to bytes vector in thread local static 
use alloc::collections::btree_map::BTreeMap;// the data structure for meta_map

use core::convert::TryFrom; //TryFrom trait used by 1 u32 ; 2.XRegex
use alloc::str;
use alloc::str::FromStr; //FromStr trait used by XRegex

use core::cmp::PartialEq;

use core::result::Result;
use alloc::string::String;

use core::slice::{Iter,IterMut};
use alloc::vec::{Vec,IntoIter};

//use thread_local macro reference: https://doc.rust-lang.org/std/macro.thread_local.html
thread_local! {
/// Per-thread storage for the regex literal delimiter.
///
/// The delimiter (1-4 single-byte punctuation characters) is kept packed in a
/// `u32`; the default value is a single forward slash (`/`).
	pub static DELIMITER:Cell<u32> = const {Cell::new(b'/' as u32)};
}

/// The opening/closing punctuation pairs that delimit regex sets:
/// index 0 (`[` and `]`) encloses a regex union, index 1 (`<` and `>`)
/// encloses a regex sequence.
pub static RE_SET_DELIMITERS:[[u8;2];2] = [[b'[',b']'],[b'<',b'>']];
/// The punctuation used as the separator among items in regex sets (regex union and sequence).
// NOTE(review): `,` is also listed in DELIMITER_CHARS, so it can be chosen as a
// regex delimiter as well - confirm that this combination is intended.
pub static RE_SEPARATOR:u8 = b',';


/// DELIMITER_CHARS is the collection of punctuation characters that may be
/// used to compose a regular expression delimiter. It consists of the
/// following 20 characters: !#$%&*+,./:;=?@^_|~-
pub static DELIMITER_CHARS: [u8;20] = [0x21u8,0x23u8,0x24u8,0x25u8,0x26u8,0x2au8,0x2bu8,0x2cu8,0x2eu8,0x2fu8,0x3au8,0x3bu8,0x3du8,0x3fu8,0x40u8,0x5eu8,0x5fu8,0x7cu8,0x7eu8,0x2du8];

// A broader range of punctuation selection in forming RE delimiters: any non-alphanumeric, 
// non-backslash, non-whitespace character.
// [PCRE Delimiter Reference](https://pcre.org/original/doc/html/pcretest.html)
// Regex::new(r"^[^<>[\]\w\\\\s]+$").unwrap();
// Note: in a character class (square brackets) any character except ^, -, ] or \ is a literal.


/**
Customises the delimiter that encloses regex literals on the current thread.

The passed delimiter (a byte slice) consists of 1 up to 4 single-byte
characters. It is first validated with [`validate_delimiter`], i.e. against
[`DELIMITER_CHARS`]. When it is valid, it is converted into a `u32`, stored in
the thread-local static [`DELIMITER`], and the function returns `true`;
otherwise [`DELIMITER`] is not updated and the function returns `false`.

A regular expression literal (reliteral) is enclosed by delimiters.
A pair of forward slashes -- "/pattern_text/", originally from the
matching operator in Perl[^1] -- is used as the default delimiter, which
makes a reliteral distinct from the other
[Rust literal expressions](https://doc.rust-lang.org/reference/expressions/literal-expr.html).

Any delimiter sequence in the pattern text of a reliteral is prepended
with a backslash. To avoid using too many escaping backslashes, the reliteral
delimiter can be customised.

[^1]: <https://perldoc.perl.org/perlre>
*/
pub fn set_delimiter(delimiter:&[u8]) -> bool {
	if !validate_delimiter(delimiter) {
		return false;
	}
	// validate_delimiter rejects anything longer than 4 bytes, so the
	// conversion into u32 yields Some(..) here.
	DELIMITER.with(|slot| slot.set(bytes_to_u32(delimiter).unwrap()));
	true
}

/// Returns the delimiter currently stored in the thread-local [`DELIMITER`],
/// converted from its `u32` form back into bytes.
pub fn get_delimiter() -> Vec<u8> {
	DELIMITER.with(|slot| u32_to_bytes(slot.get()))
}
/**
Checks a delimiter candidate against [`DELIMITER_CHARS`].

A byte sequence is valid for enclosing pattern text when it consists of
either a single candidate punctuation character or the same character
repeated (up to 4 bytes in total). An empty slice, a slice longer than
4 bytes, a first byte that is not listed in [`DELIMITER_CHARS`], or a mix of
different bytes all yield `false`.

Note that the bracket-style delimiters accepted by many PCRE (Perl Compatible
Regular Expressions[^2]) engines are excluded from [`DELIMITER_CHARS`], as they
are reserved for delimiting elements in regex sets.

Note:
* There are two styles of delimiters in PCRE: matched delimiters and
bracket-style delimiters. This crate only uses matched delimiters: single
or multiple repeated punctuation characters (excluding the quote characters
" ' `) for enclosing regex literals. Bracket-style punctuation ([], <>, (),
{}, etc.) is reserved for regex set literals.
* The default forward-slash delimiter originates from Perl[^1].

[^1]: <https://perldoc.perl.org/perlre>
[^2]: <https://pcre.org/original/doc/html/pcretest.html>

*/
pub fn validate_delimiter(delimiter:&[u8]) -> bool {
	// limit the delimiter size to 4 bytes so that it fits into a u32
	if delimiter.len() > 4 {
		return false;
	}
	match delimiter.split_first() {
		// an empty delimiter cannot enclose anything (and must not panic)
		None => false,
		// the first byte must be a candidate punctuation character and every
		// following byte must repeat it
		Some((first,rest)) => {
			DELIMITER_CHARS.contains(first) && rest.iter().all(|byte| byte == first)
		},
	}
}

/// Packs a byte slice of at most 4 bytes into a `u32`, the first byte ending
/// up in the most significant occupied position.
///
/// Returns `None` when the slice is longer than 4 bytes; an empty slice
/// yields `Some(0)`.
fn bytes_to_u32(bytes:&[u8]) -> Option<u32> {
	if bytes.len() > 4 {
		return None;
	}
	// With at most 4 bytes, shifting the accumulator by 8 bits per byte never
	// pushes a set bit out of the u32.
	let packed = bytes
		.iter()
		.fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte));
	Some(packed)
}

/// Converts a `u32` into its big-endian bytes with the leading zero bytes
/// trimmed off. Zero bytes after the first non-zero byte are kept, and the
/// value `0` is returned as a single zero byte.
fn u32_to_bytes(c:u32) -> Vec<u8> {
	let trimmed:Vec<u8> = c
		.to_be_bytes()
		.iter()
		.copied()
		.skip_while(|&byte| byte == 0)
		.collect();
	if trimmed.is_empty() {
		vec![0]
	} else {
		trimmed
	}
}

/// ReSequence is the sequence of regex_automata::Regex (can be either 
/// single-pattern or multiple-pattern)  that can be utilized in a timeline
/// /series of matching events.
#[derive(Debug,Clone)] //To debug a struct in Rust, you can use the Debug trait. The Debug trait provides a way to format the output of a struct in a programmer-facing, debugging context
pub struct ReSequence(Vec<Regex>);

impl ReSequence {
	/// Construct a new, empty `ReSequence`
	/// The Regex vector as resequence's field 0 will not be allocated in initialization   
    #[inline]
	pub const fn new() -> Self { // need transfrom like this https://docs.rs/regex/latest/src/regex/regexset/string.rs.html? No
		 ReSequence(Vec::new()) 
	}
	/// Appends an element to the back of a collection.
	#[inline]
	pub fn push(&mut self, elem:Regex)	{
		self.0.push(elem);
	}
	/// return the number of elements in ReSequence struct.
	#[inline]
	pub fn len(&self) -> usize {
        self.0.len()
	}

	/// tell if the ReSequence struct is empty or not
	#[inline]
		pub fn is_empty(&self) -> bool {
	        self.0.is_empty()
		}
	// https://users.rust-lang.org/t/newtype-pattern-for-vec-how-to-implement-iter/52653/2
	/// Returns an iterator over the slice.
	///
	/// The iterator yields all items from start to end
	#[inline]
	pub fn iter(&self) -> Iter<'_, Regex> {
	        self.0.iter()
	 }
	 
    /// Returns an iterator that allows modifying each value.
    /// The iterator yields all items from start to end.
    #[inline]
	 pub fn iter_mut(&mut self) -> alloc::slice::IterMut<'_, Regex> {
	         self.0.iter_mut()
	 }

	 
	 /// Removes the last element from a vector and returns it, or None if it is empty.
	 #[inline]
	 pub fn pop(&mut self) -> Option<Regex> {	 	
	 	self.0.pop()
	 }


	/// Resequence is used as slice.
	#[inline]
	 pub fn as_slice(&self) -> &[Regex] {
	 	self.0.as_slice()
	 }	 
}


//Implementing Iterator https://doc.rust-lang.org/std/iter/index.html
//ref example https://stackoverflow.com/questions/34733811/what-is-the-difference-between-iter-and-into-iter
//ref example https://stackoverflow.com/questions/30218886/how-to-implement-iterator-and-intoiterator-for-a-simple-struct

/// The method that converts ReSequence into an Iterator, which works with `for` syntax.
#[cfg(not(no_global_oom_handling))]
impl IntoIterator for ReSequence {
    type Item = Regex;
    type IntoIter = IntoIter<Self::Item>;
    
	#[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.0.into_iter()
    }   
}

/// Create a value from an iterator.
//reference https://doc.rust-lang.org/std/iter/trait.FromIterator.html
#[cfg(not(no_global_oom_handling))]
impl FromIterator<Regex> for ReSequence {
	#[inline]
	fn from_iter<I: IntoIterator<Item = Regex>>(iter: I) -> Self {
			let mut c = ReSequence::new();
	        for i in iter {
	        	c.push(i);
	        }
	        c
	    }	    	
}



/// Identifiers for the kinds of regex literal recognised by this module.
#[derive(Debug,Clone)]
pub enum LiteralForm{
	/// Literal representation of a [`crate::ReSequence`]: a list of items
	/// enclosed by `<` and `>`.
	ReS, 
	/// Literal representation of a [`Regex`] that holds multiple patterns
	/// (a regex union): a list of items enclosed by `[` and `]`.
	ReU,
	/// Literal representation of a [`Regex`] that holds one pattern.
	Re,
} 



/// A collection of regular expression data artifacts produced from one
/// regex literal.
#[derive(Debug, Clone)]
pub struct XRegex {
	/// Item 0 is the regex delimiter (re_puncts) packed into a `u32`;
	/// item 1 is the literal string rebuilt while compiling (whitespace
	/// around the items is trimmed during parsing).
	pub literal:(u32,String),
	/// The compiled regexes collected while compiling the literal.
	pub data:ReSequence,
	/// The kind of literal this value was built from.
	pub kind:LiteralForm,
}

//Compare two XRegex structs, reference https://doc.rust-lang.org/std/cmp/trait.PartialEq.html

/// Two `XRegex` values are equal when their `literal` fields (delimiter code
/// and literal string) are equal; `data` and `kind` are not compared.
impl PartialEq for XRegex {
	fn eq(&self, other: &Self) -> bool {
		let (own_delimiter,own_text) = &self.literal;
		let (other_delimiter,other_text) = &other.literal;
		own_delimiter == other_delimiter && own_text == other_text
	}
}


//TODO: XRegex use some design pattern in future? https://rust-unofficial.github.io/patterns/patterns/behavioural/strategy.html

/// Converts reliteral bytes into an [`XRegex`], using the delimiter currently
/// stored in the thread-local [`DELIMITER`].
impl TryFrom<&[u8]> for XRegex {
	type Error = Error;//crate::error::Error

	fn try_from(value: &[u8]) -> Result<Self,Self::Error> {
		// read the delimiter out of the thread-local cell first, then run the
		// parse and compile stages with it
		let re_puncts:Vec<u8> = DELIMITER.with(|slot| u32_to_bytes(slot.get()));
		let metadata = parse(value,&re_puncts[..])?;
		compile(value,metadata,&re_puncts[..])
	}
}

//TODO: other construction option:read file as binary into utf-8 string: use std::fs; String::from_utf8_unchecked(&fs::read("address.txt")?)
//https://doc.rust-lang.org/std/str/trait.FromStr.html
/// convert reliteral string (&str) into XRegex
impl FromStr for XRegex {
	type Err = Error;
	fn from_str(value:&str) -> Result<Self,Self::Err> {
		Self::try_from(value.as_bytes())
	}	
}

impl XRegex {
	/// XRegex constructor creates XRegex struct with a customised re_puncts
	pub fn new(re_text:&str,re_puncts:&[u8]) -> Result<Self,Error> {
		if !validate_delimiter(re_puncts) {
			let mut msg = "Invalid delimiter:".to_owned();
			let msg_body = match str::from_utf8(re_puncts) {
				Err(_) => "non-utf8-code",
				Ok(puncts_str) => puncts_str,
			};
			msg.push_str(msg_body);
			Err(Error::Syntax(msg))
		}else {
			let reliteral = re_text.as_bytes();
			let metadata = parse(reliteral,re_puncts)?;
			compile(reliteral,metadata,re_puncts)
		}	
	}
	
	/// check if XRegex is Resequence or not
	pub fn is_seq(&self) -> bool {
		matches!(self.kind,LiteralForm::ReS)
		}

	/// get regex  from XRegex struct. None is returned if it is not item kind.
	pub fn get_regex(&mut self) -> Option<Regex> { //https://stackoverflow.com/questions/29662807/how-do-you-borrow-a-mutable-reference-during-a-match
		if self.is_seq() {None}
		else {self.data.pop()}
	}

	/// get regex sequence from XRegex struct. None is returned if it is not seq kind.
	pub fn as_slice(&self) -> Option<&[Regex]> {
	 	if self.is_seq() {
	 		Some(self.data.as_slice())
	 	}else {None}	
	}

	/// tell if its data is empty
	pub fn is_empty(&self)	-> bool {
		self.data.is_empty()
	}
	
}

//may develop a WASM version of `Meta`. memory allocation for wasm stack memory: 4G
//WebAssembly linear memory objects have sizes measured in pages. Each page is 65536 (2^16) bytes. In WebAssembly version 1, a linear memory can have at most 65536 pages, for a total of 2^32 bytes (4 gibibytes).
//https://stackoverflow.com/questions/40417774/memory-limits-in-webassembly#:~:text=1%20current%20WebAssembly%20implementations%20follow%20a%2032bit%20addressing,pages%20as%20something%20more%20%22safe%22%20than%20desktop%20applications.

/// Intermediate data handed from the parse stage to the compile stage when
/// converting a reliteral into an `XRegex`. One `Meta` is stored per parsed
/// item, keyed in the meta map by the byte index at which the item starts.
struct Meta {
	/// The literal kind of the item.
	kind:LiteralForm,
	/// The content range (byte indices) with the opening and closing
	/// delimiters excluded.
	range:(u32,u32),
	/// An optional list holding the meta map keys of the children items.
	children:Option<Vec<u32>>,
}


impl Meta {
    /// add child meda index to the meta children list
    fn add_child(&mut self,child_index:u32) {
		match &mut self.children { //match &mut self.children ??
			Some(ref mut vec) => {vec.push(child_index);}, // &mut, ref mut are omitted as the compiler can infer them
			//&mut Some(ref mut vec) => {vec.push(child_index)},
			//https://stackoverflow.com/questions/29662807/how-do-you-borrow-a-mutable-reference-during-a-match
			None => {self.children = Some(<Vec<u32>>::from([child_index]));},
			//&mut None => {self.children = Some(<Vec<u32>>::from([child_index]));},
		}
	}
	/// appoint the meta's closing range 
	fn finalise(&mut self, right_range:u32){
		self.range.1 = right_range;
	}
}
/// Constructs the `Meta` of a single regex item (`LiteralForm::Re`) from the
/// start and end byte indices of its content; the item has no children.
/// Both indices are narrowed to `u32` with `as`.
fn create_meta_re(start:usize,end:usize) -> Meta {
	let range = (start as u32,end as u32);
	Meta{kind:LiteralForm::Re,range,children:None}
}

/// analyse reliteral (the byte form of regex literal) with the preset 
/// re_puncts (the byte form of regex delimiter), a tuple of root meta 
/// index in reliteral and meta_map is returned when the execution is successful.
/// The max reliteral length is set to 32 bits, which makes the produced keys 
/// in meta_map (BTreeMap) is confined to u32.
fn parse(reliteral: &[u8],re_puncts: &[u8]) -> Result <(u32,BTreeMap<u32,Meta>),Error>{
	//metaMap stores all the meta data of literal forms and their indices
	let mut meta_map:BTreeMap<u32,Meta> = BTreeMap::new(); 
	let target_size = reliteral.len();
	let reliteral_bytes_fitting_in_u32 = u32::try_from(target_size);
	if let Err(err) = reliteral_bytes_fitting_in_u32 {
		return Err(Error::Syntax(format!("Invalid reliteral as its size exceeds the limit of 2^32 bytes: {err}")));
	}


	let re_delimiter_length = re_puncts.len();
	//walk through all bytes of reliteral
	let mut index:usize = 0;
	let start = util::offset_ws(reliteral,index);
	
	index = start;
	let walk_over = proceed(&mut index,reliteral,re_puncts,&mut meta_map);
	if walk_over {	
		let end = util::offset_ws(reliteral,index);							
		if end != target_size {
			return Err(Error::Syntax(format!("Invalid reliteral - an unparsed tail from byte index {end}.")));
		}
		if meta_map.is_empty() {
			return Err(Error::Syntax("Invalid reliteral - no meta data has been parsed.".to_owned()))
		}								
		Ok((start as u32,meta_map))
	} else {
	Err(Error::Syntax("Unrecognized reliteral format!".to_owned()))
	}							
}

	/// Tries to read the root item of `reliteral` at byte index `*i` as a
	/// single regex, then as a regex union, then as a regex sequence, filling
	/// `meta_map` along the way. Returns `true` as soon as one form matches,
	/// with `*i` moved past the item.
	///
	/// A failed attempt may already have advanced `*i`, so the index is
	/// rewound before the next form is tried. Otherwise the next form would
	/// start in the middle of the input and e.g. `[/a/</b/>` would be
	/// accepted as the union `[/a/]`.
	fn proceed(i:&mut usize,reliteral: &[u8],re_puncts: &[u8],meta_map:&mut BTreeMap<u32,Meta>) -> bool {
		let origin = *i;
		if store_re(None,i,reliteral,re_puncts,meta_map) {
			return true;
		}
		*i = origin;
		if store_reu(None,i,reliteral,re_puncts,meta_map) {
			return true;
		}
		*i = origin;
		store_res(None,i,reliteral,re_puncts,meta_map)
	}
		
		/// Tries to read a single delimited regex starting at byte index `*i`.
		///
		/// On success the item's meta is inserted into `meta_map` under its
		/// start index, the index is registered as a child of the parent
		/// item when `pc_index` (the meta map key of the enclosing set
		/// literal) is given, `*i` is moved past the closing delimiter and
		/// `true` is returned. `false` is returned, with `*i` untouched, when
		/// no regex is found or the parent meta cannot be located.
		fn store_re(pc_index:Option<u32>,i:&mut usize,reliteral: &[u8],re_puncts: &[u8],meta_map:&mut BTreeMap<u32,Meta>) -> bool {
			let this_index = *i as u32;
			let indices = match find_re_range(*i,reliteral,re_puncts) {
				Some(found) => found,
				None => return false,
			};
			// create and insert the re item
			meta_map.insert(this_index,create_meta_re(indices[0],indices[1]));
			// add the index to its parent meta when a parent is expected
			if let Some(pos) = pc_index {
				match meta_map.get_mut(&pos) {
					Some(parent) => parent.add_child(this_index),
					// the parent item can't be located by index in meta_map
					None => return false,
				}
			}
			*i = indices[1] + re_puncts.len();
			true
		}
		
		fn store_reu(pc_index:Option<u32>,i:&mut usize,reliteral: &[u8],re_puncts: &[u8],meta_map:&mut BTreeMap<u32,Meta>) -> bool {
			let start = *i;
			let this_index = start as u32;
			if (reliteral[start] == RE_SET_DELIMITERS[0][0]){ //matching ReUnion delimiter
				*i += 1; 
				let mut this_meta = Meta{kind:LiteralForm::ReU,range:(*i as u32,*i as u32),children:None};
				meta_map.insert(this_index,this_meta);
				*i = util::offset_ws(reliteral,*i);//stepping over whitespace characters
				
				if !store_re(Some(this_index),i,reliteral,re_puncts,meta_map) {
					return false;
				}
				//make sure getting first item
				let mut proceeding = true;
				while proceeding { //iteratively collecting RE SEPARATOR and item
					*i = util::offset_ws(reliteral,*i);//stepping over whitespace characters after the re item
					if reliteral[*i] == RE_SEPARATOR { //matching the SEPARATOR character
						*i += 1; //stepping RE_SEPARATOR
						*i = util::offset_ws(reliteral,*i);//stepping over whitespace characters after RE_SPERATOR
							
						if !store_re(Some(this_index),i,reliteral,re_puncts,meta_map) {
							return false;
						}//the function returns a false abnormality when no item follows RE_SEPARATOR
					}else {proceeding = false;}
				}
				if	reliteral[*i] == RE_SET_DELIMITERS[0][1]  { //matching the closing delimiter of ReU
					//assign i to the right range
					
					if let Some(this_meta) = meta_map.get_mut(&this_index){
						this_meta.finalise(*i as u32);
					}
					
					 
					if let Some(pos) = pc_index {
						if let Some(meta) = meta_map.get_mut(&pos) {
							meta.add_child(this_index); 
						} else {
							return false;//unable to locate the parent item by index in meta_map
						}	
					}														
					*i += 1; //stepping over the closing RE_SET_DELIMITER
					return true;
				}
			} //end of if matching ReUnion delimiter
			false	
		}
		
		fn store_res(pc_index:Option<u32>,i:&mut usize,reliteral: &[u8],re_puncts: &[u8],meta_map:&mut BTreeMap<u32,Meta>) -> bool {
			let start = *i;
			let this_index = start as u32;
			if (reliteral[start] == RE_SET_DELIMITERS[1][0]){ //matching ReSequence delimiter
				*i += 1; 
				let this_meta = Meta{kind:LiteralForm::ReS,range:(*i as u32,*i as u32),children:None};
				meta_map.insert(this_index,this_meta);
				*i = util::offset_ws(reliteral,*i);//stepping over whitespace characters
				if !store_re(Some(this_index),i,reliteral,re_puncts,meta_map) 
               && !store_reu(Some(this_index),i,reliteral,re_puncts,meta_map) {
					return false;
				}
				//make sure getting first item
				let mut proceeding = true;
				while proceeding { //iteratively collecting RE SEPARATOR and item
					*i = util::offset_ws(reliteral,*i);//stepping over whitespace characters after the re item
					if reliteral[*i] == RE_SEPARATOR { //matching the SEPARATOR character
						*i += 1; //stepping RE_SEPARATOR
						*i = util::offset_ws(reliteral,*i);//stepping over whitespace characters after RE_SPERATOR
						if !store_re(Some(this_index),i,reliteral,re_puncts,meta_map) && 
						!store_reu(Some(this_index),i,reliteral,re_puncts,meta_map) {
							return false;
						}
					}else {proceeding = false;}
				}
				if	reliteral[*i] == RE_SET_DELIMITERS[1][1]  { //matching the closing delimiter of ReS
					//assign i to the right range
					if let Some(this_meta) = meta_map.get_mut(&this_index){
						this_meta.finalise(*i as u32);
					}
					
					 
					if let Some(pos) = pc_index {
						if let Some(meta) = meta_map.get_mut(&pos) {
							meta.add_child(this_index); 
						} else {
							return false;//unable to locate the parent item by index in meta_map
						}	
					}													
					
					*i += 1; //stepping over the closing RE_SET_DELIMITER										
					return true;
				}
			} //end of if matching ReSequence delimiter
		  false						
		} //end of fn store_res								



/// Construct regex from meta_ref, save it into pool, and return literal back
fn compile_re(source:&[u8],pool:&mut ReSequence,meta_ref:&Meta,re_puncts:&[u8]) -> Result<String,Error> {
		let start = meta_ref.range.0 as usize;
		let stop = meta_ref.range.1 as usize;
		let regex = regex_from_delimited_literal(&source[start..stop],re_puncts)?;	
		pool.push(regex);//store to pool data
		let re_puncts_length = re_puncts.len();
		let full_start = start-re_puncts_length;
		let full_stop = stop+re_puncts_length;
		let postback_bytes = &source[full_start..full_stop];
		match str::from_utf8(postback_bytes) {
			Ok(postback) => Ok(postback.to_owned()) ,
			Err(error) => Err(Error::from_utf8_error(error,full_start)) ,
		}		
}


/// construct regexset from meta_ref and meta_map, save it into pool, and return literal
fn compile_reu(source:&[u8],pool:&mut ReSequence, meta_ref:&Meta,meta_map:&BTreeMap<u32,Meta>,re_puncts:&[u8]) 
	-> Result<String,Error> {
	let range:[u32;2] = [meta_ref.range.0,meta_ref.range.1]; 
	if let Some(ref children_indices) = meta_ref.children { 
	//an alternative way is to use children_indices.iter().map (|x| ...)
		
		let mut re_union:Vec<&[u8]> = Vec::new(); //store regex patterns (without delimiters) into regex union
		//refactor re_bytes to re_literals
		let mut re_literals:Vec<&str> = Vec::new(); //store regex literals (including regex delimiters) into regex literal 
		let puncts_length = re_puncts.len();
		//an array of u8 vector is needed to be constructed beforehand
		//https://stackoverflow.com/questions/70510299/how-to-declare-a-static-array-of-vectors#:~:text=If%20you%20know%20the%20size%20of%20the%20%22vec%22,This%20lets%20you%20write%20something%20like%20the%20following%3A
		for child_start_index in children_indices.iter() {
			if let Some(child_meta_ref) = meta_map.get(child_start_index){

				let start = child_meta_ref.range.0 as usize;
				let stop = child_meta_ref.range.1 as usize;
				let full_start = start - puncts_length;
				let full_stop = stop + puncts_length;
			
				let re_item = &source[start..stop];
				re_union.push(re_item);

				match str::from_utf8(&source[full_start..full_stop]) { //get literal
					Ok(re_literal) => re_literals.push(re_literal),
					Err(err) => return Err(Error::from_utf8_error(err,full_start)),
				}
			} else { //the case that child meta is not found by the child_start_index
				return Err(Error::Syntax(
					format!("The literal of ReU (RegexUnion) ranging {range:?} does not have valid Regex item at byte index {child_start_index}.")
				));
			}
		}
		if re_union.is_empty() {
			return Err(Error::Syntax(
				format!("The literal for ReU (RegexUnion) ranging {range:?} contains 0 regex item.")
			));
		}

		let regexset = regexset_from_delimited_literals(&re_union[..],re_puncts)?;
		pool.push(regexset);
		let mut postback_string = String::from("");
		postback_string.push(char::from_u32(RE_SET_DELIMITERS[0][0] as u32).unwrap());
		postback_string.push_str(&re_literals.join(str::from_utf8(&[RE_SEPARATOR]).unwrap()));
		postback_string.push(char::from_u32(RE_SET_DELIMITERS[0][1] as u32).unwrap());
		Ok(postback_string)	
	} else { //when meta_ref.children is None
		Err(Error::Syntax(
		format!("The literal for ReU (Regex Union) ranging {range:?}  does not have valid Regex item.")
		))
	}
} 

/// construct a vector of regex data from meta_ref and meta_map, save it to pool and post back literal
fn compile_res(source:&[u8],pool:&mut ReSequence, meta_ref:&Meta,meta_map:&BTreeMap<u32,Meta>,re_puncts:&[u8]) -> 
Result<String,Error> {	
		let range:[u32;2] = [meta_ref.range.0,meta_ref.range.1];
		
		if let Some(ref children_indices) = meta_ref.children {
		 	let sequence_length = children_indices.len();
		 	let mut literal_seq:Vec<String> = Vec::with_capacity(sequence_length);
		 	for child_start_index in children_indices.iter() {
				if let Some(child_meta_ref) = meta_map.get(child_start_index){
						match child_meta_ref.kind {
							LiteralForm::Re => {
								let re = compile_re(source,pool,child_meta_ref,re_puncts)?;
								literal_seq.push(re);
							},
							LiteralForm::ReU => {
								let reu = compile_reu(source,pool,child_meta_ref,meta_map,re_puncts)?;
								literal_seq.push(reu);
							},
							_ => {
								return Err(Error::Syntax(
											format!("The literal of ReS (ReSequence) ranging {range:?} has encountered an unhandled meta kind at index {child_start_index}.")
											)
										);
							},
						}
				} else { //child meta is not found by child_start_index
						return Err(Error::Syntax(
							format!("Within ReS (ReSequence) ranging {range:?}, the Regex item cannot be located by its byte index {child_start_index}.")
						));
				}
		 	}
		 	if literal_seq.is_empty() {
				return Err(Error::Syntax(
					format!("The literal for ReS (ReSequence) ranging {range:?} contains 0 Regex item.")
				));
			}

			let mut postback_string = String::from("");
			postback_string.push(char::from_u32(RE_SET_DELIMITERS[1][0] as u32).unwrap());
			let joined = &literal_seq[..].join(str::from_utf8(&[RE_SEPARATOR]).unwrap()); //to be tested
			postback_string.push_str(&literal_seq.join(&joined[..]));
			postback_string.push(char::from_u32(RE_SET_DELIMITERS[1][1] as u32).unwrap());
			Ok(postback_string)	

		}  else { 
			Err(Error::Syntax(
			format!("The literal for ReS (Regex Sequence) positioned ranging {range:?} has zero Regex item.")
			))
		}
}

 /// the method constructs XRegex data given reliteral source and the parsed metadata
 fn compile(source:&[u8],parsed:(u32,BTreeMap<u32,Meta>),re_puncts:&[u8]) -> Result<XRegex, Error> {
			let re_delimiter_length = re_puncts.len();
			let index = parsed.0;
			let meta_map = &(parsed.1); 
			if let Some(meta_ref) = meta_map.get(&index) {
			
				let mut pool:ReSequence = ReSequence::new();
				let mut pool_ref = &mut pool;
				let delimiter = bytes_to_u32(re_puncts).ok_or(Error::Syntax("Failed in delimiter transcoding.".to_owned()))?;//double check	
							
				match meta_ref.kind {
					LiteralForm::Re => {
						let re = compile_re(source,pool_ref,meta_ref,re_puncts)?; 
						Ok(XRegex{data:pool,literal:(delimiter,re),kind:LiteralForm::Re})
					}, 
					LiteralForm::ReU => {
						let reu = compile_reu(source,pool_ref,meta_ref,meta_map,re_puncts)?;
						Ok(XRegex{data:pool,literal:(delimiter,reu),kind:LiteralForm::ReU}) 
					}, 
					LiteralForm::ReS => {
						let res = compile_res(source,pool_ref,meta_ref,meta_map,re_puncts)?;
						Ok(XRegex{data:pool,literal:(delimiter,res),kind:LiteralForm::ReS})
					},
				} 				
			} 
			else {
				Err(Error::Syntax(format!("No meta data indexed at {index} in meta_map.")))
			}
 }

/// Construct regex from a delimited literal. @todo ,refactor it for storing original escaped characters
fn regex_from_delimited_literal(rebody:&[u8],delimiter:&[u8]) -> 
Result<Regex,Error> {
	let unescaped = match util::unescape_from_bytes(rebody,delimiter){
		Ok(text) => text,
		Err(err_info) => return Err(Error::Syntax(err_info)),
	};	
	Regex::new(&unescaped[..]).map_err(Error::from_meta_build_error)
}	

/// Construct regex from an array of delimited literals.
fn regexset_from_delimited_literals(rebodies:&[&[u8]],delimiter:&[u8]) -> 
Result<Regex,Error> {
	let mut vec = Vec::new();//for storing  reliterals (String type) representing Re
	for bytes_ref in rebodies.iter() {
		let unescaped = match util::unescape_from_bytes(bytes_ref,delimiter){
			Ok(text) => text,
			Err(err_info) => return Err(Error::Syntax(err_info)),
		};
		vec.push(unescaped.into_owned());
	}
	//get the references from iterator, following example:https://doc.rust-lang.org/std/vec/struct.Vec.html#method.iter
	let mut ref_vec = Vec::new();
	let vec_refs = &vec;
	for bytes_ref in vec_refs.iter(){ //iter() iterates over &String (= &str)
		ref_vec.push(bytes_ref);
	}
	Regex::new_many(&ref_vec).map_err(Error::from_meta_build_error)
}

/// Given the starting index `i` in the reliteral bytes, finds the content
/// range of the regex literal enclosed by a pair of delimiters (`re_puncts`,
/// in UTF-8 bytes) that opens at `i`.
///
/// Returns `[content_start, content_end]` (the index right after the opening
/// delimiter and the index of the closing delimiter) if found. `None` is
/// returned when the literal does not open with the delimiter at `i`, when
/// there is no room for content plus a closing delimiter, when no unescaped
/// closing delimiter exists, or when `util::infer_char_size` reports 0 for a
/// byte. A delimiter preceded by an unpaired backslash is treated as escaped.
fn find_re_range(i:usize,reliteral:&[u8],re_puncts:&[u8]) -> Option<[usize;2]> {
	let delim_len = re_puncts.len();
	let total = reliteral.len();

	// (1) there must be content besides the pair of delimiters and
	// (2) the opening delimiter must sit at `i`
	if total <= i + 2 * delim_len || &reliteral[i..i + delim_len] != re_puncts {
		return None;
	}
	let content_start = i + delim_len;

	let mut cursor = i;
	let mut step = delim_len; // the first move jumps over the opening delimiter
	let mut escaped = false;
	while step > 0 {
		cursor += step;
		if cursor + delim_len > total {
			return None;//the closing delimiter is not found till the end of bytes
		}
		let code = reliteral[cursor];
		let char_length = util::infer_char_size(code);
		if char_length == 0 {
			println!("invalid UTF code is found at index {}",cursor);
			return None;
		} else if char_length == 1 {
			if code == b'\\' {
				// a backslash toggles the state, so a pair of them cancels out
				escaped = !escaped;
			} else if escaped {
				// this byte is consumed by the preceding backslash
				escaped = false;
			} else if &reliteral[cursor..cursor + delim_len] == re_puncts {
				// delimiter characters are 1-byte characters only
				return Some([content_start,cursor]);
			}
		} else {
			// a multi-byte character ends any pending escape
			escaped = false;
		}
		step = char_length as usize;
	}
	None
}

#[cfg(test)]
mod tests {
	use super::*;

	/// Escaped delimiters inside the pattern must not close the literal.
	#[test]
	fn test_find_re_range(){
		let re_bytes = br"/(?i)\/ab+c\//";
		let range = find_re_range(0,re_bytes,b"/");
		assert_eq!(range,Some([1,13]));
	}

	/// The delimiter `/` appears escaped in the literal body and has to be
	/// matched as a plain slash by the compiled regex.
	#[test]
	fn test_regex_from_delimited_literal(){
		let body = br"(?i)ab+c\/";
		let re0 = regex_from_delimited_literal(body,b"/").unwrap();
		assert!(re0.is_match("ABBBC/"));
	}

	/// Two literal bodies are compiled into one multi-pattern regex; the
	/// matches carry the index of the pattern that produced them.
	#[test]
	fn test_regexset_from_delimited_literals(){
		let my_text = "ABBBC abc123";
		let reunion:[&[u8];2] = [br"(?i)ab+c",br"\d+"];
		let my_set = regexset_from_delimited_literals(&reunion,b"/").unwrap();
		let matches:Vec<Match> = my_set.find_iter(my_text).collect();
		let expected = vec![
			Match::must(0,0..5),
			Match::must(0,6..9),
			Match::must(1,9..12),
		];
		assert_eq!(matches,expected);
	}
}