// rustyphoenixlecture 1.3.0

/***************************************
	Auteur : Pierre Aubert
	Mail : pierre.aubert@lapp.in2p3.fr
	Licence : CeCILL-C
****************************************/

use std::path::PathBuf;
use std::fs;

use crate::phighlighter::plocation::PLocation;

///Iterator on a PFileParser
///
/// Walks the content of a parser byte by byte and keeps, next to the current
/// byte, the bookkeeping needed by the matching helpers : location in the file,
/// previous byte, indentation level of the current line and escaping state.
#[derive(Debug,Clone,Default)]
pub struct PFileParserIter<'a>{
	///Location of the parsed file (file name, current line and current column)
	p_location: PLocation,
	///Iterator of the content of the parser (bytes which have not been read yet)
	p_iter_content: std::slice::Iter<'a, u8>,
	///Current char, None when the end of the content is reached
	p_current_char: Option<u8>,
	///Previous char, None as long as no char was left behind
	p_prev_char: Option<u8>,
	///Current indentation level of the parser (number of spaces or tabulations counted since the last new line), None once the indentation of the line is over
	p_current_indentation_level: Option<usize>,
	///True if we are on the first indented char which is not a space or tabulation
	p_is_first_indented_char: bool,
	///Escape char of the parser, None if escaping is disabled
	p_escape_char: Option<u8>,
	///Say if the current char is escaped or not
	/// (the `chat` in the name is a typo for `char`, kept because the rest of the file refers to this field)
	p_is_current_chat_escaped: bool,
}

impl<'a> PFileParserIter<'a>{
	///Get the PLocation of the current PFileParserIter
	/// # Returns
	/// Location of the current PFileParserIter
	pub fn get_location(&self) -> &PLocation{
		&self.p_location
	}
	///Say if the iterator is at the end of the file or not
	/// # Returns
	/// True if the iterator is at the end of the file, false otherwise
	pub fn is_end_of_file(&self) -> bool{
		self.p_current_char.is_none()
	}
	///Get a string composed of characters given by charset
	/// # Parameters
	/// - `charset` : set of characters to be found at the current iterator position
	/// # Returns
	/// Longest run of chars of `charset` starting at the current position (can be empty).
	/// The iterator is left on the first char which is not in `charset`
	pub fn get_str_of(&mut self, charset: &String) -> String{
		let mut out = String::new();
		while let Some(ch) = self.p_current_char {
			if !charset.contains(ch as char) {
				break;
			}
			out.push(ch as char);
			self.next();
		}
		out
	}
	///Check if the current char is in the given charset
	/// # Parameters
	/// - `charset` : set of characters to be found at the current iterator position
	/// # Returns
	/// True if the current char is in the given charset, false otherwise (also false at the end of the file)
	pub fn is_current_char_in_charset(&self, charset: &String) -> bool{
		self.p_current_char.is_some_and(|ch| charset.contains(ch as char))
	}
	///Check if the previous char is in the given charset
	/// # Parameters
	/// - `charset` : set of characters to be found just before the current iterator position
	/// # Returns
	/// True if the previous char is in the given charset, false otherwise (also false if there is no previous char)
	pub fn is_prev_char_in_charset(&self, charset: &String) -> bool{
		self.p_prev_char.is_some_and(|ch| charset.contains(ch as char))
	}
	///Get the current char of the PFileParser
	/// # Returns
	/// Current char as u8 or None
	pub fn get_current_char(&self) -> Option<u8>{
		self.p_current_char
	}
	///Iterate on the next char
	///
	/// Updates, in this order : the previous char, the escaping state, the indentation
	/// level, the current char and finally the location.
	/// A new line char is reported on column 0 of the line it starts.
	pub fn next(&mut self){
		self.p_prev_char = self.p_current_char;
		//The char we are leaving escapes the next one only if it is an escape char which was not escaped itself
		self.p_is_current_chat_escaped = match (self.p_escape_char, self.p_current_char) {
			(Some(escape_char), Some(ch)) => escape_char == ch && !self.p_is_current_chat_escaped,
			_ => false,
		};
		//Now we can move to the next one
		let next_char = self.p_iter_content.next().copied();
		match next_char {
			Some(ch) => {
				if self.p_is_first_indented_char {
					//The char before was the first indented char which was not a space or tabulation character
					self.p_is_first_indented_char = false;
					self.p_current_indentation_level = None;
				}else if let Some(indentation_level) = self.p_current_indentation_level.as_mut() {
					//Here we have to update the current indentation level because there is one
					if ch == b' ' || ch == b'\t' {
						*indentation_level += 1;
					}else{	//First char which is not an indentation character, the level will be disabled on the next move
						self.p_is_first_indented_char = true;
					}
				}
				//If there is no more indentation level, we have to wait for the next line
			},
			None => self.p_current_indentation_level = None,
		}
		self.p_current_char = next_char;
		
		let next_col = self.p_location.get_current_col() + 1;
		self.p_location.set_current_col(next_col);
		if self.is_new_line() {	//If we found a new line
			let next_line = self.p_location.get_current_line() + 1;
			self.p_location.set_current_line(next_line);	//We increment the line counter
			self.p_location.set_current_col(0);		//We reset the column
			self.p_current_indentation_level = Some(0);	//We have an indentation of 0
			self.p_is_first_indented_char = false;		//We are not the first non-white indented char because this is a newline
		}
	}
	///Says if the current char is a new line
	/// # Returns
	/// True if the current char is a new line, false otherwise
	pub fn is_new_line(&self) -> bool {
		self.p_current_char == Some(b'\n')
	}
	///Say if the given pattern matches the current location of the PFileParserIter
	/// # Parameters
	/// - `pattern` : pattern to be matched
	/// # Returns
	/// True if the given pattern matches the current location of the PFileParserIter then the iterator will move at the end of the pattern, false otherwise and the iterator does not move.
	/// An empty pattern never matches
	pub fn is_match(&mut self, pattern: &String) -> bool{
		!pattern.is_empty() && self.is_match_u8(pattern.as_bytes())
	}
	///Say if the given current_char matches the current char of the PFileParserIter
	/// # Parameters
	/// - `current_char` : expected current character
	/// # Returns
	/// True if the given current_char matches the current char of the PFileParserIter, false otherwise
	fn is_match_current_char(&self, current_char: u8) -> bool{
		self.p_current_char == Some(current_char)
	}
	///Say if the given pattern matches the current location of the PFileParserIter
	/// # Parameters
	/// - `vec_u8_pattern` : pattern to be matched
	/// # Returns
	/// True if the given pattern matches the current location of the PFileParserIter then the iterator will move at the end of the pattern, false otherwise and the iterator does not move.
	/// An escaped current char never matches
	fn is_match_u8(&mut self, vec_u8_pattern: &[u8]) -> bool{
		if self.p_is_current_chat_escaped {	//If the current char is escaped, it cannot match
			return false;
		}
		//Cheap rejection : nothing has moved yet, so there is nothing to save or to roll back
		if let Some(&first_char) = vec_u8_pattern.first() {
			if !self.is_match_current_char(first_char) {
				return false;
			}
		}
		//Let's save the whole state before any move. Saving everything (and not only a few fields)
		//guarantees that the previous char and the escaping state are rolled back too
		let saved_state = self.clone();
		for &expected_char in vec_u8_pattern {
			if !self.is_match_current_char(expected_char) {
				//Let's get the iterator at its previous position
				*self = saved_state;
				return false;
			}
			self.next();
		}
		true
	}
	///Get the String until the end_pattern is found
	/// # Parameters
	/// - `end_pattern` : pattern which ends the output string
	/// # Returns
	/// String from the current position of the iterator to the end_pattern (the pattern is consumed but not part of the output).
	/// If the pattern is never found, the output goes to the end of the file. An empty pattern gives an empty string and the iterator does not move
	pub fn get_until(&mut self, end_pattern: &String) -> String{
		if end_pattern.is_empty() {
			return String::new();
		}
		let pattern = end_pattern.as_bytes();
		let mut out = String::new();
		//While we do not find the end_pattern we continue
		while !self.is_match_u8(pattern) {
			let Some(ch) = self.p_current_char else {
				break;	//End of the file
			};
			out.push(ch as char);
			self.next();	//Let's go to the next character
		}
		out
	}
	///Get the indentation level of the current character
	/// # Returns
	/// Option which contains the current indentation level if there is one or None if other characters than spaces or tabulations are found
	pub fn get_indentation_level(&self) -> &Option<usize> {
		&self.p_current_indentation_level
	}
}

///Parser of a file
///
/// Owns the raw bytes of the content to be parsed together with the name of the
/// file they come from. The content is read through a PFileParserIter.
pub struct PFileParser{
	///Path to the parsed file
	p_filename: PathBuf,
	///Text content of the file, stored as raw bytes
	p_content: Vec<u8>,
}

impl PFileParser{
	///Build a PFileParser with the content of a file read from the disk
	/// # Parameters
	/// - `filename` : name of the file to be loaded
	/// # Returns
	/// Initialised PFileParser
	/// # Panics
	/// If the file cannot be read as a text file
	pub fn from_file(filename: &PathBuf) -> Self{
		match fs::read_to_string(filename) {
			Ok(text) => PFileParser{
				p_filename: filename.clone(),
				p_content: text.into_bytes(),
			},
			Err(err) => panic!("PFileParser::from_file : cannot open file {:?}\n\tError {}",filename, err)
		}
	}
	///Build a PFileParser directly from a text, without any file on the disk
	/// # Parameters
	/// - `content` : content of the file to be used
	/// # Returns
	/// Initialised PFileParser, its file name is "generated file"
	pub fn from_content(content: &String) -> Self{
		PFileParser{
			p_filename: PathBuf::from("generated file"),
			p_content: content.as_bytes().to_vec(),
		}
	}
	///Get the iterator on the current PFileParser
	/// # Parameters
	/// - `is_escape_char` : true if we want to activate an escape char (the backslash)
	/// # Returns
	/// Iterator on the current PFileParser, already placed on the first char of the content
	pub fn iter(&self, is_escape_char: bool) -> PFileParserIter<'_>{
		let escape_char = is_escape_char.then_some(b'\\');
		let mut cursor = PFileParserIter{
			p_location: PLocation::new(&self.p_filename, 1, 0),
			p_iter_content: self.p_content.iter(),
			p_current_char: None,
			p_prev_char: None,
			p_current_indentation_level: Some(0),
			p_is_first_indented_char: false,
			p_escape_char: escape_char,
			p_is_current_chat_escaped: false,
		};
		//The first move loads the first char of the content as current char
		cursor.next();
		cursor
	}
	///Set the filename of the current PFileParser
	/// # Parameters
	/// - `filename` : new filename of the current PFileParser
	pub fn set_filename(&mut self, filename: &PathBuf){
		self.p_filename.clone_from(filename);
	}
	///Get the filename of the current PFileParser
	/// # Returns
	/// Filename of the current PFileParser
	pub fn get_filename(&self) -> &PathBuf{
		&self.p_filename
	}
}

#[cfg(test)]
mod tests{
	use super::*;
	use std::fs;
	
	///Check that str::contains finds every expected char of a charset
	#[test]
	fn test_contains_charset(){
		let charset = "shadok".to_string();
		for needle in ["s", "h", "d"] {
			assert!(charset.contains(needle));
		}
	}
	
	///Check a PFileParser built from a text : start location and get_str_of
	#[test]
	fn test_pfileparser_from_content(){
		let parser = PFileParser::from_content(&"shadok".to_string());
		let mut cursor = parser.iter(false);
		let start = cursor.get_location();
		assert_eq!(start.get_current_line(), 1);
		assert_eq!(start.get_current_col(), 1);
		assert_eq!(cursor.get_str_of(&"ahsk".to_string()), "sha");
	}
	///Check a PFileParser built from a file : location, matching, rewind and indentation
	#[test]
	fn test_pfileparser_from_file(){
		let filename = PathBuf::from("target/parse_test.txt");
		fs::write(&filename, "Some\nText\nOn\nfew\nlines\n\tIndented\n").unwrap();
		let parser = PFileParser::from_file(&filename);
		assert_eq!(parser.get_filename(), &filename);
		let mut cursor = parser.iter(false);
		assert_eq!(*cursor.get_indentation_level(), Some(0));
		assert_eq!(cursor.get_location().get_filename(), &filename);
		assert_eq!(cursor.get_location().get_current_line(), 1);
		assert_eq!(cursor.get_location().get_current_col(), 1);
		assert_eq!(cursor.get_str_of(&"oemS".to_string()), "Some");
		assert_eq!(cursor.get_location().get_current_line(), 2);
		assert_eq!(cursor.get_location().get_current_col(), 0);
		
		//The cursor stopped on the new line which follows "Some"
		assert_eq!(cursor.get_current_char(), Some(b'\n'));
		assert!(cursor.is_new_line());
		assert_eq!(cursor.get_location().to_string(), "\"target/parse_test.txt\":2:0");
		
		cursor.next();	//Leave the new line
		assert!(cursor.is_match(&"Text".to_string()));
		assert!(!cursor.is_match(&String::new()));
		cursor.next();	//Leave the next new line
		//The location before a failed is_match has to be the same as the one after it
		assert_eq!(cursor.get_location().get_current_line(), 3);
		assert_eq!(cursor.get_location().get_current_col(), 1);
		//This token does not exist, so the cursor has to rewind to the previous position
		assert!(!cursor.is_match(&"Off".to_string()));
		assert_eq!(cursor.get_location().get_current_line(), 3);
		assert_eq!(cursor.get_location().get_current_col(), 1);
		//We are still on the O
		assert_eq!(cursor.get_current_char(), Some(b'O'));
		//Everything before a given token
		assert_eq!(cursor.get_until(&"lines".to_string()), "On\nfew\n");
		assert_eq!(cursor.get_until(&String::new()), "");
		//The cursor is on the new line
		assert_eq!(*cursor.get_indentation_level(), Some(0));
		cursor.next();
		//The cursor is on the tabulation
		assert_eq!(*cursor.get_indentation_level(), Some(1));
		cursor.next();
		assert_eq!(*cursor.get_indentation_level(), Some(1));
		cursor.next();
		assert!(cursor.get_indentation_level().is_none());
	}
	///Check that get_until does not stop on an escaped end pattern
	#[test]
	fn test_limit_pfileparser_getuntil(){
		let parser = PFileParser::from_content(&"Some text\\\"but not the end yet\"".to_string());
		let mut cursor = parser.iter(true);
		//The escaped quote is part of the output, only the last one ends it
		assert_eq!(cursor.get_until(&"\"".to_string()), "Some text\\\"but not the end yet");
	}
	///Check that get_until stops on an end pattern which follows an escaped escape char
	#[test]
	fn test_limit_pfileparser_getuntil_multiple_escape(){
		let parser = PFileParser::from_content(&"\\\\\" other stuff".to_string());
		let mut cursor = parser.iter(true);
		//The second backslash is escaped by the first one, so the quote is not escaped
		assert_eq!(cursor.get_until(&"\"".to_string()), "\\\\");
	}
	
	///Check the previous char and the indentation level around a new line
	#[test]
	fn test_pfileparser_from_content_indentation(){
		let parser = PFileParser::from_content(&"A\n# Introduction\n".to_string());
		let letter_a = "A".to_string();
		let mut cursor = parser.iter(false);
		assert!(!cursor.is_prev_char_in_charset(&letter_a));
		assert_eq!(cursor.get_location().get_current_line(), 1);
		assert_eq!(cursor.get_location().get_current_col(), 1);
		cursor.next();
		assert_eq!(cursor.get_location().get_current_line(), 2);
		assert_eq!(cursor.get_location().get_current_col(), 0);
		//The cursor is on the new line
		assert!(cursor.is_prev_char_in_charset(&letter_a));
		assert_eq!(*cursor.get_indentation_level(), Some(0));
		cursor.next();
		//The cursor is on the #
		assert_eq!(*cursor.get_indentation_level(), Some(0));
		assert_eq!(cursor.get_current_char(), Some(b'#'));
	}
}