frostwalker 0.1.2

A TOML-like configuration language parser with zero dependencies outside of std.
Documentation
//! Module containing the lexer which turns the source into a Token list.
use super::{Token, Class};

/// Alias of [`tokenize`] under its British English spelling.
///
/// Forwards `source` untouched and hands back the Token list it produces.
pub fn tokenise(source: &str) -> Vec<Token> {
	tokenize(source)
}

/// Takes in a source string and turns it into a Token list.
///
/// Any unknown text will be given the class UNKNOWN and will fail when reaching the validator.
///
/// ```
/// use frostwalker::{lexer, Class, Token};
///
/// let list = lexer::tokenize("meaning_of_life = 42");
/// let ident = Token { class: Class::IDENTIFIER, value: Some("meaning_of_life".to_string()) };
/// let equals = Token { class: Class::EQUALS, value: None };
/// let val = Token { class: Class::LITERAL, value: Some("42".to_string()) };
///
/// assert_eq!(list, vec![ident, equals, val]);
/// ```
pub fn tokenize(source: &str) -> Vec<Token> {
	let lines: Vec<&str> = source.lines().collect();
	let lines_len = lines.len();
	let mut tree: Vec<Token> = vec![];

	for line in lines {
		let mut added = false;
		let mut words: Vec<&str> = line.split(" ").collect();
		let mut i = 0;

		while i < words.len() {
			if words[i].ends_with(",") && (words[i-1] == "[" || words[i-1] == ",") {
				words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
				words.insert(i+1, ",");
			}

			if words[i].ends_with("#") && words[i] != "#" && !words[i].starts_with("\"") {
				words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
				words.insert(i+1, "#");
			}

			if words[i] == "," {
				tree.push(Token { class: Class::SEPARATOR, value: Some(",".to_string()) });
				added = true;
				i = i + 1;
				continue;
			}

			if words[i].replace(" ","") == "" {
				i = i + 1;
				continue;
			}

			if words[i] == "[" {
				tree.push(Token { class: Class::SEPARATOR, value: Some("[".to_string()) });
				added = true;
				i = i + 1;
				continue;
			}

			if words[i] == "]" {
				tree.push(Token { class: Class::SEPARATOR, value: Some("]".to_string()) });
				added = true;
				i = i + 1;
				continue;
			}

			if words[i] == "true" || words[i] == "TRUE" {
				tree.push(Token { class: Class::BOOLEAN, value: Some("true".to_string()) });
				added = true;
				i = i + 1;
				continue;
			}

			if words[i] == "false" || words[i] == "FALSE" {
				tree.push(Token { class: Class::BOOLEAN, value: Some("false".to_string()) });
				added = true;
				i = i + 1;
				continue;
			}

			if words[i].parse::<i32>().is_ok() {
				tree.push(Token { class: Class::LITERAL, value: Some(words[i].to_string()) });
				added = true;
				i = i + 1;
				continue;
			}

			if (words.len() - i >= 2) && words[i+1] == "=" {
				if words[i].starts_with("\"") {
					words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or("");
				}

				if words[i].ends_with("\"") {
					words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
				}
				tree.push(Token { class: Class::IDENTIFIER, value: Some(words[i].to_string()) });
				added = true;
				tree.push(Token { class: Class::EQUALS, value: None });
				i = i + 2;
				continue;
			}

			if words[i] == "=" {
				tree.push(Token { class: Class::EQUALS, value: None });
				added = true;
				i = i + 1;
				continue;
			}

			if words[i].starts_with("\"") {
				if words[i].ends_with("\"") {
					words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or("");
					words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
					tree.push(Token { class: Class::LITERAL, value: Some(words[i].replace("\\\"", "\"").to_string()) });
					added = true;
					i = i + 1;
					continue;
				} else {
					words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or("");
					let mut built_string = words[i].replace("\\\"", "\"").to_string();

					loop {					
						i = i + 1;
						if words[i].ends_with("\"") {
							words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
							built_string = format!("{} {}", built_string, words[i].replace("\\\"", "\""));
							break;
						}
						built_string = format!("{} {}", built_string, words[i].replace("\\\"", "\""));
					}
					tree.push(Token { class: Class::LITERAL, value: Some(built_string) });
					added = true;
					i = i + 1;
					continue;
				}	
			}

			if words[i].starts_with("#") {
				break;
			}

			if words.len() - i < 2 && tree[tree.len()-1].class == Class::NEWLINE {
				tree.push(Token { class: Class::IDENTIFIER, value: Some(words[i].to_string()) });
				added = true;
				i = i + 1;
				continue;
			}

			tree.push(Token { class: Class::UNKNOWN, value: Some(words[i].to_string()) });
			i = i + 1;
		}
		if lines_len > 1 && added {
			tree.push(Token { class: Class::NEWLINE, value: None });
		}
	}

	if lines_len > 1 {
		tree.pop();
	}

	return tree;
}