//! Lexer for the shell: turns a raw input string into a flat list of tokens.
use crate::msg;
/// A token is a single unit of a command, such as a word, number or symbol.
/// Tokens are the machine-readable form that the raw input characters are
/// converted into. For example, the string "ls -l" is converted to three
/// tokens: the identifier "ls", a dash "-", and the identifier "l".
///
/// The token kind is a simple enum, which is used to distinguish between the
/// different types of tokens. The token value is a string, which contains the
/// actual text of the token; for string tokens the surrounding double quotes
/// are not part of the value.
///
/// NOTE(review): earlier documentation mentioned a token position used for
/// error reporting, but this struct has no such field.
#[derive(Debug)]
#[allow(dead_code)] // TODO: Remove once the parser is implemented
pub struct Token {
/// Which category of token this is.
kind: TokenKind,
/// The text of the token as taken from the input.
value: String,
}
/// The category of a [`Token`].
///
/// As we are only implementing a very simple shell, we only need a few token
/// kinds. The enum is a plain, field-less tag, so it is cheap to copy and can
/// be compared directly (`kind == TokenKind::Dash`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
    /// A word, such as `ls` or `echo`.
    Identifier,
    /// A quoted string, such as `"Hello, world!"`.
    String,
    /// A number, such as `123`.
    Number,
    /// The equals sign, used for key-value pairs.
    Equals,
    /// The dash sign, used for flags.
    Dash,
}
/// Splits a raw command line into a flat list of [`Token`]s.
///
/// The input is consumed from left to right:
/// * `-` and `=` each become a single-character token,
/// * whitespace (spaces, tabs, newlines, ...) separates tokens and is skipped,
/// * a numeric character starts a number, an alphabetic character starts an
///   identifier and a double quote starts a string; those are read by the
///   dedicated helper functions.
///
/// # Panics
///
/// Panics with `msg::ERR_LX_UNK_TOK` when a character cannot start any token.
/// The offending character is written to standard error first. The number
/// helper may additionally panic on malformed numbers.
pub fn tokenise(input: String) -> Vec<Token> {
    let mut tokens: Vec<Token> = Vec::new();
    let mut chars: Vec<char> = input.chars().collect();

    // Copy the character out so `chars` is free to be mutated in the arms.
    while let Some(&c) = chars.first() {
        let token = match c {
            '-' => Token { kind: TokenKind::Dash, value: chars.remove(0).to_string() },
            '=' => Token { kind: TokenKind::Equals, value: chars.remove(0).to_string() },
            '"' => tokenise_string(&mut chars),
            _ if c.is_whitespace() => {
                // Drop the whole run of whitespace in one go.
                let run = chars.iter().take_while(|ch| ch.is_whitespace()).count();
                chars.drain(..run);
                continue;
            }
            _ if c.is_numeric() => tokenise_number(&mut chars),
            _ if c.is_alphabetic() => tokenise_identifier(&mut chars),
            _ => {
                // Diagnostics belong on stderr, next to the panic message.
                eprintln!("{}", c);
                panic!("{}", msg::ERR_LX_UNK_TOK);
            }
        };
        tokens.push(token);
    }

    tokens
}
/// Reads an identifier from the front of `chars`.
///
/// Consumes the longest leading run of alphanumeric characters and returns it
/// as an [`TokenKind::Identifier`] token; everything after the run is left in
/// `chars`. If `chars` does not start with an alphanumeric character, nothing
/// is consumed and the token value is empty.
fn tokenise_identifier(chars: &mut Vec<char>) -> Token {
    // Measure the run first, then remove it with a single `drain`, so the
    // remaining characters are shifted once instead of once per character.
    let len = chars.iter().take_while(|c| c.is_alphanumeric()).count();
    let value: String = chars.drain(..len).collect();

    Token { kind: TokenKind::Identifier, value }
}
/// Reads a number from the front of `chars`.
///
/// Consumes the longest leading run of numeric characters and `.` and returns
/// it as a [`TokenKind::Number`] token; everything after the run is left in
/// `chars`.
///
/// # Panics
///
/// Panics with `msg::ERR_LX_INV_DEC_MUL_PT` when the consumed run contains
/// more than one decimal point. The run has already been removed from `chars`
/// at that point.
fn tokenise_number(chars: &mut Vec<char>) -> Token {
    // Measure the run first, then remove it with a single `drain`, so the
    // remaining characters are shifted once instead of once per character.
    let len = chars
        .iter()
        .take_while(|&&c| c.is_numeric() || c == '.')
        .count();
    let value: String = chars.drain(..len).collect();

    let decimals = value.chars().filter(|&c| c == '.').count();
    if decimals > 1 {
        panic!("{}", msg::ERR_LX_INV_DEC_MUL_PT);
    }

    Token { kind: TokenKind::Number, value }
}
/// Reads a double-quoted string from the front of `chars`.
///
/// The first character of `chars` is treated as the opening quote and
/// discarded. Everything up to the next `"` becomes the value of the returned
/// [`TokenKind::String`] token, and the closing quote is consumed as well.
/// If there is no closing quote, the rest of the input becomes the value and
/// no error is raised.
///
/// # Panics
///
/// Panics if `chars` is empty.
fn tokenise_string(chars: &mut Vec<char>) -> Token {
    // Index of the closing quote within `chars`, ignoring the opening quote
    // at index 0.
    let closing = chars
        .iter()
        .skip(1)
        .position(|&c| c == '"')
        .map(|offset| offset + 1);

    // Copy the contents out, then remove the quotes and contents with a
    // single `drain` so the remaining characters are shifted only once.
    let value: String = match closing {
        Some(end) => {
            let contents = chars[1..end].iter().collect();
            chars.drain(..=end);
            contents
        }
        None => {
            // Unterminated string: take everything after the opening quote.
            let contents = chars[1..].iter().collect();
            chars.clear();
            contents
        }
    };

    Token { kind: TokenKind::String, value }
}