1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
use crate::msg;

/// A token is a single unit of a command, such as a word, number or symbol.
/// Tokenising converts the raw characters of the input into a more
/// machine-readable form. For example, `tokenise` turns the string "ls -l"
/// into three tokens: the identifier "ls", the dash "-" and the identifier
/// "l" (the dash is always emitted as a token of its own).
///
/// Each token carries a kind (a [`TokenKind`]), which distinguishes the
/// different types of tokens, and a value, which is the text of the token.
///
/// NOTE(review): earlier documentation described a token position (an integer
/// offset into the input, intended for error reporting), but this struct does
/// not store one. Add the field or drop the idea once the parser lands.
#[derive(Debug)]
#[allow(dead_code)] // TODO: Remove once the parser is implemented
pub struct Token {
    /// The category this token belongs to.
    kind: TokenKind,
    /// The text of the token. For string tokens this is the content between
    /// the quotes; for dash and equals tokens it is the single character.
    value: String,
}

/// The category of a [`Token`].
///
/// As we are only implementing a very simple shell, a handful of kinds is
/// enough. The enum is a plain, fieldless tag, so it is cheap to copy and can
/// be compared directly (`kind == TokenKind::Dash`), which is what a parser
/// and unit tests need.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
    /// A word, such as "ls" or "echo".
    Identifier,
    /// A quoted string, such as "Hello, world!".
    String,
    /// A number, such as "123".
    Number,
    /// The equals sign, used for key-value pairs.
    Equals,
    /// The dash sign, used for flags.
    Dash,
}

/// Splits a command line into a flat list of [`Token`]s.
///
/// The input is scanned left to right and the first character of each token
/// decides how it is read:
///
/// * `-` and `=` each become a single-character token of their own.
/// * Whitespace (anything for which [`char::is_whitespace`] holds) separates
///   tokens and is discarded.
/// * `"` starts a string token, read by `tokenise_string`.
/// * A numeric character starts a number token, read by `tokenise_number`.
/// * An alphabetic character starts an identifier, read by
///   `tokenise_identifier`.
///
/// # Panics
///
/// Panics with `msg::ERR_LX_UNK_TOK`, followed by the offending character,
/// when a character cannot start any token. The helpers may panic as well
/// (for example on a number with more than one decimal point).
pub fn tokenise(input: String) -> Vec<Token> {
    let mut tokens: Vec<Token> = Vec::new();
    // The helpers consume characters from the front of this buffer.
    let mut chars: Vec<char> = input.chars().collect();

    // Copy the character out so the buffer can be mutated inside the arms.
    while let Some(&c) = chars.first() {
        let token = match c {
            '-' => Token { kind: TokenKind::Dash, value: chars.remove(0).to_string() },
            '=' => Token { kind: TokenKind::Equals, value: chars.remove(0).to_string() },
            '"' => tokenise_string(&mut chars),
            _ if c.is_whitespace() => {
                // Separators carry no meaning of their own; drop and move on.
                chars.remove(0);
                continue;
            }
            _ if c.is_numeric() => tokenise_number(&mut chars),
            _ if c.is_alphabetic() => tokenise_identifier(&mut chars),
            // Report the offending character as part of the panic message
            // instead of printing it to stdout beforehand.
            _ => panic!("{}: {:?}", msg::ERR_LX_UNK_TOK, c),
        };

        tokens.push(token);
    }

    tokens
}

/// Reads an identifier from the front of `chars`.
///
/// The longest leading run of alphanumeric characters (as judged by
/// [`char::is_alphanumeric`]) is removed from `chars` and returned as the
/// value of a [`TokenKind::Identifier`] token. Everything after the run stays
/// in `chars`. If `chars` does not start with an alphanumeric character,
/// nothing is removed and the token value is empty.
fn tokenise_identifier(chars: &mut Vec<char>) -> Token {
    // Measure the run first, then take it off the front in one go.
    let run_len = chars.iter().take_while(|ch| ch.is_alphanumeric()).count();
    let value: String = chars.drain(..run_len).collect();

    Token { kind: TokenKind::Identifier, value }
}
/// Reads a number from the front of `chars`.
///
/// The longest leading run of numeric characters (as judged by
/// [`char::is_numeric`]) and `.` characters is removed from `chars` and
/// returned as the value of a [`TokenKind::Number`] token. Everything after
/// the run stays in `chars`.
///
/// # Panics
///
/// Panics with `msg::ERR_LX_INV_DEC_MUL_PT` when the run contains more than
/// one `.`. The run has already been removed from `chars` at that point.
fn tokenise_number(chars: &mut Vec<char>) -> Token {
    // Measure the run first, then take it off the front in one go.
    let run_len = chars
        .iter()
        .take_while(|&&ch| ch == '.' || ch.is_numeric())
        .count();
    let value: String = chars.drain(..run_len).collect();

    // A valid number has at most one decimal point.
    let points = value.chars().filter(|&ch| ch == '.').count();
    if points > 1 {
        panic!("{}", msg::ERR_LX_INV_DEC_MUL_PT);
    }

    Token { kind: TokenKind::Number, value }
}

/// Reads a quoted string from the front of `chars`.
///
/// The first character of `chars` is discarded without being inspected (the
/// caller is expected to have seen the opening `"` there). Everything up to
/// the next `"` is removed from `chars` and becomes the value of a
/// [`TokenKind::String`] token; the closing quote is removed too but is not
/// part of the value. There is no escape handling.
///
/// If no closing quote is found, the rest of `chars` is consumed and
/// returned as the value; an unterminated string is not reported as an
/// error.
///
/// # Panics
///
/// Panics if `chars` is empty, because the opening quote cannot be removed.
fn tokenise_string(chars: &mut Vec<char>) -> Token {
    // Drop the opening quote.
    chars.remove(0);

    let closing = chars.iter().position(|&ch| ch == '"');
    let value: String = match closing {
        Some(end) => {
            let text = chars.drain(..end).collect::<String>();
            // The closing quote is now at the front; drop it as well.
            chars.remove(0);
            text
        }
        // Unterminated: take whatever is left.
        None => chars.drain(..).collect(),
    };

    Token { kind: TokenKind::String, value }
}