1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
use crate::cop;
use crate::cop::register_tokens_handler;
use crate::source;
use crate::types;
use regex::Regex;

/// Message attached to every offense reported by this cop.
static IDENTIFIER_MSG: &str = "Use only ascii symbols in identifiers.";
// NOTE(review): counterpart message for constants, currently disabled; nothing in this
// file checks constants. Presumably kept for a future constant check; confirm or remove.
// static CONSTANT_MSG: &str = "Use only ascii symbols in constants.";
/// Name of this cop, passed to the registration calls in `init` and to `add_offense`.
static COP_NAME: &str = "Naming/AsciiIdentifiers";

/// Registers this cop.
///
/// Installs `on_tokens` as the tokens handler under `COP_NAME`, then passes
/// `COP_NAME` to `cop::register`.
pub fn init() {
    // NOTE(review): the handler is registered before the cop name itself; whether the
    // order matters depends on `cop`'s registry, which is not visible here.
    register_tokens_handler(on_tokens, COP_NAME);

    cop::register(COP_NAME);
}

/// Tokens handler for this cop.
///
/// Walks `tokens` in order and, for every identifier token whose value is not
/// pure ASCII, records one offense on `file` covering the first run of
/// non-ASCII characters in that token.
pub fn on_tokens(tokens: &Vec<types::Token>, file: &source::File) {
    for candidate in tokens.iter() {
        // Skip anything that is not an identifier, or that is already ASCII-only.
        if !should_scheck(candidate) || is_ascci(&candidate.token_value) {
            continue;
        }

        let range = first_offense_range(candidate);
        file.add_offense(COP_NAME, range, IDENTIFIER_MSG);
    }
}

/// Returns `true` when `bytes` decodes to a string made only of ASCII characters.
///
/// Returns `false`, rather than panicking, when `bytes` cannot be decoded as
/// UTF-8: every ASCII byte sequence is valid UTF-8, so an undecodable value
/// necessarily contains a non-ASCII byte.
fn is_ascci(bytes: &types::Bytes) -> bool {
    match bytes.to_string() {
        Ok(text) => text.is_ascii(),
        Err(_) => false,
    }
}

/// Tells whether `token` is of the kind this cop inspects.
///
/// Only tokens whose name is exactly `tIDENTIFIER` qualify.
fn should_scheck(token: &types::Token) -> bool {
    const IDENTIFIER_TOKEN: &str = "tIDENTIFIER";

    let name = token.token_name();
    name == IDENTIFIER_TOKEN
}

fn first_offense_range(token: &types::Token) -> types::Loc {
    let re = Regex::new(r"[^[:ascii:]]+").unwrap();
    let binding = token.token_value.to_string().unwrap();
    let mat = re.find(&binding).unwrap();

    types::Loc {
        begin: token.loc.begin + mat.start(),
        end: token.loc.begin + mat.end(),
    }
}

// Tests for the ASCII identifier check, driven by the crate-level
// `expect_no_offense!` / `expect_offense!` macros.
// NOTE(review): the macros are defined elsewhere; how they select the cop under
// test and what exactly they assert is not visible from this file.
#[cfg(test)]
mod tests {
    // An identifier made only of ASCII characters should produce no offense.
    #[test]
    fn ascii_variable_identifier() {
        crate::expect_no_offense!("name = 'aaa'");
    }

    // An identifier containing non-ASCII characters (`∂∂`) should produce an offense.
    #[test]
    fn non_ascii_variable_identifier() {
        crate::expect_offense!("foo∂∂bar = 'aa'");
    }
}