1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
use regex::Regex;
#[allow(dead_code)]
#[derive(Clone, Debug)]
/// A bundle of compiled regular expressions, one per field.
///
/// All expressions are compiled by `RegexList::new`, which also holds the
/// authoritative description of every pattern.
/// None of the patterns contain anchors, so where a match is allowed to start
/// or end is left to the code that uses them.
// NOTE(review): `dead_code` is allowed, presumably because not every
// expression is read inside the crate - confirm before removing a field.
pub(crate) struct RegexList {
/// Matches a line break: `\n`, optionally preceded by `\r`.
pub newline: Regex,
/// Matches one or more spaces and/or tabs.
pub line_space: Regex,
/// Matches one or more characters that are neither `[` nor `]`.
pub comment: Regex,
/// Matches the literal `VOLUME_mB`, or an upper case latin letter followed by
/// any number of upper case latin letters, digits or `_`.
pub token_name: Regex,
/// Matches any run of characters that is allowed inside a token argument:
/// one or more characters other than `[`, `]`, `:`, `\r` and `\n`.
pub token_argument_all: Regex,
/// Matches one or more digits, optionally preceded by a `-`.
pub token_argument_integer: Regex,
/// Matches one character wrapped in single quotes (for example `'a'`).
/// The quoted character may not be `]`, `:`, `\r` or `\n`.
pub token_argument_character: Regex,
/// Matches `ARG` followed by either `0` or a number without leading zeros.
pub token_argument_arg_n: Regex,
/// Matches an optional prefix of digits (each may be followed by `_`s),
/// then an upper case latin letter followed by any number of
/// upper case latin letters, digits or `_`.
pub token_argument_reference: Regex,
/// Matches one or more characters other than `|`, `[`, `]`, `:`, `\r` and `\n`.
pub token_argument_string: Regex,
/// Matches one or more characters other than `[`, `]`, `:`, `\r` and `\n`.
/// Unlike `token_argument_string` this does accept the `|` character.
pub token_argument_pipe_arguments: Regex,
/// Matches `!ARG` followed by either `0` or a number without leading zeros.
pub token_argument_bang_arg_n: Regex,
/// Matches a single `[`.
pub token_open_bracket: Regex,
/// Matches a single `]`.
pub token_close_bracket: Regex,
/// Matches a single `:`.
pub token_separator: Regex,
/// Matches a single `|`.
pub token_pipe_separator: Regex,
/// Matches one or more characters other than `[`, `]`, `\r` and `\n`.
pub header: Regex,
}
impl RegexList {
    /// Compiles every regular expression in the list and returns them bundled together.
    ///
    /// # Panics
    ///
    /// Panics when the regex engine rejects one of the patterns.
    /// All patterns are fixed literals written out below.
    #[allow(clippy::trivial_regex)]
    pub fn new() -> Self {
        // WARNING: Edit the patterns below only with GREAT care.
        // A change here can strongly affect both the speed and the accuracy of parsing.
        // The `docs/Syntax_Highlighting.md` file has more info about all the expressions.

        // Single place where a pattern is turned into a compiled expression.
        let compile = |pattern: &str| Regex::new(pattern).unwrap();

        Self {
            // A line break: `\n` or `\r\n`.
            newline: compile(r"\r?\n"),

            // Any mix of ` ` (space) and `\t` (tab).
            // At least 1 character is required.
            line_space: compile(r"[ \t]+"),

            // Any characters other than `[` and `]`.
            // At least 1 character is required.
            comment: compile(r"[^\[\]]+"),

            // Upper case latin characters, numbers and `_` (underscore),
            // where the first character has to be an upper case latin character
            // (so it can not start with `_` or a number).
            //
            // `VOLUME_mB` is accepted as a special case because of issue #38.
            token_name: compile(r"(?:VOLUME_mB)|(?:[A-Z][A-Z_0-9]*)"),

            // Any characters other than `[`, `]`, `:`, `\r` and `\n`.
            // At least 1 character is required.
            token_argument_all: compile(r"[^\[\]:\r\n]+"),

            // A sequence of numbers that may be preceded by a `-` character.
            // At least 1 number is required.
            // Leading zeros do not affect the parsing. (issue #113)
            token_argument_integer: compile(r"-?[0-9]+"),

            // One character surrounded by single quotes, for example `'a'` or `'?'`.
            // The character can not be `]` or `:` (because of issue #111),
            // nor `\r` or `\n`, because newlines are not allowed inside tokens.
            // A match is always exactly 3 characters long.
            token_argument_character: compile(r"('[^\]:\r\n]')"),

            // "ARG" followed by a number.
            // At least 1 number is required and leading zeros are not allowed.
            token_argument_arg_n: compile(r"ARG(?:0|[1-9][0-9]*)"),

            // Upper case latin characters, numbers and `_` (underscore),
            // containing at least 1 upper case latin character.
            // It can not start with `_`.
            //
            // Starting with a number is allowed because of issue #93.
            token_argument_reference: compile(r"(?:[0-9]_*)*([A-Z][A-Z_0-9]*)"),

            // Any characters other than `|`, `[`, `]`, `:`, `\r` and `\n`.
            // At least 1 character is required.
            token_argument_string: compile(r"[^\|\[\]:\r\n]+"),

            // Any characters other than `[`, `]`, `:`, `\r` and `\n`.
            // At least 1 character is required.
            // NOTE: Unlike the string expression, this one does accept the `|` character.
            token_argument_pipe_arguments: compile(r"[^\[\]:\r\n]+"),

            // "!ARG" followed by a number.
            // At least 1 number is required and leading zeros are not allowed.
            // A single string/reference can contain multiple matches of this expression.
            token_argument_bang_arg_n: compile(r"!ARG(?:0|[1-9][0-9]*)"),

            // Exactly 1 `[` (open square bracket).
            token_open_bracket: compile(r"\["),

            // Exactly 1 `]` (closing square bracket).
            token_close_bracket: compile(r"\]"),

            // Exactly 1 `:` (colon).
            token_separator: compile(r":"),

            // Exactly 1 `|` (pipe).
            token_pipe_separator: compile(r"\|"),

            // Any characters other than `[`, `]`, `\r` and `\n`.
            // At least 1 character is required.
            header: compile(r"[^\[\]\r\n]+"),
        }
    }
}