1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
use std::collections::HashMap;
use crate::ast::{Object, Trigger, AST};
use log::{debug, warn};
use Result::Ok;
/// The version of the RiveScript spec we support.
///
/// This is compared against the `! version = 2.0` command which may be found in the
/// RiveScript code being parsed. If the version command is present and greater than
/// this number, the Parser will return an error. A version value that does not parse
/// as a number (or that parses as zero) is rejected with an error as well.
const RIVESCRIPT_SPEC_VERSION: f32 = 2.0;
/// The RiveScript language parser.
///
/// The struct has no fields: `parse` keeps all of its working state in local
/// variables, so a single instance can be reused for any number of documents.
pub struct Parser {}
/// How `^Continue` lines are glued onto the command they extend.
///
/// The mode is chosen in a document with `! local concat = <name>`.
enum ConcatMode {
    /// Join the pieces with nothing in between.
    None,
    /// Join the pieces with a line feed.
    Newline,
    /// Join the pieces with a single space.
    Space,
}

impl ConcatMode {
    /// Returns the separator text this mode inserts between joined lines.
    fn to_string(&self) -> &'static str {
        match *self {
            Self::Space => " ",
            Self::Newline => "\n",
            Self::None => "",
        }
    }

    /// Looks up a mode by its (case-sensitive) name as written in a
    /// `! local concat` option. Unrecognised names yield `None`.
    fn parse(v: &str) -> Option<Self> {
        if v == "none" {
            Some(Self::None)
        } else if v == "newline" {
            Some(Self::Newline)
        } else if v == "space" {
            Some(Self::Space)
        } else {
            None
        }
    }
}
impl Parser {
    /// Create a new instance of the parser. It takes no parameters.
    pub fn new() -> Self {
        Self {}
    }

    /// Split a trimmed line into its one-character command and the trimmed
    /// data that follows it.
    ///
    /// The split is made on a `char` boundary, so a line that starts with a
    /// multi-byte character cannot cause a slicing panic. Returns `None` when
    /// the line holds fewer than two characters.
    fn split_command(line: &str) -> Option<(&str, &str)> {
        let first = line.chars().next()?;
        let (cmd, rest) = line.split_at(first.len_utf8());
        if rest.is_empty() {
            None
        } else {
            Some((cmd, rest.trim()))
        }
    }

    /// Parse RiveScript source code and return the Abstract Syntax Tree.
    ///
    /// `filename` is not opened or read; it only labels the warnings logged
    /// while parsing, together with the 1-based line number.
    ///
    /// # Errors
    ///
    /// Returns `Err` when a `! version` line holds a value that does not
    /// parse as a non-zero number, or declares a version greater than
    /// `RIVESCRIPT_SPEC_VERSION`.
    ///
    /// # Panics
    ///
    /// Panics if the topic currently being filled is missing from
    /// `ast.topics`. Every topic name is passed to `AST::init_topic` before
    /// triggers are committed to it, so this indicates a bug rather than bad
    /// input.
    pub fn parse(&self, filename: &str, contents: String) -> Result<AST, String> {
        debug!("BEGIN PARSE ON FILENAME: {}", filename);

        // Start building an AST parsed from this file.
        let mut ast = AST::new();

        // Local (file-scoped) parser options. They are recorded here; only
        // `concat` influences parsing, through `concat_mode`.
        let mut local_options: HashMap<String, String> = HashMap::new();
        local_options.insert("concat".to_string(), "none".to_string());
        let mut concat_mode = ConcatMode::None;

        // Some temporary state variables as we parse this file.
        let mut topic = String::from(crate::DEFAULT_TOPIC);
        let mut current_trigger = Trigger::new("");
        let mut in_comment = false;
        let mut in_object = false;
        let mut object_name = String::new();
        let mut object_language = String::new();
        let mut object_buffer: Vec<String> = Vec::new();

        // Initialize the default topic.
        ast.init_topic(&topic);

        // Go through the lines of code.
        let lines: Vec<&str> = contents.lines().collect();
        for (index, raw_line) in lines.iter().enumerate() {
            // 1-based number of this line for messages; it is also the index
            // of the *next* line, which is where the look-ahead starts.
            let lineno = index + 1;

            // Strip the line (skip empty lines).
            let line = raw_line.trim();
            if line.is_empty() {
                continue;
            }

            // Are we inside of a `> object` macro?
            if in_object {
                // Have we reached the end?
                if line.contains("< object") || line.contains("<object") {
                    // Taking the buffer leaves it empty, so code can never
                    // leak into a later object.
                    let code = std::mem::take(&mut object_buffer);
                    if !object_name.is_empty() {
                        let new_object = Object::new(&object_name, &object_language, code);
                        ast.objects.insert(object_name.to_string(), new_object);
                    }
                    // Always leave object mode, even for a nameless object;
                    // otherwise the rest of the file would be swallowed.
                    in_object = false;
                } else {
                    object_buffer.push(line.to_string());
                }
                continue;
            }

            // Handle and ignore comments.
            if line.starts_with("//") {
                continue; // single-line comment.
            } else if line.starts_with("/*") {
                // Start of a multi-line comment block.
                if line.contains("*/") {
                    continue; // The end is on the same line!
                }
                // Now inside a comment block.
                in_comment = true;
                continue;
            } else if line.contains("*/") {
                // End of a multi-line comment block.
                in_comment = false;
                continue;
            } else if in_comment {
                continue;
            }

            debug!("Line #{}: {}", lineno, line);

            // Separate the command from its data.
            let (cmd, rest) = match Self::split_command(line) {
                Some(parts) => parts,
                None => {
                    warn!(
                        "Weird single-character line '{}' found at {} line {}",
                        line, filename, lineno,
                    );
                    continue;
                }
            };

            // Ignore inline comments at the end of the line.
            let mut data = match rest.split_once(" // ") {
                Some((code, _comment)) => code.to_string(),
                None => rest.to_string(),
            };

            // Do a look-ahead for ^Continue commands.
            if cmd != "^" {
                for following in &lines[lineno..] {
                    let (look_cmd, look_data) = match Self::split_command(following.trim()) {
                        Some(parts) => parts,
                        // Blank and single-character lines are skipped over.
                        None => continue,
                    };

                    // Only ^Continue lines that carry data extend the command.
                    if look_cmd != "^" || look_data.is_empty() {
                        break;
                    }

                    if cmd == "!" {
                        // For a !Definition each extension is tacked on as a
                        // "fake line break" (which is useful information for
                        // !arrays especially).
                        data.push_str("<crlf>");
                    } else {
                        // Everything else is joined with the current concat
                        // mode characters.
                        data.push_str(concat_mode.to_string());
                    }
                    data.push_str(look_data);
                }
            }

            // Handle the types of RiveScript commands.
            match cmd {
                // !Definition
                "!" => {
                    debug!("Found a !DEFINITION");

                    // The command looks like:
                    // ! version = 2.0
                    // ! global depth = 50
                    // ! var name = Chatbot
                    // ! sub who's = who is
                    let mut halves = data.splitn(2, '=');
                    let left = halves.next().unwrap_or("").trim();
                    let right = halves.next().unwrap_or("").trim();

                    // The left half has the kind (global, var, sub, ...) and
                    // maybe the name. For `! version` there is only the kind;
                    // everything else has a name.
                    let (kind, name) = match left.split_once(' ') {
                        Some((kind, name)) => (kind.trim(), name.trim()),
                        None => (left, ""),
                    };

                    // The right half of the = sign is always the value.
                    let mut value = right.to_string();

                    // Remove 'fake' line breaks unless this is an array.
                    if kind != "array" {
                        value = value.replace("<crlf>", concat_mode.to_string());
                    }

                    // Handle RiveScript specification version checks.
                    if kind == "version" {
                        debug!("Found a version str: {}", value);
                        let version = value.parse::<f32>().unwrap_or(0.0);
                        // NaN parses successfully but is not a usable version.
                        if version == 0.0 || version.is_nan() {
                            return Err(
                                "Didn't parse version string; was it a properly formatted number?"
                                    .to_string(),
                            );
                        } else if version > RIVESCRIPT_SPEC_VERSION {
                            return Err(
                                "This RiveScript document declares a `! version` number higher than we support"
                                    .to_string(),
                            );
                        }
                        ast.version = version;
                        continue;
                    }

                    // All other types of defines require a value and a name.
                    if name.is_empty() {
                        warn!("Undefined variable name at {} line {}", filename, lineno);
                        continue;
                    } else if value.is_empty() {
                        warn!("Undefined variable value at {} line {}", filename, lineno);
                        continue;
                    }

                    // Handle the rest of the !Define types.
                    match kind {
                        "local" => {
                            debug!("\tSet local parser option {} = {}", name, value);

                            // Changing the ^Continue concatenation mode?
                            if name == "concat" {
                                concat_mode = match ConcatMode::parse(&value) {
                                    Some(mode) => mode,
                                    None => {
                                        warn!("Invalid value for '! local concat': '{value}'");
                                        ConcatMode::None
                                    }
                                };
                            }
                            local_options.insert(name.to_string(), value);
                        }
                        "global" => {
                            debug!("\tSet global {} = {}", name, value);
                            ast.set_global(name, &value);
                        }
                        "var" => {
                            debug!("\tSet bot variable {} = {}", name, value);
                            ast.set_bot_var(name, &value);
                        }
                        "sub" => {
                            debug!("\tSet substitution {} => {}", name, value);
                            ast.subs.insert(name.to_string(), value);
                        }
                        "person" => {
                            debug!("\tSet person substitution {} => {}", name, value);
                            ast.person.insert(name.to_string(), value);
                        }
                        "array" => {
                            debug!("\tSet array {} = {}", name, value);

                            // Each ^Continue row is processed independently:
                            // a row containing a pipe is pipe-separated (so
                            // the words can have spaces), any other row is
                            // whitespace-separated.
                            let mut fields: Vec<String> = Vec::new();
                            for row in value.split("<crlf>") {
                                if row.contains('|') {
                                    fields.extend(row.split('|').map(str::to_string));
                                } else {
                                    fields.extend(row.split_whitespace().map(str::to_string));
                                }
                            }

                            // Convert any remaining '\s' escape sequences to spaces.
                            for field in fields.iter_mut() {
                                *field = field.replace("\\s", " ");
                            }

                            ast.arrays.insert(name.to_string(), fields);
                        }
                        _ => {
                            warn!(
                                "Unknown definition type '{}' at {} line {}",
                                kind, filename, lineno,
                            );
                        }
                    }
                }

                // > Label
                ">" => {
                    debug!("Found a >LABEL");

                    // The command looks like:
                    // > begin
                    // > topic random
                    // > object something perl
                    let mut fields = data.split_whitespace();

                    // First field is always the kind (begin, topic, object).
                    let first = match fields.next() {
                        Some(field) => field,
                        None => continue,
                    };

                    // Next field may be the name (of topic or object).
                    let second = fields.next().unwrap_or("");

                    // BEGIN is a type of topic.
                    let (kind, name) = if first == "begin" {
                        ("topic", String::from(crate::BEGIN_TOPIC))
                    } else {
                        (first, second.to_string())
                    };

                    // Handle the kinds of labels.
                    match kind {
                        "topic" => {
                            ast.init_topic(&name);

                            // If we parsed a last trigger, commit and flush it
                            // ahead of the topic change.
                            if current_trigger.is_populated() {
                                debug!("Starting a new topic, commit the current trigger to topic {topic}: {:?}", current_trigger);
                                let t = ast
                                    .topics
                                    .get_mut(&topic)
                                    .expect("current topic should exist: init_topic was called for it");
                                t.add_trigger(current_trigger);
                            }
                            current_trigger = Trigger::new("");

                            // Set the pointer for triggers to enter this topic.
                            topic = name;

                            // Does this topic inherit or include another?
                            // The remaining fields are keywords (`includes` /
                            // `inherits`) followed by the topic names they
                            // apply to.
                            let mut mode = "";
                            for field in fields {
                                if field == "includes" || field == "inherits" {
                                    mode = field;
                                } else if mode == "includes" {
                                    let t = ast
                                        .topics
                                        .get_mut(&topic)
                                        .expect("current topic should exist: init_topic was called for it");
                                    t.set_includes(field.to_string());
                                } else if mode == "inherits" {
                                    let t = ast
                                        .topics
                                        .get_mut(&topic)
                                        .expect("current topic should exist: init_topic was called for it");
                                    t.set_inherits(field.to_string());
                                }
                            }
                        }
                        "object" => {
                            // Start of an object macro definition; the next
                            // field, if any, is the programming language.
                            let language = fields
                                .next()
                                .map(|field| field.to_lowercase())
                                .unwrap_or_default();

                            // No language defined? The object is still read
                            // (with an empty language), only a warning is
                            // logged.
                            if language.is_empty() {
                                warn!(
                                    "No programming language defined for object '{}' at {} line {}",
                                    name, filename, lineno,
                                );
                            }

                            // Start reading the object code from a clean
                            // buffer in every case.
                            object_name = name;
                            object_language = language;
                            object_buffer.clear();
                            in_object = true;
                        }
                        _ => {
                            warn!(
                                "Unsupported >LABEL kind '{}' found at {} line {}",
                                kind, filename, lineno,
                            );
                        }
                    }
                }

                // < Label
                "<" => {
                    // If we were working on a trigger, commit it to AST now.
                    if current_trigger.is_populated() {
                        let t = ast
                            .topics
                            .get_mut(&topic)
                            .expect("current topic should exist: init_topic was called for it");
                        t.add_trigger(current_trigger);
                        current_trigger = Trigger::new("");
                    }

                    // Closing a begin/topic block returns to the default topic.
                    if data == "begin" || data == "topic" {
                        topic = crate::DEFAULT_TOPIC.to_string();
                    }
                }

                // + Trigger
                "+" => {
                    // Were we working on a previous trigger? If so, give it
                    // over to the AST and start a new one. It could not be
                    // handed over earlier because replies/conditions/etc.
                    // still had to be added to it.
                    if current_trigger.is_populated() {
                        let t = ast
                            .topics
                            .get_mut(&topic)
                            .expect("current topic should exist: init_topic was called for it");
                        t.add_trigger(current_trigger);
                    }
                    current_trigger = Trigger::new(data.as_str());
                }

                // % Previous
                "%" => {
                    current_trigger.previous = data;
                }

                // - Response
                "-" => {
                    current_trigger.reply.push(data);
                }

                // * Condition
                "*" => {
                    // Split the condition from the reply it selects.
                    let mut parts = data.splitn(2, "=>");
                    let condition = parts.next().unwrap_or("").trim();
                    let reply = parts.next().unwrap_or("").trim();

                    // Parse the conditional side.
                    match crate::regex::CONDITION.captures(condition) {
                        Some(caps) => {
                            // A group that did not take part in the match is
                            // treated as empty instead of panicking.
                            let left = caps.get(1).map_or("", |m| m.as_str());
                            let operator = caps.get(2).map_or("", |m| m.as_str());
                            let right = caps.get(3).map_or("", |m| m.as_str());
                            current_trigger.condition.push(crate::ast::Condition {
                                left: left.to_string(),
                                operator: operator.to_string(),
                                right: right.to_string(),
                                reply: reply.to_string(),
                            });
                        }
                        None => {
                            // The condition is skipped, but not silently.
                            warn!(
                                "Invalid condition syntax '{}' at {} line {}",
                                condition, filename, lineno,
                            );
                        }
                    }
                }

                // @ Redirect
                "@" => {
                    current_trigger.redirect = data;
                }

                // ^ Continue was handled in lookahead above.
                "^" => continue,

                _ => {
                    warn!(
                        "Unsupported RiveScript command '{}' found at {} line {}",
                        cmd, filename, lineno,
                    );
                }
            }
        }

        // If we had a final trigger ready to go, add it to the AST.
        if current_trigger.is_populated() {
            let t = ast
                .topics
                .get_mut(&topic)
                .expect("current topic should exist: init_topic was called for it");
            t.add_trigger(current_trigger);
        }

        Ok(ast)
    }
}