rivescript_core/parser.rs
1use std::collections::HashMap;
2
3use crate::ast::{Object, Trigger, AST};
4use log::{debug, warn};
5use Result::Ok;
6
/// The version of the RiveScript spec we support.
///
/// This is compared against the `! version = 2.0` command which may be found in the
/// RiveScript code being parsed. If the version command is present and greater than
/// this number, the Parser will return an error. A version value that does not parse
/// as a number, or that parses to zero, is rejected with an error as well.
const RIVESCRIPT_SPEC_VERSION: f32 = 2.0;
13
/// The RiveScript language parser.
///
/// The parser holds no state of its own: every call to `parse` builds a fresh
/// syntax tree from the source text it is given. `Default` is derived so the type
/// can be built with `Parser::default()` as well as `Parser::new()`.
#[derive(Debug, Default)]
pub struct Parser {}
16
/// How `^Continue` lines are glued onto the command they extend.
///
/// The mode is selected in RiveScript source with `! local concat = <mode>`, where
/// `<mode>` is one of the lowercase names accepted by [`ConcatMode::parse`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ConcatMode {
    /// Join the pieces directly, with nothing in between.
    None,
    /// Join the pieces with a line feed (`"\n"`).
    Newline,
    /// Join the pieces with a single space.
    Space,
}

impl ConcatMode {
    /// Returns the separator text inserted between concatenated pieces.
    ///
    /// The name is kept for the existing call sites; it borrows a static string
    /// and does not allocate.
    fn to_string(&self) -> &'static str {
        match self {
            ConcatMode::None => "",
            ConcatMode::Newline => "\n",
            ConcatMode::Space => " ",
        }
    }

    /// Parses the value of a `! local concat` option.
    ///
    /// Matching is exact and case-sensitive; anything other than `"none"`,
    /// `"newline"` or `"space"` yields `None`.
    fn parse(v: &str) -> Option<Self> {
        match v {
            "none" => Some(ConcatMode::None),
            "newline" => Some(ConcatMode::Newline),
            "space" => Some(ConcatMode::Space),
            _ => None,
        }
    }
}
41
42impl Parser {
43 /// Create a new instance of the parser. It takes no parameters.
44 pub fn new() -> Self {
45 Self {}
46 }
47
48 /// Parse RiveScript source code and return the Abstract Syntax Tree.
49 ///
50 /// The filename is used only for syntax error reporting (so the filename and line number
51 /// can be included in the error).
52 pub fn parse(&self, filename: &str, contents: String) -> Result<AST, String> {
53 debug!("BEGIN PARSE ON FILENAME: {}", filename);
54
55 // Start building an AST parsed from these files.
56 let mut ast = AST::new();
57
58 // Local (file-scoped) parser options.
59 let mut local_options: HashMap<String, String> = HashMap::new();
60 local_options.insert("concat".to_string(), "none".to_string());
61 let mut concat_mode = ConcatMode::None;
62
63 // Some temporary state variables as we parse this file.
64 let mut topic = String::from(crate::DEFAULT_TOPIC);
65 let mut current_trigger = Trigger::new("");
66 let mut lineno: usize = 0;
67 let mut in_comment = false;
68 let mut in_object = false;
69 let mut object_name = String::from("");
70 let mut object_language = String::from("");
71 let mut object_buffer: Vec<String> = Vec::new();
72
73 // Initialize the "random" topic.
74 ast.init_topic(&topic);
75
76 // Go through the lines of code.
77 // let mut lines = contents.lines();
78 let lines: Vec<String> = contents.lines().map(|s| s.to_string()).collect();
79 loop {
80 if lineno >= lines.len() {
81 break;
82 }
83
84 let mut line = lines[lineno].to_string();
85 lineno += 1;
86
87 // Strip the line (skip empty lines).
88 line = line.trim().to_string();
89 if line.len() == 0 {
90 continue;
91 }
92
93 // Are we inside of a `> object` macro?
94 if in_object {
95 // Have we reached the end?
96 if line.contains("< object") || line.contains("<object") {
97 if object_name.len() > 0 {
98 let new_object =
99 Object::new(&object_name, &object_language, object_buffer.to_owned());
100 ast.objects.insert(object_name.to_string(), new_object);
101 in_object = false;
102 }
103 } else {
104 object_buffer.push(line);
105 }
106 continue;
107 }
108
109 // Handle and ignore comments.
110 if line.starts_with("//") {
111 continue; // single-line comment.
112 } else if line.starts_with("/*") {
113 // Start of a multi-line comment block.
114 if line.contains("*/") {
115 continue; // The end is on the same line!
116 }
117
118 // Now inside a comment block.
119 in_comment = true;
120 continue;
121 } else if line.contains("*/") {
122 // End of a multi-line comment block.
123 in_comment = false;
124 continue;
125 } else if in_comment {
126 continue;
127 }
128
129 debug!("Line #{}: {}", lineno, line);
130
131 // Separate the command from its data.
132 if line.len() < 2 {
133 warn!(
134 "Weird single-character line '{}' found at {} line {}",
135 line, filename, lineno,
136 );
137 continue;
138 }
139 let cmd = &line[..1];
140 let mut line = line[1..].trim().to_string();
141
142 // Ignore inline comments at the end of the line.
143 if line.contains(" // ") {
144 let mut splitter = line.splitn(2, " // ");
145 line = splitter.next().unwrap_or("").to_string();
146 }
147
148 // Do a look-ahead for ^Continue and %Previous commands.
149 if cmd != "^" {
150 let mut li = lineno;
151 loop {
152 if li >= lines.len() {
153 break;
154 }
155
156 let lookahead = lines[li].trim();
157 li += 1;
158 if lookahead.len() < 2 {
159 continue;
160 }
161
162 let look_cmd = &lookahead[..1];
163 let lookahead = lookahead[1..].trim();
164
165 // We only care about a couple of lookahead command types.
166 if look_cmd != "^" || lookahead.len() == 0 {
167 break;
168 }
169
170 // If our parent command is a ! and the next command(s) are ^,
171 // we'll tack each extension on as a "fake line break" (which
172 // is useful information for !arrays especially)
173 if cmd == "!" {
174 if look_cmd == "^" {
175 line.push_str("<crlf>");
176 line.push_str(lookahead);
177 }
178 continue;
179 }
180
181 // Concatenate ^Continue lines with the current concat mode characters.
182 if cmd != "^" && look_cmd == "^" {
183 line = format!("{line}{}{lookahead}", concat_mode.to_string());
184 }
185 }
186 }
187
188 // Handle the types of RiveScript commands.
189 match cmd {
190 // !Definition
191 "!" => {
192 warn!("Found a !DEFINITION");
193
194 // The command looks like:
195 // ! version = 2.0
196 // ! global depth = 50
197 // ! var name = Chatbot
198 // ! sub who's = who is
199 let mut halves = line.splitn(2, "=");
200 let left = halves.next().unwrap_or("").trim();
201 let right = halves.next().unwrap_or("").trim();
202 let mut value = String::from("");
203 let mut kind = ""; // global, var, sub, ...
204 let mut name = "";
205
206 if right.len() > 0 {
207 // The right half of the = sign is always the value.
208 value.push_str(right);
209 }
210 if left.len() >= 1 {
211 // The left half has the kind and maybe the name.
212 // If `! version` there is only the kind=version,
213 // everything else has a name.
214 if left.contains(" ") {
215 let mut halves = left.splitn(2, " ");
216 kind = halves.next().unwrap_or("").trim();
217 name = halves.next().unwrap_or("").trim();
218 } else {
219 kind = left;
220 }
221 }
222
223 // Remove 'fake' line breaks unless this is an array.
224 if kind != "array" {
225 value = value.replace("<crlf>", concat_mode.to_string());
226 }
227
228 // Handle RiveScript specification version checks.
229 if kind == "version" {
230 warn!("Found a version str: {}", value);
231 let version = value.parse::<f32>().unwrap_or(0.0);
232 if version == 0.0 {
233 return Err(
234 "Didn't parse version string; was it a properly formatted number?".to_string(),
235 );
236 } else if version > RIVESCRIPT_SPEC_VERSION {
237 return Err(
238 "This RiveScript document declares a `! version` number higher than we support".to_string(),
239 );
240 } else {
241 ast.version = version;
242 }
243 continue;
244 }
245
246 // All other types of defines require a value and a name.
247 if name.len() == 0 {
248 warn!("Undefined variable name at {} line {}", filename, lineno);
249 continue;
250 } else if value.len() == 0 {
251 warn!("Undefined variable value at {} line {}", filename, lineno);
252 continue;
253 }
254
255 // Handle the rest of the !Define types.
256 match kind {
257 "local" => {
258 debug!("\tSet local parser option {} = {}", name, value);
259 local_options.insert(name.to_string(), value.to_string());
260
261 // Changing the ^Continue concatenation mode?
262 if name == "concat" {
263 if let Some(v) = ConcatMode::parse(&value) {
264 concat_mode = v;
265 } else {
266 warn!("Invalid value for '! local concat': '{value}'");
267 concat_mode = ConcatMode::None;
268 }
269 }
270 }
271 "global" => {
272 debug!("\tSet global {} = {}", name, value);
273 ast.set_global(name, &value);
274 }
275 "var" => {
276 debug!("\tSet bot variable {} = {}", name, value);
277 ast.set_bot_var(name, &value);
278 }
279 "sub" => {
280 debug!("\tSet substitution {} => {}", name, value);
281 ast.subs.insert(name.to_string(), value.to_string());
282 }
283 "person" => {
284 debug!("\tSet person substitution {} => {}", name, value);
285 ast.person.insert(name.to_string(), value.to_string());
286 }
287 "array" => {
288 debug!("\tSet array {} = {}", name, value);
289
290 // Did we have multiple parts to this array? (^Continues)
291 let parts = value.split("<crlf>");
292
293 // Process each row of array data independently.
294 let mut fields: Vec<String> = Vec::new();
295 for val in parts {
296 if val.contains("|") {
297 // Pipe-separated array (so the words can have spaces)
298 let mut other: Vec<String> =
299 val.split("|").map(str::to_string).collect();
300 fields.append(&mut other);
301 } else {
302 let mut other: Vec<String> =
303 val.split_whitespace().map(str::to_string).collect();
304 fields.append(&mut other);
305 }
306 }
307
308 // Convert any remaining '\s' escape sequences to spaces.
309 for field in fields.iter_mut() {
310 *field = field.replace("\\s", " ");
311 }
312
313 ast.arrays.insert(name.to_string(), fields);
314 }
315 &_ => {
316 warn!(
317 "Unknown definition type '{}' at {} line {}",
318 kind, filename, lineno,
319 );
320 }
321 }
322 }
323
324 // > Label
325 ">" => {
326 warn!("Found a >LABEL");
327
328 // The command looks like:
329 // > begin
330 // > topic random
331 // > object something perl
332 let mut fields: Vec<String> =
333 line.split_whitespace().map(str::to_string).collect();
334 if fields.len() == 0 {
335 continue;
336 }
337
338 // First field is always the kind (begin, topic, object)
339 let mut kind = fields.remove(0);
340
341 // Next field may be the name (of topic or object)
342 let mut name = String::from("");
343 if fields.len() > 0 {
344 name = fields.remove(0);
345 }
346
347 // BEGIN is a type of topic.
348 if kind == "begin" {
349 kind = String::from("topic");
350 name = String::from(crate::BEGIN_TOPIC);
351 }
352
353 // Handle the kinds of labels.
354 match kind.as_str() {
355 "topic" => {
356 ast.init_topic(&name);
357
358 // If we parsed a last trigger, commit and flush it
359 // ahead of the topic change.
360 if current_trigger.is_populated() {
361 debug!("Starting a new topic, commit the current trigger to topic {topic}: {:?}", current_trigger);
362 let t = ast.topics.get_mut(&topic).expect("or else");
363 t.add_trigger(current_trigger);
364 }
365 current_trigger = Trigger::new("");
366
367 // Set the pointer for triggers to enter this topic.
368 topic = name.to_string();
369
370 // Does this topic inherit or include another?
371 let mut mode = String::from("");
372 if fields.len() > 0 {
373 for field in fields {
374 if field == "includes" || field == "inherits" {
375 mode = field.to_string();
376 } else if mode == "includes" {
377 let t = ast.topics.get_mut(&topic).expect("or else");
378 t.set_includes(field.to_string());
379 } else if mode == "inherits" {
380 let t = ast.topics.get_mut(&topic).expect("or else");
381 t.set_inherits(field.to_string());
382 }
383 }
384 }
385 }
386 "object" => {
387 // Start of an object macro definition.
388 let mut language = String::from("");
389 if fields.len() > 0 {
390 language = fields.remove(0).to_lowercase();
391 }
392
393 // No language defined?
394 if language.len() == 0 {
395 warn!(
396 "No programming language defined for object '{}' at {} line {}",
397 name, filename, lineno,
398 );
399 in_object = true;
400 object_name = name;
401 object_language = language;
402 continue;
403 }
404
405 // Start reading the object code.
406 object_name = name;
407 object_language = language;
408 object_buffer.truncate(0);
409 in_object = true;
410 }
411 &_ => {
412 warn!(
413 "Unsupported >LABEL kind '{}' found at {} line {}",
414 kind, filename, lineno,
415 );
416 }
417 }
418 }
419
420 // < Label
421 "<" => {
422 let kind = line;
423
424 // If we were working on a trigger, commit it to AST now.
425 if current_trigger.is_populated() {
426 let t = ast.topics.get_mut(&topic).expect("or else");
427 t.add_trigger(current_trigger);
428 current_trigger = Trigger::new(&"");
429 }
430
431 if kind == "begin" || kind == "topic" {
432 topic = crate::DEFAULT_TOPIC.to_string();
433 }
434 }
435
436 // + Trigger
437 "+" => {
438 // Were we working on a previous trigger? If so, give it
439 // over to the AST and start a new one. We can't give it
440 // over NOW because we will need to own/modify it to
441 // add replies/conditions/etc.
442 if current_trigger.is_populated() {
443 let t = ast.topics.get_mut(&topic).expect("or else");
444 t.add_trigger(current_trigger);
445 }
446
447 current_trigger = Trigger::new(line.as_str());
448 }
449
450 // % Previous
451 "%" => {
452 current_trigger.previous = line.to_string();
453 }
454
455 // - Response
456 "-" => {
457 current_trigger.reply.push(line.to_string());
458 }
459
460 // * Condition
461 "*" => {
462 // Split everything apart.
463 let parts: Vec<String> = line.splitn(2, "=>").map(|s| s.to_string()).collect();
464 let condition = parts.get(0).unwrap().trim();
465 let reply = parts.get(1).map(|s| s.as_str()).unwrap_or("").trim();
466
467 // Parse the conditional side.
468 match crate::regex::CONDITION.captures(&condition) {
469 Some(caps) => {
470 let left = caps.get(1).unwrap().as_str();
471 let operator = caps.get(2).unwrap().as_str();
472 let right = caps.get(3).unwrap().as_str();
473
474 current_trigger.condition.push(crate::ast::Condition{
475 left: left.to_string(),
476 operator: operator.to_string(),
477 right: right.to_string(),
478 reply: reply.to_string(),
479 });
480 },
481 None => {
482 // TODO: raise syntax error.
483 },
484 }
485 }
486
487 // @ Redirect
488 "@" => {
489 current_trigger.redirect = line.to_string();
490 }
491
492 // ^ Continue was handled in lookahead above.
493 "^" => continue,
494
495 &_ => {
496 warn!(
497 "Unsupported RiveScript command '{}' found at {} line {}",
498 cmd, filename, lineno,
499 );
500 }
501 }
502 }
503
504 // If we had a final trigger ready to go, add it to the AST.
505 if current_trigger.is_populated() {
506 let t = ast.topics.get_mut(&topic).expect("or else");
507 t.add_trigger(current_trigger);
508 }
509
510 Ok(ast)
511 }
512}