// rivescript_core/parser.rs

1use std::collections::HashMap;
2
3use crate::ast::{Object, Trigger, AST};
4use log::{debug, warn};
5use Result::Ok;
6
/// The version of the RiveScript spec we support.
///
/// This is compared against the `! version = 2.0` command which may be found in the
/// RiveScript code being parsed. If the version command is present and greater than
/// this number, the Parser will return an error. A version value that cannot be
/// parsed as a number (or that parses to zero) is rejected as well.
const RIVESCRIPT_SPEC_VERSION: f32 = 2.0;
13
/// The RiveScript language parser.
///
/// The parser holds no state of its own, so a single instance may be reused
/// for any number of source files. `Parser::default()` is equivalent to
/// `Parser::new()`.
#[derive(Debug, Default, Clone)]
pub struct Parser {}
16
/// How `^Continue` lines are joined onto the command they extend.
///
/// The mode is selected per file with `! local concat = <mode>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ConcatMode {
    /// Join the pieces directly, with nothing in between.
    None,
    /// Join the pieces with a line feed.
    Newline,
    /// Join the pieces with a single space.
    Space,
}

impl ConcatMode {
    /// Returns the separator text inserted between concatenated lines.
    fn to_string(self) -> &'static str {
        match self {
            ConcatMode::None => "",
            ConcatMode::Newline => "\n",
            ConcatMode::Space => " ",
        }
    }

    /// Parses the value of a `! local concat` option.
    ///
    /// Matching is exact and case-sensitive; any unrecognised value yields
    /// `None` so the caller can decide how to report it.
    fn parse(v: &str) -> Option<Self> {
        match v {
            "none" => Some(ConcatMode::None),
            "newline" => Some(ConcatMode::Newline),
            "space" => Some(ConcatMode::Space),
            _ => None,
        }
    }
}
41
42impl Parser {
43    /// Create a new instance of the parser. It takes no parameters.
44    pub fn new() -> Self {
45        Self {}
46    }
47
48    /// Parse RiveScript source code and return the Abstract Syntax Tree.
49    ///
50    /// The filename is used only for syntax error reporting (so the filename and line number
51    /// can be included in the error).
52    pub fn parse(&self, filename: &str, contents: String) -> Result<AST, String> {
53        debug!("BEGIN PARSE ON FILENAME: {}", filename);
54
55        // Start building an AST parsed from these files.
56        let mut ast = AST::new();
57
58        // Local (file-scoped) parser options.
59        let mut local_options: HashMap<String, String> = HashMap::new();
60        local_options.insert("concat".to_string(), "none".to_string());
61        let mut concat_mode = ConcatMode::None;
62
63        // Some temporary state variables as we parse this file.
64        let mut topic = String::from(crate::DEFAULT_TOPIC);
65        let mut current_trigger = Trigger::new("");
66        let mut lineno: usize = 0;
67        let mut in_comment = false;
68        let mut in_object = false;
69        let mut object_name = String::from("");
70        let mut object_language = String::from("");
71        let mut object_buffer: Vec<String> = Vec::new();
72
73        // Initialize the "random" topic.
74        ast.init_topic(&topic);
75
76        // Go through the lines of code.
77        // let mut lines = contents.lines();
78        let lines: Vec<String> = contents.lines().map(|s| s.to_string()).collect();
79        loop {
80            if lineno >= lines.len() {
81                break;
82            }
83
84            let mut line = lines[lineno].to_string();
85            lineno += 1;
86
87            // Strip the line (skip empty lines).
88            line = line.trim().to_string();
89            if line.len() == 0 {
90                continue;
91            }
92
93            // Are we inside of a `> object` macro?
94            if in_object {
95                // Have we reached the end?
96                if line.contains("< object") || line.contains("<object") {
97                    if object_name.len() > 0 {
98                        let new_object =
99                            Object::new(&object_name, &object_language, object_buffer.to_owned());
100                        ast.objects.insert(object_name.to_string(), new_object);
101                        in_object = false;
102                    }
103                } else {
104                    object_buffer.push(line);
105                }
106                continue;
107            }
108
109            // Handle and ignore comments.
110            if line.starts_with("//") {
111                continue; // single-line comment.
112            } else if line.starts_with("/*") {
113                // Start of a multi-line comment block.
114                if line.contains("*/") {
115                    continue; // The end is on the same line!
116                }
117
118                // Now inside a comment block.
119                in_comment = true;
120                continue;
121            } else if line.contains("*/") {
122                // End of a multi-line comment block.
123                in_comment = false;
124                continue;
125            } else if in_comment {
126                continue;
127            }
128
129            debug!("Line #{}: {}", lineno, line);
130
131            // Separate the command from its data.
132            if line.len() < 2 {
133                warn!(
134                    "Weird single-character line '{}' found at {} line {}",
135                    line, filename, lineno,
136                );
137                continue;
138            }
139            let cmd = &line[..1];
140            let mut line = line[1..].trim().to_string();
141
142            // Ignore inline comments at the end of the line.
143            if line.contains(" // ") {
144                let mut splitter = line.splitn(2, " // ");
145                line = splitter.next().unwrap_or("").to_string();
146            }
147
148            // Do a look-ahead for ^Continue and %Previous commands.
149            if cmd != "^" {
150                let mut li = lineno;
151                loop {
152                    if li >= lines.len() {
153                        break;
154                    }
155
156                    let lookahead = lines[li].trim();
157                    li += 1;
158                    if lookahead.len() < 2 {
159                        continue;
160                    }
161
162                    let look_cmd = &lookahead[..1];
163                    let lookahead = lookahead[1..].trim();
164
165                    // We only care about a couple of lookahead command types.
166                    if look_cmd != "^" || lookahead.len() == 0 {
167                        break;
168                    }
169
170                    // If our parent command is a ! and the next command(s) are ^,
171                    // we'll tack each extension on as a "fake line break" (which
172                    // is useful information for !arrays especially)
173                    if cmd == "!" {
174                        if look_cmd == "^" {
175                            line.push_str("<crlf>");
176                            line.push_str(lookahead);
177                        }
178                        continue;
179                    }
180
181                    // Concatenate ^Continue lines with the current concat mode characters.
182                    if cmd != "^" && look_cmd == "^" {
183                        line = format!("{line}{}{lookahead}", concat_mode.to_string());
184                    }
185                }
186            }
187
188            // Handle the types of RiveScript commands.
189            match cmd {
190                // !Definition
191                "!" => {
192                    warn!("Found a !DEFINITION");
193
194                    // The command looks like:
195                    // ! version = 2.0
196                    // ! global depth = 50
197                    // ! var name = Chatbot
198                    // ! sub who's = who is
199                    let mut halves = line.splitn(2, "=");
200                    let left = halves.next().unwrap_or("").trim();
201                    let right = halves.next().unwrap_or("").trim();
202                    let mut value = String::from("");
203                    let mut kind = ""; // global, var, sub, ...
204                    let mut name = "";
205
206                    if right.len() > 0 {
207                        // The right half of the = sign is always the value.
208                        value.push_str(right);
209                    }
210                    if left.len() >= 1 {
211                        // The left half has the kind and maybe the name.
212                        // If `! version` there is only the kind=version,
213                        // everything else has a name.
214                        if left.contains(" ") {
215                            let mut halves = left.splitn(2, " ");
216                            kind = halves.next().unwrap_or("").trim();
217                            name = halves.next().unwrap_or("").trim();
218                        } else {
219                            kind = left;
220                        }
221                    }
222
223                    // Remove 'fake' line breaks unless this is an array.
224                    if kind != "array" {
225                        value = value.replace("<crlf>", concat_mode.to_string());
226                    }
227
228                    // Handle RiveScript specification version checks.
229                    if kind == "version" {
230                        warn!("Found a version str: {}", value);
231                        let version = value.parse::<f32>().unwrap_or(0.0);
232                        if version == 0.0 {
233                            return Err(
234                                "Didn't parse version string; was it a properly formatted number?".to_string(),
235                            );
236                        } else if version > RIVESCRIPT_SPEC_VERSION {
237                            return Err(
238                                "This RiveScript document declares a `! version` number higher than we support".to_string(),
239                            );
240                        } else {
241                            ast.version = version;
242                        }
243                        continue;
244                    }
245
246                    // All other types of defines require a value and a name.
247                    if name.len() == 0 {
248                        warn!("Undefined variable name at {} line {}", filename, lineno);
249                        continue;
250                    } else if value.len() == 0 {
251                        warn!("Undefined variable value at {} line {}", filename, lineno);
252                        continue;
253                    }
254
255                    // Handle the rest of the !Define types.
256                    match kind {
257                        "local" => {
258                            debug!("\tSet local parser option {} = {}", name, value);
259                            local_options.insert(name.to_string(), value.to_string());
260
261                            // Changing the ^Continue concatenation mode?
262                            if name == "concat" {
263                                if let Some(v) = ConcatMode::parse(&value) {
264                                    concat_mode = v;
265                                } else {
266                                    warn!("Invalid value for '! local concat': '{value}'");
267                                    concat_mode = ConcatMode::None;
268                                }
269                            }
270                        }
271                        "global" => {
272                            debug!("\tSet global {} = {}", name, value);
273                            ast.set_global(name, &value);
274                        }
275                        "var" => {
276                            debug!("\tSet bot variable {} = {}", name, value);
277                            ast.set_bot_var(name, &value);
278                        }
279                        "sub" => {
280                            debug!("\tSet substitution {} => {}", name, value);
281                            ast.subs.insert(name.to_string(), value.to_string());
282                        }
283                        "person" => {
284                            debug!("\tSet person substitution {} => {}", name, value);
285                            ast.person.insert(name.to_string(), value.to_string());
286                        }
287                        "array" => {
288                            debug!("\tSet array {} = {}", name, value);
289
290                            // Did we have multiple parts to this array? (^Continues)
291                            let parts = value.split("<crlf>");
292
293                            // Process each row of array data independently.
294                            let mut fields: Vec<String> = Vec::new();
295                            for val in parts {
296                                if val.contains("|") {
297                                    // Pipe-separated array (so the words can have spaces)
298                                    let mut other: Vec<String> =
299                                        val.split("|").map(str::to_string).collect();
300                                    fields.append(&mut other);
301                                } else {
302                                    let mut other: Vec<String> =
303                                        val.split_whitespace().map(str::to_string).collect();
304                                    fields.append(&mut other);
305                                }
306                            }
307
308                            // Convert any remaining '\s' escape sequences to spaces.
309                            for field in fields.iter_mut() {
310                                *field = field.replace("\\s", " ");
311                            }
312
313                            ast.arrays.insert(name.to_string(), fields);
314                        }
315                        &_ => {
316                            warn!(
317                                "Unknown definition type '{}' at {} line {}",
318                                kind, filename, lineno,
319                            );
320                        }
321                    }
322                }
323
324                // > Label
325                ">" => {
326                    warn!("Found a >LABEL");
327
328                    // The command looks like:
329                    // > begin
330                    // > topic random
331                    // > object something perl
332                    let mut fields: Vec<String> =
333                        line.split_whitespace().map(str::to_string).collect();
334                    if fields.len() == 0 {
335                        continue;
336                    }
337
338                    // First field is always the kind (begin, topic, object)
339                    let mut kind = fields.remove(0);
340
341                    // Next field may be the name (of topic or object)
342                    let mut name = String::from("");
343                    if fields.len() > 0 {
344                        name = fields.remove(0);
345                    }
346
347                    // BEGIN is a type of topic.
348                    if kind == "begin" {
349                        kind = String::from("topic");
350                        name = String::from(crate::BEGIN_TOPIC);
351                    }
352
353                    // Handle the kinds of labels.
354                    match kind.as_str() {
355                        "topic" => {
356                            ast.init_topic(&name);
357
358                            // If we parsed a last trigger, commit and flush it
359                            // ahead of the topic change.
360                            if current_trigger.is_populated() {
361                                debug!("Starting a new topic, commit the current trigger to topic {topic}: {:?}", current_trigger);
362                                let t = ast.topics.get_mut(&topic).expect("or else");
363                                t.add_trigger(current_trigger);
364                            }
365                            current_trigger = Trigger::new("");
366
367                            // Set the pointer for triggers to enter this topic.
368                            topic = name.to_string();
369
370                            // Does this topic inherit or include another?
371                            let mut mode = String::from("");
372                            if fields.len() > 0 {
373                                for field in fields {
374                                    if field == "includes" || field == "inherits" {
375                                        mode = field.to_string();
376                                    } else if mode == "includes" {
377                                        let t = ast.topics.get_mut(&topic).expect("or else");
378                                        t.set_includes(field.to_string());
379                                    } else if mode == "inherits" {
380                                        let t = ast.topics.get_mut(&topic).expect("or else");
381                                        t.set_inherits(field.to_string());
382                                    }
383                                }
384                            }
385                        }
386                        "object" => {
387                            // Start of an object macro definition.
388                            let mut language = String::from("");
389                            if fields.len() > 0 {
390                                language = fields.remove(0).to_lowercase();
391                            }
392
393                            // No language defined?
394                            if language.len() == 0 {
395                                warn!(
396                                    "No programming language defined for object '{}' at {} line {}",
397                                    name, filename, lineno,
398                                );
399                                in_object = true;
400                                object_name = name;
401                                object_language = language;
402                                continue;
403                            }
404
405                            // Start reading the object code.
406                            object_name = name;
407                            object_language = language;
408                            object_buffer.truncate(0);
409                            in_object = true;
410                        }
411                        &_ => {
412                            warn!(
413                                "Unsupported >LABEL kind '{}' found at {} line {}",
414                                kind, filename, lineno,
415                            );
416                        }
417                    }
418                }
419
420                // < Label
421                "<" => {
422                    let kind = line;
423
424                    // If we were working on a trigger, commit it to AST now.
425                    if current_trigger.is_populated() {
426                        let t = ast.topics.get_mut(&topic).expect("or else");
427                        t.add_trigger(current_trigger);
428                        current_trigger = Trigger::new(&"");
429                    }
430
431                    if kind == "begin" || kind == "topic" {
432                        topic = crate::DEFAULT_TOPIC.to_string();
433                    }
434                }
435
436                // + Trigger
437                "+" => {
438                    // Were we working on a previous trigger? If so, give it
439                    // over to the AST and start a new one. We can't give it
440                    // over NOW because we will need to own/modify it to
441                    // add replies/conditions/etc.
442                    if current_trigger.is_populated() {
443                        let t = ast.topics.get_mut(&topic).expect("or else");
444                        t.add_trigger(current_trigger);
445                    }
446
447                    current_trigger = Trigger::new(line.as_str());
448                }
449
450                // % Previous
451                "%" => {
452                    current_trigger.previous = line.to_string();
453                }
454
455                // - Response
456                "-" => {
457                    current_trigger.reply.push(line.to_string());
458                }
459
460                // * Condition
461                "*" => {
462                    // Split everything apart.
463                    let parts: Vec<String> = line.splitn(2, "=>").map(|s| s.to_string()).collect();
464                    let condition = parts.get(0).unwrap().trim();
465                    let reply = parts.get(1).map(|s| s.as_str()).unwrap_or("").trim();
466
467                    // Parse the conditional side.
468                    match crate::regex::CONDITION.captures(&condition) {
469                        Some(caps) => {
470                            let left = caps.get(1).unwrap().as_str();
471                            let operator = caps.get(2).unwrap().as_str();
472                            let right = caps.get(3).unwrap().as_str();
473
474                            current_trigger.condition.push(crate::ast::Condition{
475                                left: left.to_string(),
476                                operator: operator.to_string(),
477                                right: right.to_string(),
478                                reply: reply.to_string(),
479                            });
480                        },
481                        None => {
482                            // TODO: raise syntax error.
483                        },
484                    }
485                }
486
487                // @ Redirect
488                "@" => {
489                    current_trigger.redirect = line.to_string();
490                }
491
492                // ^ Continue was handled in lookahead above.
493                "^" => continue,
494
495                &_ => {
496                    warn!(
497                        "Unsupported RiveScript command '{}' found at {} line {}",
498                        cmd, filename, lineno,
499                    );
500                }
501            }
502        }
503
504        // If we had a final trigger ready to go, add it to the AST.
505        if current_trigger.is_populated() {
506            let t = ast.topics.get_mut(&topic).expect("or else");
507            t.add_trigger(current_trigger);
508        }
509
510        Ok(ast)
511    }
512}