pddl_parser/
lexer.rs

1use std::ops::Range;
2
3use derive_more::Display;
4use logos::Logos;
5use nom::InputLength;
6
7use crate::error::ParserError;
8
9/// All of the possible tokens in a PDDL file
10#[derive(Logos, Debug, Display, Clone, PartialEq)]
11#[logos(skip r"[ \t\n\f\r]+")]
12#[logos(error = ParserError)]
13pub enum Token {
14    /// A colon `:`
15    #[token(":")]
16    Colon,
17
18    /// An open bracket `[`
19    #[token("[")]
20    OpenBracket,
21
22    /// A close bracket `]`
23    #[token("]")]
24    CloseBracket,
25
26    /// An open parenthesis `(`
27    #[regex(r"\([ \t\n\f]*")]
28    OpenParen,
29
30    /// A close parenthesis `)`
31    #[regex(r"\)[ \t\n\f]*")]
32    CloseParen,
33
34    // PDDL Keywords
35    /// The `define` keyword
36    #[token("define", ignore(ascii_case))]
37    Define,
38
39    /// The `problem` keyword
40    #[token("problem", ignore(ascii_case))]
41    Problem,
42
43    /// The `:objects` keyword
44    #[token(":objects", ignore(ascii_case))]
45    Objects,
46
47    /// The `domain` keyword (without the colon, used in the domain file to define the domain)
48    #[token("domain", ignore(ascii_case))]
49    Domain,
50
51    /// The `:domain` keyword (with the colon, used in the problem file to specify the domain)
52    #[token(":domain", ignore(ascii_case))]
53    ProblemDomain,
54
55    /// The `:requirements` keyword
56    #[token(":requirements", ignore(ascii_case))]
57    Requirements,
58
59    /// The `:types` keyword
60    #[token(":types", ignore(ascii_case))]
61    Types,
62
63    /// The `:constants` keyword
64    #[token(":constants", ignore(ascii_case))]
65    Constants,
66
67    /// The `:predicates` keyword
68    #[token(":predicates", ignore(ascii_case))]
69    Predicates,
70
71    /// The `:functions` keyword
72    #[token(":functions", ignore(ascii_case))]
73    Functions,
74
75    /// The `:action` keyword
76    #[token(":action", ignore(ascii_case))]
77    Action,
78
79    /// The `:durative-action` keyword
80    #[token(":durative-action", ignore(ascii_case))]
81    DurativeAction,
82
83    /// The `:parameters` keyword
84    #[token(":parameters", ignore(ascii_case))]
85    Parameters,
86
87    /// The `:duration` keyword
88    #[token(":duration", ignore(ascii_case))]
89    Duration,
90
91    /// The `:precondition` keyword
92    #[token(":precondition", ignore(ascii_case))]
93    Precondition,
94
95    /// The `:condition` keyword
96    #[token(":condition", ignore(ascii_case))]
97    Condition,
98
99    /// The `:effect` keyword
100    #[token(":effect", ignore(ascii_case))]
101    Effect,
102
103    /// The `:init` keyword
104    #[token(":init", ignore(ascii_case))]
105    Init,
106
107    /// The `:goal` keyword
108    #[token(":goal", ignore(ascii_case))]
109    Goal,
110
111    /// The `and` keyword
112    #[token("and", ignore(ascii_case))]
113    And,
114
115    /// The `not` keyword
116    #[token("not", ignore(ascii_case))]
117    Not,
118
119    /// The `either` keyword
120    #[token("either", ignore(ascii_case))]
121    Either,
122
123    /// The `assign` keyword
124    #[token("assign", ignore(ascii_case))]
125    Assign,
126
127    /// The `scale-up` keyword
128    #[token("scale-up", ignore(ascii_case))]
129    ScaleUp,
130
131    /// The `scale-down` keyword
132    #[token("scale-down", ignore(ascii_case))]
133    ScaleDown,
134
135    /// The `increase` keyword
136    #[token("increase", ignore(ascii_case))]
137    Increase,
138
139    /// The `decrease` keyword
140    #[token("decrease", ignore(ascii_case))]
141    Decrease,
142
143    /// The `forall` keyword
144    #[token("forall", ignore(ascii_case))]
145    Forall,
146
147    /// The `at` keyword
148    #[token("at", ignore(ascii_case))]
149    At,
150
151    /// The `over` keyword
152    #[token("over", ignore(ascii_case))]
153    Over,
154
155    /// The `all` keyword
156    #[token("all", ignore(ascii_case))]
157    All,
158
159    /// The `start` keyword
160    #[token("start", ignore(ascii_case))]
161    Start,
162
163    /// The `end` keyword
164    #[token("end", ignore(ascii_case))]
165    End,
166
167    /// A number (positive or negative, e.g. `1` or `-1`)
168    #[regex(r"-?[0-9]+", |lex| lex.slice().parse())]
169    Integer(i64),
170
171    /// A floating point number (e.g. `1.0`)
172    #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().parse())]
173    Float(f64),
174
175    // Math operators
176    /// The `+` operator
177    #[token("+")]
178    Plus,
179
180    /// The `*` operator
181    #[token("*")]
182    Times,
183
184    /// The `-` operator
185    #[token("/")]
186    Divide,
187
188    /// The `=` operator
189    #[token("=")]
190    Equal,
191
192    /// The `:strips` requirement (PDDL 1)
193    #[token(":strips", ignore(ascii_case))]
194    Strips,
195
196    /// The `:typing` requirement (PDDL 1)
197    #[token(":typing", ignore(ascii_case))]
198    Typing,
199
200    /// The `:negative-preconditions` requirement (PDDL 1)
201    #[token(":disjunctive-preconditions", ignore(ascii_case))]
202    DisjunctivePreconditions,
203
204    /// The `:disjunctive-preconditions` requirement (PDDL 1)
205    #[token(":equality", ignore(ascii_case))]
206    Equality,
207
208    /// The `:existential-preconditions` requirement (PDDL 1)
209    #[token(":existential-preconditions", ignore(ascii_case))]
210    ExistentialPreconditions,
211
212    /// The `:universal-preconditions` requirement (PDDL 1)
213    #[token(":universal-preconditions", ignore(ascii_case))]
214    UniversalPreconditions,
215
216    /// The `:quantified-preconditions` requirement (PDDL 1)
217    #[token(":quantified-preconditions", ignore(ascii_case))]
218    QuantifiedPreconditions,
219
220    /// The `:conditional-effects` requirement (PDDL 1)
221    #[token(":conditional-effects", ignore(ascii_case))]
222    ConditionalEffects,
223
224    /// The `:action-expansions` requirement (PDDL 1)
225    #[token(":action-expansions", ignore(ascii_case))]
226    ActionExpansions,
227
228    /// The `:foreach-expansions` requirement (PDDL 1)
229    #[token(":foreach-expansions", ignore(ascii_case))]
230    ForeachExpansions,
231
232    /// The `:dag-expansions` requirement (PDDL 1)
233    #[token(":dag-expansions", ignore(ascii_case))]
234    DagExpansions,
235
236    /// The `:domain-axioms` requirement (PDDL 1)
237    #[token(":domain-axioms", ignore(ascii_case))]
238    DomainAxioms,
239
240    /// The `:subgoals-through-axioms` requirement (PDDL 1)
241    #[token(":subgoals-through-axioms", ignore(ascii_case))]
242    SubgoalsThroughAxioms,
243
244    /// The `:safety-constraints` requirement (PDDL 1)
245    #[token(":safety-constraints", ignore(ascii_case))]
246    SafetyConstraints,
247
248    /// The `:expression-evaluation` requirement (PDDL 1)
249    #[token(":expression-evaluation", ignore(ascii_case))]
250    ExpressionEvaluation,
251
252    /// The `:fluents` requirement (PDDL 1)
253    #[token(":fluents", ignore(ascii_case))]
254    Fluents,
255
256    /// The `:open-world` requirement (PDDL 1)
257    #[token(":open-world", ignore(ascii_case))]
258    OpenWorld,
259
260    /// The `:true-negation` requirement (PDDL 1)
261    #[token(":true-negation", ignore(ascii_case))]
262    TrueNegation,
263
264    /// The `:adl` requirement (PDDL 1)
265    #[token(":adl", ignore(ascii_case))]
266    Adl,
267
268    /// The `:ucpop` requirement (PDDL 1)
269    #[token(":ucpop", ignore(ascii_case))]
270    Ucpop,
271
272    // PDDL 2.1
273    /// The `:numeric-fluents` requirement (PDDL 2.1)
274    #[token(":numeric-fluents", ignore(ascii_case))]
275    NumericFluents,
276
277    /// The `:durative-actions` requirement (PDDL 2.1)
278    #[token(":durative-actions", ignore(ascii_case))]
279    DurativeActions,
280
281    /// The `:durative-inequalities` (or, as a typo, `:duration-inequalities`) requirement (PDDL 2.1)
282    #[regex(r":durative-inequalities", ignore(ascii_case))]
283    #[regex(r":duration-inequalities", ignore(ascii_case))]
284    DurativeInequalities,
285
286    /// The `:continuous-effects` requirement (PDDL 2.1)
287    #[token(":continuous-effects", ignore(ascii_case))]
288    ContinuousEffects,
289
290    /// The `:negative-preconditions` requirement (PDDL 2.1)
291    #[token(":negative-preconditions", ignore(ascii_case))]
292    NegativePreconditions,
293
294    // PDDL 2.2
295    /// The `:derived-predicates` requirement (PDDL 2.2)
296    #[token(":derived-predicates", ignore(ascii_case))]
297    DerivedPredicates,
298
299    /// The `:timed-initial-literals` requirement (PDDL 2.2)
300    #[token(":timed-initial-literals", ignore(ascii_case))]
301    TimedInitialLiterals,
302
303    // PDDL 3
304    /// The `:preferences` requirement (PDDL 3)
305    #[token(":preferences", ignore(ascii_case))]
306    Preferences,
307
308    /// The `:constraints` requirement (PDDL 3)
309    #[token(":constraints", ignore(ascii_case))]
310    Constraints,
311
312    // PDDL 3.1
313    /// The `:action-costs` requirement (PDDL 3.1)
314    #[token(":action-costs", ignore(ascii_case))]
315    ActionCosts,
316
317    /// The `:goal-utilities` requirement (PDDL 3.1)
318    #[token(":goal-utilities", ignore(ascii_case))]
319    GoalUtilities,
320
321    // PDDL+
322    /// The `:time` requirement (PDDL+)
323    #[token(":time", ignore(ascii_case))]
324    Time,
325
326    // PDDL Identifier
327    /// A PDDL identifier (a sequence of letters, digits, underscores, and hyphens, starting with a letter)
328    #[regex(r"[a-zA-Z][a-zA-Z0-9_\-]*", |lex| lex.slice().to_string())]
329    Id(String),
330
331    // PDDL Variable
332    /// A PDDL variable (a sequence of letters, digits, underscores, and hyphens, starting with a question mark)
333    #[regex(r"\?[a-zA-Z][a-zA-Z0-9_\-]*", |lex| lex.slice().to_string())]
334    Var(String),
335
336    // Dash
337    /// A dash (`-`) character that can represent a minus sign or a hyphen
338    #[token("-")]
339    Dash,
340
341    // Comments
342    /// A comment (a semicolon followed by any characters). The comment is ignored.
343    #[regex(r";.*", logos::skip)]
344    Comment,
345
346    // Packages
347    /// A package declaration (a sequence of characters enclosed in parentheses, starting with `in-package`). The package name is ignored.
348    #[regex(r#"\(\s*in-package\s+("[^"]*"|[^)\s]*)\)"#, logos::skip)]
349    Package,
350}
351
/// A stream of tokens. This is a wrapper around a [`logos::Lexer`]. It implements [`Clone`], so it can be cloned and used to peek ahead.
/// NOTE(review): an [`Iterator`] impl was previously documented here but is not visible in this file — confirm it exists elsewhere before relying on it.
#[derive(Debug)]
pub struct TokenStream<'a> {
    // The underlying logos lexer; borrows the source string for 'a.
    lexer: logos::Lexer<'a, Token>,
}
357
358impl Clone for TokenStream<'_> {
359    fn clone(&self) -> Self {
360        Self {
361            lexer: self.lexer.clone(),
362        }
363    }
364}
365
366impl<'a> TokenStream<'a> {
367    /// Creates a new token stream from the given input string. The input string is not copied, so it must outlive the token stream.
368    pub fn new(input: &'a str) -> Self {
369        Self {
370            lexer: Token::lexer(input),
371        }
372    }
373
374    /// Returns the remaining input string.
375    pub fn len(&self) -> usize {
376        self.lexer.source().len() - self.lexer.span().end
377    }
378
379    /// Returns the number of remaining tokens in the stream.
380    pub fn count(&self) -> usize {
381        self.lexer.clone().spanned().count()
382    }
383
384    /// Returns `true` if the token stream is empty.
385    pub fn is_empty(&self) -> bool {
386        self.count() == 0
387    }
388
389    /// Returns the next token in the stream, or `None` if the stream is empty.
390    pub fn peek(&self) -> Option<(Result<Token, ParserError>, &'a str)> {
391        let mut iter = self.lexer.clone().spanned();
392        iter.next().map(|(t, span)| (t, &self.lexer.source()[span]))
393    }
394
395    /// Returns the next `n` tokens in the stream. If there are fewer than `n` tokens left, returns the remaining tokens. If the stream is empty, returns `None`.
396    pub fn peek_n(&self, n: usize) -> Option<Vec<(Result<Token, ParserError>, String)>> {
397        let mut iter = self.lexer.clone().spanned();
398        let mut tokens = Vec::new();
399        for _ in 0..n {
400            match iter.next() {
401                Some((t, span)) => tokens.push((t, self.lexer.source()[span].to_string())),
402                None => return if tokens.is_empty() { None } else { Some(tokens) },
403            }
404        }
405        Some(tokens)
406    }
407
408    /// Skips the next token in the stream.
409    pub fn advance(mut self) -> Self {
410        self.lexer.next();
411        self
412    }
413
414    /// Returns the span of the current token.
415    pub fn span(&self) -> Range<usize> {
416        self.lexer.span()
417    }
418}
419
420impl<'a> nom::Parser<TokenStream<'a>, &'a str, ParserError> for Token {
421    fn parse(&mut self, input: TokenStream<'a>) -> nom::IResult<TokenStream<'a>, &'a str, ParserError> {
422        match input.peek() {
423            Some((Ok(t), s)) if t == *self => Ok((input.advance(), s)),
424            _ => Err(nom::Err::Error(ParserError::ExpectedToken(
425                self.clone(),
426                input.span(),
427                input.peek_n(30),
428            ))),
429        }
430    }
431}
432
433impl ToString for TokenStream<'_> {
434    fn to_string(&self) -> String {
435        self.lexer.source().to_string()
436    }
437}
438
439impl<'a> From<&'a str> for TokenStream<'a> {
440    fn from(s: &'a str) -> Self {
441        Self::new(s)
442    }
443}
444
445impl<'a> From<&'a String> for TokenStream<'a> {
446    fn from(s: &'a String) -> Self {
447        Self::new(s)
448    }
449}
450
// nom integration: reports the remaining input size (in bytes, delegating to
// `TokenStream::len`) so nom combinators can detect progress/exhaustion.
impl InputLength for TokenStream<'_> {
    fn input_len(&self) -> usize {
        self.len()
    }
}