// query_parser/lib.rs

1use std::borrow::Cow;
2
/// The result of parsing a query string: the input text plus the terms found in it.
#[derive(Debug, PartialEq)]
pub struct Query {
    /// A copy of the query string that was handed to the parser.
    pub raw_query: String,
    /// The terms extracted from the query string, in the order they were parsed.
    pub terms: Vec<Term>,
}
8
/// The value carried by a [`Term`].
#[derive(Debug, PartialEq)]
pub enum TermValue {
    /// A plain string value.
    Simple(String),
    // in the future, other types like List or Range might be added
}
14
/// One term of a query: a negation flag, an optional key, and a value.
#[derive(Debug, PartialEq)]
pub struct Term {
    /// Whether the term is negated.
    pub negated: bool,
    /// The key of a keyed term, or `None` when the term has no key.
    pub key: Option<String>,
    /// The value of the term.
    pub value: TermValue,
}
21
22impl Term {
23    fn new<S: Into<String>>(negated: bool, key: Option<S>, value: S) -> Self {
24        Term {
25            negated,
26            key: key.map(S::into),
27            value: TermValue::Simple(value.into()),
28        }
29    }
30
31    fn from_value<S: Into<String>>(value: S) -> Self {
32        Term {
33            negated: false,
34            key: None,
35            value: TermValue::Simple(value.into()),
36        }
37    }
38}
39
/// Settings that control which escape mechanisms the parser recognizes.
#[derive(Debug)]
pub struct ParseOptions {
    // Whether a backslash inside a quoted string may escape quotes and backslashes.
    allow_backslash_quotes: bool,
    // Whether `\uXXXXXX` sequences in term values are decoded.
    allow_unicode_escapes: bool,
}
45
46impl ParseOptions {
47    pub fn default() -> Self {
48        Self {
49            allow_backslash_quotes: true,
50            allow_unicode_escapes: true,
51        }
52    }
53
54    /// Allows single- and double-quote characters to be escaped inside quoted strings by
55    /// preceding them with a backslash. Similarly, backslash characters themselves can be
56    /// escaped inside quoted strings by preceding them with a backslash. Note that if a
57    /// backslash is followed by any another character, or is inside a non-quoted term, then
58    /// it is treated as a literal backslash.
59    pub fn allow_backslash_quotes(&mut self, allow: bool) -> &mut ParseOptions {
60        self.allow_backslash_quotes = allow;
61        self
62    }
63
64    /// Allows `\uXXXXXX` unicode escapes in term values. Each X must be a hexadecimal
65    /// character, and all six are required (use zero-padding as needed). To represent
66    /// a string like `\u001234` literally in a query string, one or more of the
67    /// characters can be escaped via the unicode escape sequence; in this example
68    /// `\u00005Cu001234` can be used to escape the initial backslash character
69    /// with the equivalent unicode escape.
70    pub fn allow_unicode_escapes(&mut self, allow: bool) -> &mut ParseOptions {
71        self.allow_unicode_escapes = allow;
72        self
73    }
74}
75
76pub fn parse(raw: &str) -> Query {
77    parse_with_options(raw, &ParseOptions::default())
78}
79
80pub fn parse_with_options(raw: &str, opts: &ParseOptions) -> Query {
81    Query {
82        raw_query: String::from(raw),
83        terms: parse_terms(raw, opts),
84    }
85}
86
/// The states of the term parser's state machine.
///
/// Each quoted state has a matching `...Escape` state that is entered after a backslash,
/// and most states have a `Negated...` twin used for terms that started with `-`.
#[derive(Clone, Copy, Debug)]
enum ParseState {
    /// Before a new term is processed.
    Initial,
    /// Term started with a '-' character.
    Negated,
    /// Term started with a single quote.
    SingleQuote,
    /// Term started with a double quote.
    DoubleQuote,
    /// Encountered a backslash inside a single-quoted term.
    SingleQuoteEscape,
    /// Encountered a backslash inside a double-quoted term.
    DoubleQuoteEscape,
    /// Term started without quoting.
    RawToken,
    /// Term started with a '-' followed by a single quote.
    NegatedSingleQuote,
    /// Term started with a '-' followed by a double quote.
    NegatedDoubleQuote,
    /// Encountered a backslash inside `NegatedSingleQuote`.
    NegatedSingleQuoteEscape,
    /// Encountered a backslash inside `NegatedDoubleQuote`.
    NegatedDoubleQuoteEscape,
    /// Term started with a '-' followed by unquoted characters.
    NegatedRawToken,
    /// After encountering the ':' that separates a key from its value.
    Value,
    /// After encountering the ':' that separates a negated key from its value.
    NegatedValue,
    /// Once the value has been determined to be unquoted.
    RawValue,
    /// Once the value has been determined to be single-quoted.
    SingleQuotedValue,
    /// Once the value has been determined to be double-quoted.
    DoubleQuotedValue,
    /// Encountered a backslash inside `SingleQuotedValue`.
    SingleQuotedValueEscape,
    /// Encountered a backslash inside `DoubleQuotedValue`.
    DoubleQuotedValueEscape,
    /// Once the value for a negated term has been determined to be unquoted.
    NegatedRawValue,
    /// Once the value for a negated term has been determined to be single-quoted.
    NegatedSingleQuotedValue,
    /// Once the value for a negated term has been determined to be double-quoted.
    NegatedDoubleQuotedValue,
    /// Encountered a backslash inside `NegatedSingleQuotedValue`.
    NegatedSingleQuotedValueEscape,
    /// Encountered a backslash inside `NegatedDoubleQuotedValue`.
    NegatedDoubleQuotedValueEscape,
}
114
115impl ParseState {
116    fn is_negated(&self) -> bool {
117        match self {
118            Self::Negated |
119            Self::NegatedSingleQuote |
120            Self::NegatedDoubleQuote |
121            Self::NegatedSingleQuoteEscape |
122            Self::NegatedDoubleQuoteEscape |
123            Self::NegatedRawToken |
124            Self::NegatedValue |
125            Self::NegatedRawValue |
126            Self::NegatedSingleQuotedValue |
127            Self::NegatedDoubleQuotedValue |
128            Self::NegatedSingleQuotedValueEscape |
129            Self::NegatedDoubleQuotedValueEscape => true,
130            _ => false,
131        }
132    }
133
134    fn is_single_quote(&self) -> bool {
135        match self {
136            Self::SingleQuote |
137            Self::SingleQuoteEscape |
138            Self::NegatedSingleQuote |
139            Self::NegatedSingleQuoteEscape |
140            Self::SingleQuotedValue |
141            Self::SingleQuotedValueEscape |
142            Self::NegatedSingleQuotedValue |
143            Self::NegatedSingleQuotedValueEscape => true,
144            _ => false,
145        }
146    }
147
148    fn escape(&self) -> Self {
149        match self {
150            ParseState::SingleQuote => ParseState::SingleQuoteEscape,
151            ParseState::DoubleQuote => ParseState::DoubleQuoteEscape,
152            ParseState::NegatedSingleQuote => ParseState::NegatedSingleQuoteEscape,
153            ParseState::NegatedDoubleQuote => ParseState::NegatedDoubleQuoteEscape,
154            ParseState::SingleQuotedValue => ParseState::SingleQuotedValueEscape,
155            ParseState::DoubleQuotedValue => ParseState::DoubleQuotedValueEscape,
156            ParseState::NegatedSingleQuotedValue => ParseState::NegatedSingleQuotedValueEscape,
157            ParseState::NegatedDoubleQuotedValue => ParseState::NegatedDoubleQuotedValueEscape,
158            _ => panic!("Unescapable state"),
159        }
160    }
161
162    fn unescape(&self) -> Self {
163        match self {
164            ParseState::SingleQuoteEscape => ParseState::SingleQuote,
165            ParseState::DoubleQuoteEscape => ParseState::DoubleQuote,
166            ParseState::NegatedSingleQuoteEscape => ParseState::NegatedSingleQuote,
167            ParseState::NegatedDoubleQuoteEscape => ParseState::NegatedDoubleQuote,
168            ParseState::SingleQuotedValueEscape => ParseState::SingleQuotedValue,
169            ParseState::DoubleQuotedValueEscape => ParseState::DoubleQuotedValue,
170            ParseState::NegatedSingleQuotedValueEscape => ParseState::NegatedSingleQuotedValue,
171            ParseState::NegatedDoubleQuotedValueEscape => ParseState::NegatedDoubleQuotedValue,
172            _ => panic!("Unescaped state"),
173        }
174    }
175
176    fn is_escaped(&self) -> bool {
177        match self {
178            Self::SingleQuoteEscape |
179            Self::DoubleQuoteEscape |
180            Self::NegatedSingleQuoteEscape |
181            Self::NegatedDoubleQuoteEscape |
182            Self::SingleQuotedValueEscape |
183            Self::DoubleQuotedValueEscape |
184            Self::NegatedSingleQuotedValueEscape |
185            Self::NegatedDoubleQuotedValueEscape => true,
186            _ => false,
187        }
188    }
189}
190
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its numeric value.
///
/// # Panics
///
/// Panics if `hex` is not an ASCII hexadecimal digit. Callers validate the byte with
/// `u8::is_ascii_hexdigit` before calling.
fn hex_to_nybble(hex: u8) -> u32 {
    match hex {
        b'0'..=b'9' => u32::from(hex - b'0'),
        b'a'..=b'f' => u32::from(hex - b'a' + 10),
        b'A'..=b'F' => u32::from(hex - b'A' + 10),
        _ => panic!("Not a hex character!"),
    }
}

/// Tries to decode a `\uXXXXXX` escape whose backslash sits at byte offset `ix` of `s`.
///
/// Returns the decoded character when the backslash is followed by `u` and exactly six
/// hexadecimal digits that form a valid Unicode scalar value. Returns `None` when the
/// sequence is too short, malformed, or names an invalid code point (for example a
/// surrogate). The byte at `ix` itself is not inspected; the caller has already found
/// the backslash there.
fn decode_unicode_escape(s: &str, ix: usize) -> Option<char> {
    let bytes = s.as_bytes();
    // `get` yields `None` unless all six digit positions exist, which also guarantees
    // that the `u` position at `ix + 1` is in bounds.
    let digits = bytes.get(ix + 2..ix + 8)?;
    if bytes[ix + 1] != b'u' || !digits.iter().all(u8::is_ascii_hexdigit) {
        return None;
    }
    let code = digits
        .iter()
        .fold(0u32, |acc, &digit| (acc << 4) | hex_to_nybble(digit));
    std::char::from_u32(code)
}

/// Replaces every valid `\uXXXXXX` escape in `s` with the character it denotes.
///
/// Backslashes that do not start a valid escape are kept literally, together with all
/// surrounding text. When `s` contains no valid escape the input is returned borrowed,
/// so no allocation takes place.
fn decode_unicode_escapes<'a>(s: &'a str) -> Cow<'a, str> {
    // Allocated lazily, on the first escape that actually decodes.
    let mut decoded: Option<String> = None;
    // Everything in `s[..copied]` has already been written to `decoded`.
    let mut copied = 0;
    // Where the search for the next backslash resumes.
    let mut search_from = 0;

    while let Some(offset) = s[search_from..].find('\\') {
        let ix = search_from + offset;
        match decode_unicode_escape(s, ix) {
            Some(ch) => {
                let out = decoded.get_or_insert_with(|| String::with_capacity(s.len()));
                // Copy everything since the last decoded escape. This includes any
                // literal backslashes that were skipped over in between, which must
                // not be lost.
                out.push_str(&s[copied..ix]);
                out.push(ch);
                // An escape is always 8 ASCII bytes: `\`, `u`, and six hex digits.
                search_from = ix + 8;
                copied = search_from;
            }
            // Not an escape: leave the backslash in place and keep searching after it.
            None => search_from = ix + 1,
        }
    }

    match decoded {
        Some(mut out) => {
            out.push_str(&s[copied..]);
            Cow::Owned(out)
        }
        None => Cow::Borrowed(s),
    }
}
255
256impl ParseOptions {
257    fn decode_unicode(&self, s: String) -> String {
258        if !self.allow_unicode_escapes {
259            return s;
260        }
261
262        match decode_unicode_escapes(&s) {
263            Cow::Borrowed(_) => s,
264            Cow::Owned(owned) => owned,
265        }
266    }
267}
268
/// Splits `raw` into terms by running a character-driven state machine over it.
///
/// Terms are separated by ASCII whitespace or by the closing quote of a quoted term.
/// A leading `-` negates the term that follows, and the first `:` in an unquoted token
/// splits it into a key and a value. Values (but not keys) are passed through
/// `opts.decode_unicode`. Backslash escapes are only honored inside quoted strings and
/// only when `opts.allow_backslash_quotes` is set.
///
/// A quoted string that is still open when the input ends is emitted with its opening
/// quote character put back in front, plus a trailing backslash if an escape was
/// pending.
fn parse_terms(raw: &str, opts: &ParseOptions) -> Vec<Term> {
    let mut result = Vec::new();

    let mut state = ParseState::Initial;
    // Key of the term currently being built, set once a ':' has been seen.
    let mut key = None;
    // Characters accumulated for the current token (a key candidate or a value).
    let mut token = String::new();

    let mut c = raw.chars();
    loop {
        // Arm order matters: specific characters are matched before the catch-all
        // `Some(ref ch)` arms of the same state.
        match (state, c.next()) {
            // Initial state handlers
            (ParseState::Initial, None) => {
                break;
            }
            (ParseState::Initial, Some('-')) => {
                state = ParseState::Negated;
            }
            (ParseState::Initial, Some('\'')) => {
                state = ParseState::SingleQuote;
            }
            (ParseState::Initial, Some('"')) => {
                state = ParseState::DoubleQuote;
            }
            (ParseState::Initial, Some(ref ch)) if ch.is_ascii_whitespace() => {
                continue;
            }
            (ParseState::Initial, Some(ref ch)) => {
                state = ParseState::RawToken;
                token.push(*ch);
            }

            // Negated state handlers
            // A lone '-' (at end of input or followed by whitespace) is a plain,
            // non-negated term whose value is "-".
            (ParseState::Negated, None) => {
                result.push(Term::from_value("-"));
                break;
            }
            (ParseState::Negated, Some('\'')) => {
                state = ParseState::NegatedSingleQuote;
            }
            (ParseState::Negated, Some('"')) => {
                state = ParseState::NegatedDoubleQuote;
            }
            (ParseState::Negated, Some(ref ch)) if ch.is_ascii_whitespace() => {
                result.push(Term::from_value("-"));
                state = ParseState::Initial;
            }
            (ParseState::Negated, Some(ref ch)) => {
                state = ParseState::NegatedRawToken;
                token.push(*ch);
            }

            // [Negated] Single/Double quoted state handlers
            // Input ended inside an open quote: emit what was collected, with the
            // opening quote (and a pending backslash, if any) restored.
            (ParseState::SingleQuote, None) |
            (ParseState::DoubleQuote, None) |
            (ParseState::SingleQuoteEscape, None) |
            (ParseState::DoubleQuoteEscape, None) |
            (ParseState::NegatedSingleQuote, None) |
            (ParseState::NegatedDoubleQuote, None) |
            (ParseState::NegatedSingleQuoteEscape, None) |
            (ParseState::NegatedDoubleQuoteEscape, None) => {
                result.push(Term::new(state.is_negated(), None, format!(
                    "{}{}{}",
                    if state.is_single_quote() { "'" } else { "\"" },
                    opts.decode_unicode(token),
                    if state.is_escaped() { "\\" } else { "" },
                )));
                break;
            }
            // Character following a backslash: quotes and backslashes are taken
            // literally; for anything else the backslash itself is kept as well.
            (ParseState::SingleQuoteEscape, Some(ref ch)) |
            (ParseState::DoubleQuoteEscape, Some(ref ch)) |
            (ParseState::NegatedSingleQuoteEscape, Some(ref ch)) |
            (ParseState::NegatedDoubleQuoteEscape, Some(ref ch)) => {
                if !(*ch == '\'' || *ch == '"' || *ch == '\\') {
                    token.push('\\');
                }
                token.push(*ch);
                state = state.unescape();
            }
            // Matching closing quote: the quoted term is complete.
            (ParseState::SingleQuote, Some('\'')) |
            (ParseState::DoubleQuote, Some('"')) |
            (ParseState::NegatedSingleQuote, Some('\'')) |
            (ParseState::NegatedDoubleQuote, Some('"')) => {
                result.push(Term::new(state.is_negated(), None, opts.decode_unicode(token)));
                token = String::new();
                state = ParseState::Initial;
            }
            (ParseState::SingleQuote, Some(ref ch)) |
            (ParseState::DoubleQuote, Some(ref ch)) |
            (ParseState::NegatedSingleQuote, Some(ref ch)) |
            (ParseState::NegatedDoubleQuote, Some(ref ch)) => {
                if opts.allow_backslash_quotes && *ch == '\\' {
                    state = state.escape();
                } else {
                    token.push(*ch);
                }
            }

            // Raw token state handlers
            (ParseState::RawToken, None) => {
                result.push(Term::from_value(opts.decode_unicode(token)));
                break;
            }
            // The token collected so far becomes the key; it is stored as-is, without
            // unicode decoding.
            (ParseState::RawToken, Some(':')) => {
                key = Some(token);
                token = String::new();
                state = ParseState::Value;
            }
            (ParseState::RawToken, Some(ref ch)) if ch.is_ascii_whitespace() => {
                result.push(Term::from_value(opts.decode_unicode(token)));
                token = String::new();
                state = ParseState::Initial;
            }
            (ParseState::RawToken, Some(ref ch)) => {
                token.push(*ch);
            }

            // Negated raw token state handlers
            (ParseState::NegatedRawToken, None) => {
                result.push(Term::new(true, None, opts.decode_unicode(token)));
                break;
            }
            (ParseState::NegatedRawToken, Some(':')) => {
                key = Some(token);
                token = String::new();
                state = ParseState::NegatedValue;
            }
            (ParseState::NegatedRawToken, Some(ref ch)) if ch.is_ascii_whitespace() => {
                result.push(Term::new(true, None, opts.decode_unicode(token)));
                token = String::new();
                state = ParseState::Initial;
            }
            (ParseState::NegatedRawToken, Some(ref ch)) => {
                token.push(*ch);
            }

            // Value/raw-value state handlers
            (ParseState::Value, None) |
            (ParseState::RawValue, None) |
            (ParseState::NegatedValue, None) |
            (ParseState::NegatedRawValue, None) => {
                result.push(Term::new(state.is_negated(), key, opts.decode_unicode(token)));
                break;
            }
            // A quote is only special as the very first character of a value.
            (ParseState::Value, Some('\'')) => {
                state = ParseState::SingleQuotedValue;
            }
            (ParseState::Value, Some('"')) => {
                state = ParseState::DoubleQuotedValue;
            }
            (ParseState::NegatedValue, Some('\'')) => {
                state = ParseState::NegatedSingleQuotedValue;
            }
            (ParseState::NegatedValue, Some('"')) => {
                state = ParseState::NegatedDoubleQuotedValue;
            }
            // Whitespace ends the value; directly after the ':' this yields an empty
            // value.
            (ParseState::Value, Some(ref ch)) |
            (ParseState::RawValue, Some(ref ch)) |
            (ParseState::NegatedValue, Some(ref ch)) |
            (ParseState::NegatedRawValue, Some(ref ch))
                if ch.is_ascii_whitespace() =>
            {
                result.push(Term::new(state.is_negated(), key, opts.decode_unicode(token)));
                key = None;
                token = String::new();
                state = ParseState::Initial;
            }
            (ParseState::Value, Some(ref ch)) |
            (ParseState::RawValue, Some(ref ch)) |
            (ParseState::NegatedValue, Some(ref ch)) |
            (ParseState::NegatedRawValue, Some(ref ch)) => {
                token.push(*ch);
                state = if state.is_negated() { ParseState::NegatedRawValue } else { ParseState::RawValue };
            }

            // [Negated] Single/Double quoted value state handlers; these mirror the
            // quoted-term handlers above but carry the key along.
            (ParseState::SingleQuotedValue, None) |
            (ParseState::DoubleQuotedValue, None) |
            (ParseState::SingleQuotedValueEscape, None) |
            (ParseState::DoubleQuotedValueEscape, None) |
            (ParseState::NegatedSingleQuotedValue, None) |
            (ParseState::NegatedDoubleQuotedValue, None) |
            (ParseState::NegatedSingleQuotedValueEscape, None) |
            (ParseState::NegatedDoubleQuotedValueEscape, None) => {
                result.push(Term::new(state.is_negated(), key, format!(
                    "{}{}{}",
                    if state.is_single_quote() { "'" } else { "\"" },
                    opts.decode_unicode(token),
                    if state.is_escaped() { "\\" } else { "" },
                )));
                break;
            }
            (ParseState::SingleQuotedValueEscape, Some(ref ch)) |
            (ParseState::DoubleQuotedValueEscape, Some(ref ch)) |
            (ParseState::NegatedSingleQuotedValueEscape, Some(ref ch)) |
            (ParseState::NegatedDoubleQuotedValueEscape, Some(ref ch)) => {
                if !(*ch == '\'' || *ch == '"' || *ch == '\\') {
                    token.push('\\');
                }
                token.push(*ch);
                state = state.unescape();
            }
            (ParseState::SingleQuotedValue, Some('\'')) |
            (ParseState::DoubleQuotedValue, Some('"')) |
            (ParseState::NegatedSingleQuotedValue, Some('\'')) |
            (ParseState::NegatedDoubleQuotedValue, Some('"')) => {
                result.push(Term::new(state.is_negated(), key, opts.decode_unicode(token)));
                key = None;
                token = String::new();
                state = ParseState::Initial;
            }
            (ParseState::SingleQuotedValue, Some(ref ch)) |
            (ParseState::DoubleQuotedValue, Some(ref ch)) |
            (ParseState::NegatedSingleQuotedValue, Some(ref ch)) |
            (ParseState::NegatedDoubleQuotedValue, Some(ref ch)) => {
                if opts.allow_backslash_quotes && *ch == '\\' {
                    state = state.escape();
                } else {
                    token.push(*ch);
                }
            }
        }
    }
    result
}
492
// Unit tests for the query parser, grouped by the syntax feature they exercise.
#[cfg(test)]
mod tests {
    use super::*;

    // Empty and whitespace-only input yields no terms.
    #[test]
    fn empty() {
        assert_eq!(parse("").terms, &[]);
        assert_eq!(parse(" ").terms, &[]);
        assert_eq!(parse("\t \n").terms, &[]);
    }

    // A lone '-' is a literal term; only the first '-' of a longer token negates.
    #[test]
    fn negations() {
        assert_eq!(parse("-").terms, &[Term::new(false, None, "-")]);
        assert_eq!(parse("- -").terms, &[Term::new(false, None, "-"), Term::new(false, None, "-")]);
        assert_eq!(parse("--").terms, &[Term::new(true, None, "-")]);
        assert_eq!(parse("---").terms, &[Term::new(true, None, "--")]);
        assert_eq!(parse("--- ---").terms, &[Term::new(true, None, "--"), Term::new(true, None, "--")]);
        assert_eq!(parse("---:---").terms, &[Term::new(true, Some("--"), "---")]);
    }

    // Quoted terms, with and without whitespace between them, plain and negated.
    #[test]
    fn quoted() {
        assert_eq!(parse("'hello' 'world'").terms, &[Term::new(false, None, "hello"), Term::new(false, None, "world")]);
        assert_eq!(parse(" 'hello''world' ").terms, &[Term::new(false, None, "hello"), Term::new(false, None, "world")]);
        assert_eq!(parse("\"hello\" \"world\"").terms, &[Term::new(false, None, "hello"), Term::new(false, None, "world")]);
        assert_eq!(parse(" \"hello\"\"world\" ").terms, &[Term::new(false, None, "hello"), Term::new(false, None, "world")]);

        assert_eq!(parse("-'hello' 'world'").terms, &[Term::new(true, None, "hello"), Term::new(false, None, "world")]);
        assert_eq!(parse(" 'hello'-'world' ").terms, &[Term::new(false, None, "hello"), Term::new(true, None, "world")]);
        assert_eq!(parse("\"hello\" -\"world\"").terms, &[Term::new(false, None, "hello"), Term::new(true, None, "world")]);
        assert_eq!(parse(" -\"hello\"-\"world\" ").terms, &[Term::new(true, None, "hello"), Term::new(true, None, "world")]);
    }

    // Unquoted terms separated by various ASCII whitespace characters.
    #[test]
    fn raw_tokens() {
        assert_eq!(parse("hello").terms, &[Term::from_value("hello")]);
        assert_eq!(parse(" hello ").terms, &[Term::from_value("hello")]);
        assert_eq!(parse(" hello world ").terms, &[Term::from_value("hello"), Term::from_value("world")]);
        assert_eq!(parse("\rhello\nworld\t").terms, &[Term::from_value("hello"), Term::from_value("world")]);

        assert_eq!(parse(" -hello ").terms, &[Term::new(true, None, "hello")]);
        assert_eq!(parse(" -hello-world ").terms, &[Term::new(true, None, "hello-world")]);
        assert_eq!(parse(" -hello -world ").terms, &[Term::new(true, None, "hello"), Term::new(true, None, "world")]);
    }

    // Unquoted key:value terms; a '-' inside the value is literal.
    #[test]
    fn raw_values() {
        assert_eq!(parse("key:value").terms, &[Term::new(false, Some("key"), "value")]);
        assert_eq!(parse("key:value key2:value2").terms, &[Term::new(false, Some("key"), "value"), Term::new(false, Some("key2"), "value2")]);
        assert_eq!(parse("key: anotherValue").terms, &[Term::new(false, Some("key"), ""), Term::new(false, None, "anotherValue")]);
        assert_eq!(parse(" key:value ").terms, &[Term::new(false, Some("key"), "value")]);

        assert_eq!(parse("-key:value").terms, &[Term::new(true, Some("key"), "value")]);
        assert_eq!(parse(" -key:value ").terms, &[Term::new(true, Some("key"), "value")]);
        assert_eq!(parse(" key:-value ").terms, &[Term::new(false, Some("key"), "-value")]);
        assert_eq!(parse(" key:- ").terms, &[Term::new(false, Some("key"), "-")]);
    }

    // key:value terms whose value is quoted, plain and negated.
    #[test]
    fn quoted_values() {
        assert_eq!(parse("key:'value'").terms, &[Term::new(false, Some("key"), "value")]);
        assert_eq!(parse("key:\"value with spaces\"").terms, &[Term::new(false, Some("key"), "value with spaces")]);
        assert_eq!(parse("key:\"value\" key2:'another value'").terms, &[Term::new(false, Some("key"), "value"), Term::new(false, Some("key2"), "another value")]);

        assert_eq!(parse("-key:'value'").terms, &[Term::new(true, Some("key"), "value")]);
        assert_eq!(parse("-key:\"value with spaces\"").terms, &[Term::new(true, Some("key"), "value with spaces")]);
        assert_eq!(parse("key:\"value\" -key2:'another value'").terms, &[Term::new(false, Some("key"), "value"), Term::new(true, Some("key2"), "another value")]);
    }

    // Input that ends inside an open quote or right after a backslash.
    #[test]
    fn end_unexpectedly() {
        assert_eq!(parse(" -").terms, &[Term::new(false, None, "-")]);
        assert_eq!(parse("'hello").terms, &[Term::new(false, None, "'hello")]);
        assert_eq!(parse("'hello\\").terms, &[Term::new(false, None, "'hello\\")]);
        assert_eq!(parse("\"hello ").terms, &[Term::new(false, None, "\"hello ")]);
        assert_eq!(parse("hello\\").terms, &[Term::new(false, None, "hello\\")]);
        assert_eq!(parse("-'hello").terms, &[Term::new(true, None, "'hello")]);
        assert_eq!(parse("-'hello\\").terms, &[Term::new(true, None, "'hello\\")]);
        assert_eq!(parse("-\"hello ").terms, &[Term::new(true, None, "\"hello ")]);
        assert_eq!(parse("-hello\\").terms, &[Term::new(true, None, "hello\\")]);

        assert_eq!(parse("key:\"value ").terms, &[Term::new(false, Some("key"), "\"value ")]);
        assert_eq!(parse("-key:'value ").terms, &[Term::new(true, Some("key"), "'value ")]);
    }

    // Unicode escapes are decoded in values only, and only when enabled.
    #[test]
    fn parse_unicode() {
        let p = |x| parse_with_options(x, ParseOptions::default().allow_unicode_escapes(false));
        let pu = |x| parse_with_options(x, ParseOptions::default().allow_unicode_escapes(true));

        assert_eq!(p("\\u002021z").terms, &[Term::new(false, None, "\\u002021z")]);
        assert_eq!(pu("\\u002021z").terms, &[Term::new(false, None, "\u{2021}z")]);
        assert_eq!(pu("\\u00202xz").terms, &[Term::new(false, None, "\\u00202xz")]);
        assert_eq!(pu("\\u00202z").terms, &[Term::new(false, None, "\\u00202z")]);
        assert_eq!(pu("\\v002021z").terms, &[Term::new(false, None, "\\v002021z")]);

        assert_eq!(p("\\u002021:'\\u002022 \\u002023'").terms, &[Term::new(false, Some("\\u002021"), "\\u002022 \\u002023")]);
        assert_eq!(pu("\\u002021:'\\u002022 \\u002023'").terms, &[Term::new(false, Some("\\u002021"), "\u{2022} \u{2023}")]);
    }

    // Backslash escapes inside quoted strings, with the option on (`pe`) and off (`p`).
    #[test]
    fn parse_escapes() {
        let p = |x| parse_with_options(x, ParseOptions::default().allow_backslash_quotes(false));
        let pe = |x| parse_with_options(x, ParseOptions::default().allow_backslash_quotes(true));

        assert_eq!(pe(r#"'fred says \'hi\''"#).terms, &[Term::new(false, None, "fred says 'hi'")]);
        assert_eq!(pe(r#""fred says \'hi\'""#).terms, &[Term::new(false, None, "fred says 'hi'")]);
        assert_eq!(pe(r#""fred says \"hi\"""#).terms, &[Term::new(false, None, "fred says \"hi\"")]);
        assert_eq!(pe(r#"'fred says \"hi\"'"#).terms, &[Term::new(false, None, "fred says \"hi\"")]);
        assert_eq!(pe(r#"'backslashes \\ \n'"#).terms, &[Term::new(false, None, "backslashes \\ \\n")]);
        assert_eq!(pe(r#""backslashes \\ \n""#).terms, &[Term::new(false, None, "backslashes \\ \\n")]);

        assert_eq!(p(r#"'fred says \'hi\''"#).terms, &[Term::new(false, None, "fred says \\"), Term::new(false, None, "hi\\''")]);
        assert_eq!(p(r#""fred says \'hi\'""#).terms, &[Term::new(false, None, "fred says \\'hi\\'")]);
        assert_eq!(p(r#""fred says \"hi\"""#).terms, &[Term::new(false, None, "fred says \\"), Term::new(false, None, "hi\\\"\"")]);
        assert_eq!(p(r#"'fred says \"hi\"'"#).terms, &[Term::new(false, None, "fred says \\\"hi\\\"")]);
        assert_eq!(p(r#"'backslashes \\ \n'"#).terms, &[Term::new(false, None, "backslashes \\\\ \\n")]);
        assert_eq!(p(r#""backslashes \\ \n""#).terms, &[Term::new(false, None, "backslashes \\\\ \\n")]);

        assert_eq!(pe(r#"in\"a\\raw\nword"#).terms, &[Term::new(false, None, "in\\\"a\\\\raw\\nword")]);
        assert_eq!(p(r#"in\"a\\raw\nword"#).terms, &[Term::new(false, None, "in\\\"a\\\\raw\\nword")]);

        assert_eq!(pe(r#"fred:'\'hi\''"#).terms, &[Term::new(false, Some("fred"), "'hi'")]);
        assert_eq!(pe(r#"fred:"\'hi\'""#).terms, &[Term::new(false, Some("fred"), "'hi'")]);
        assert_eq!(pe(r#"fred:"\"hi\"""#).terms, &[Term::new(false, Some("fred"), "\"hi\"")]);
        assert_eq!(pe(r#"fred:'\"hi\"'"#).terms, &[Term::new(false, Some("fred"), "\"hi\"")]);
        assert_eq!(pe(r#"back:'slashes \\ \n'"#).terms, &[Term::new(false, Some("back"), "slashes \\ \\n")]);
        assert_eq!(pe(r#"back:"slashes \\ \n""#).terms, &[Term::new(false, Some("back"), "slashes \\ \\n")]);

        assert_eq!(p(r#"fred:'\'hi\''"#).terms, &[Term::new(false, Some("fred"), "\\"), Term::new(false, None, "hi\\''")]);
        assert_eq!(p(r#"fred:"\'hi\'""#).terms, &[Term::new(false, Some("fred"), "\\'hi\\'")]);
        assert_eq!(p(r#"fred:"\"hi\"""#).terms, &[Term::new(false, Some("fred"), "\\"), Term::new(false, None, "hi\\\"\"")]);
        assert_eq!(p(r#"fred:'\"hi\"'"#).terms, &[Term::new(false, Some("fred"), "\\\"hi\\\"")]);
        assert_eq!(p(r#"back:'slashes \\ \n'"#).terms, &[Term::new(false, Some("back"), "slashes \\\\ \\n")]);
        assert_eq!(p(r#"back:"slashes \\ \n""#).terms, &[Term::new(false, Some("back"), "slashes \\\\ \\n")]);

        assert_eq!(pe(r#"raw:in\"a\\raw\nword"#).terms, &[Term::new(false, Some("raw"), "in\\\"a\\\\raw\\nword")]);
        assert_eq!(p(r#"raw:in\"a\\raw\nword"#).terms, &[Term::new(false, Some("raw"), "in\\\"a\\\\raw\\nword")]);
    }

    // Interaction of backslash escapes with unicode escapes when both are enabled.
    #[test]
    fn parse_unicode_and_escape() {
        let p = |x| parse_with_options(
            x,
            ParseOptions::default().allow_unicode_escapes(true).allow_backslash_quotes(true)
        );

        assert_eq!(p(r#"fred:"\'sup \u01F436""#).terms, &[Term::new(false, Some("fred"), "'sup \u{1F436}")]);
        assert_eq!(p(r#"fred:"\'sup \\u01F436""#).terms, &[Term::new(false, Some("fred"), "'sup \u{1F436}")]);
        assert_eq!(p(r#"fred:"\'sup \u00005cu01F436""#).terms, &[Term::new(false, Some("fred"), "'sup \\u01F436")]);
    }

    // Prints the parse of a sample query; it makes no assertions.
    #[test]
    fn readme_example() {
        println!("{:?}", parse("from:foo -subject:'a long subject \\u00270c' baz"));
    }
}