/*!

This crate provides a library for generating efficient regular expressions that
represent a non-recursive grammar, along with a mechanism to build a parse tree from the capturing
groups in the expression. It uses the [`regex`](https://crates.io/crates/regex) crate as its parsing engine.

# Usage

This crate is [on crates.io](https://crates.io/crates/pidgin) and can be
used by adding `pidgin` to your dependencies in your project's `Cargo.toml`.

```toml
[dependencies]
pidgin = "0.2.0"
```

and this to your crate root:

```rust
#[macro_use]
extern crate pidgin;
# fn main() {}
```

# Example: find a date

```rust
# #![recursion_limit="256"]
# #[macro_use]
# extern crate pidgin;
# fn main() {
let date = grammar!{
    (?ibB)

    date -> <weekday> (",") <month> <monthday> (",") <year>
    date -> <month> <monthday> | <weekday> | <monthday> <month> <year>
    date -> <month> <monthday> (",") <year>
    date -> <numeric_date>

    numeric_date -> <year> ("/") <numeric_month> ("/") <numeric_day>
    numeric_date -> <year> ("-") <numeric_month> ("-") <numeric_day>
    numeric_date -> <numeric_month> ("/") <numeric_day> ("/") <year>
    numeric_date -> <numeric_month> ("-") <numeric_day> ("-") <year>
    numeric_date -> <numeric_day> ("/") <numeric_month> ("/") <year>
    numeric_date -> <numeric_day> ("-") <numeric_month> ("-") <year>

    year    => r(r"\b[12][0-9]{3}|[0-9]{2}\b")
    weekday => [
            "Sunday Monday Tuesday Wednesday Thursday Friday Saturday"
                .split(" ")
                .into_iter()
                .flat_map(|s| vec![s, &s[0..2], &s[0..3]])
                .collect::<Vec<_>>()
        ]
    weekday     => (?-i) [["M", "T", "W", "R", "F", "S", "U"]]
    monthday    => [(1..=31).into_iter().collect::<Vec<_>>()]
    numeric_day => [
            (1..=31)
                .into_iter()
                .flat_map(|i| vec![i.to_string(), format!("{:02}", i)])
                .collect::<Vec<_>>()
        ]
    month => [
        vec![
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ].into_iter().flat_map(|s| vec![s, &s[0..3]]).collect::<Vec<_>>()
      ]
    numeric_month => [
            (1..=12)
                .into_iter()
                .flat_map(|i| vec![i.to_string(), format!("{:02}", i)])
                .collect::<Vec<_>>()
        ]
};
let matcher = date.matcher().unwrap();

// we let whitespace vary
assert!(matcher.is_match(" June   6,    1969 "));
// we made it case-insensitive
assert!(matcher.is_match("june 6, 1969"));
// but we want to respect word boundaries
assert!(!matcher.is_match("jejune 6, 1969"));
// we can inspect the parse tree
let m = matcher.parse("2018/10/6").unwrap();
assert!(m.name("numeric_date").is_some());
assert_eq!(m.name("year").unwrap().as_str(), "2018");
let m = matcher.parse("Friday").unwrap();
assert!(!m.name("numeric_date").is_some());
assert!(m.name("weekday").is_some());
// still more crazy things we allow
assert!(matcher.is_match("F"));
assert!(matcher.is_match("friday"));
assert!(matcher.is_match("Fri"));
// but we said single-letter days had to be capitalized
assert!(!matcher.is_match("f"));
# }
```

The `grammar!` macro is the raison d'être of pidgin. It gives you a [`Grammar`], which can itself be used in other
[`Grammar`]s via the `g(grammar)` element, can serve as a library of [`Grammar`]s via the [`rule`] method,
or, via its [`matcher`] method, can give you a [`Matcher`] object which will allow you to
parse a string to produce a [`Match`] parse tree.

[`Grammar`]: ../pidgin/struct.Grammar.html
[`rule`]: ../pidgin/struct.Grammar.html#method.rule
[`matcher`]: ../pidgin/struct.Grammar.html#method.matcher
[`Matcher`]: ../pidgin/struct.Matcher.html
[`Match`]: ../pidgin/struct.Match.html
*/

// External dependencies. `regex` is the engine named in the crate-level
// documentation; `lazy_static` and `serde` are imported with `#[macro_use]`
// so that their macros are in scope throughout this crate.
extern crate regex;
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate serde;

// Crate modules. Only `macros` is declared `pub`, and it is hidden from the
// rendered documentation. Its `#[macro_use]` attribute makes any macros it
// defines visible to the code and modules declared after it in this file.
mod grammar;
#[macro_use]
#[doc(hidden)]
pub mod macros;
mod matching;
mod pidgin;
mod util;
// Public API: re-export the main types at the crate root.
pub use self::grammar::Grammar;
pub use self::matching::{Match, Matcher};