pidgin/
lib.rs

1/*!
2
3This crate provides a library for generating efficient regular expressions that
4represent a non-recursive grammar, along with a mechanism to build a parse tree from capturing groups in
5the expression. It uses the [`regex`](https://crates.io/crates/regex) crate for its parsing engine.
6
7# Usage
8
9This crate is [on crates.io](https://crates.io/crates/pidgin) and can be
10used by adding `pidgin` to your dependencies in your project's `Cargo.toml`.
11
12```toml
13[dependencies]
14pidgin = "0.2.0"
15```
16
17and this to your crate root:
18
19```rust
20#[macro_use]
21extern crate pidgin;
22# fn main() {}
23```
24
25# Example: find a date
26
27```rust
28# #![recursion_limit="256"]
29# #[macro_use]
30# extern crate pidgin;
31# fn main() {
32let date = grammar!{
33    (?ibB)
34
35    date -> <weekday> (",") <month> <monthday> (",") <year>
36    date -> <month> <monthday> | <weekday> | <monthday> <month> <year>
37    date -> <month> <monthday> (",") <year>
38    date -> <numeric_date>
39
40    numeric_date -> <year> ("/") <numeric_month> ("/") <numeric_day>
41    numeric_date -> <year> ("-") <numeric_month> ("-") <numeric_day>
42    numeric_date -> <numeric_month> ("/") <numeric_day> ("/") <year>
43    numeric_date -> <numeric_month> ("-") <numeric_day> ("-") <year>
44    numeric_date -> <numeric_day> ("/") <numeric_month> ("/") <year>
45    numeric_date -> <numeric_day> ("-") <numeric_month> ("-") <year>
46
47    year    => r(r"\b[12][0-9]{3}|[0-9]{2}\b")
48    weekday => [
49            "Sunday Monday Tuesday Wednesday Thursday Friday Saturday"
50                .split(" ")
51                .into_iter()
52                .flat_map(|s| vec![s, &s[0..2], &s[0..3]])
53                .collect::<Vec<_>>()
54        ]
55    weekday     => (?-i) [["M", "T", "W", "R", "F", "S", "U"]]
56    monthday    => [(1..=31).into_iter().collect::<Vec<_>>()]
57    numeric_day => [
58            (1..=31)
59                .into_iter()
60                .flat_map(|i| vec![i.to_string(), format!("{:02}", i)])
61                .collect::<Vec<_>>()
62        ]
63    month => [
64        vec![
65            "January",
66            "February",
67            "March",
68            "April",
69            "May",
70            "June",
71            "July",
72            "August",
73            "September",
74            "October",
75            "November",
76            "December",
77        ].into_iter().flat_map(|s| vec![s, &s[0..3]]).collect::<Vec<_>>()
78      ]
79    numeric_month => [
80            (1..=12)
81                .into_iter()
82                .flat_map(|i| vec![i.to_string(), format!("{:02}", i)])
83                .collect::<Vec<_>>()
84        ]
85};
86let matcher = date.matcher().unwrap();
87
88// we let whitespace vary
89assert!(matcher.is_match(" June   6,    1969 "));
90// we made it case-insensitive
91assert!(matcher.is_match("june 6, 1969"));
92// but we want to respect word boundaries
93assert!(!matcher.is_match("jejune 6, 1969"));
94// we can inspect the parse tree
95let m = matcher.parse("2018/10/6").unwrap();
96assert!(m.name("numeric_date").is_some());
97assert_eq!(m.name("year").unwrap().as_str(), "2018");
98let m = matcher.parse("Friday").unwrap();
99assert!(!m.name("numeric_date").is_some());
100assert!(m.name("weekday").is_some());
101// still more crazy things we allow
102assert!(matcher.is_match("F"));
103assert!(matcher.is_match("friday"));
104assert!(matcher.is_match("Fri"));
105// but we said single-letter days had to be capitalized
106assert!(!matcher.is_match("f"));
107# }
108```
109
110The `grammar!` macro is the raison d'être of pidgin. It gives you a [`Grammar`], which can itself be used in other
111[`Grammar`]s via the `g(grammar)` element; it can serve as a library of [`Grammar`]s via the [`rule`] method;
112or, via its [`matcher`] method, it can give you a [`Matcher`] object which will allow you to
113parse a string to produce a [`Match`] parse tree.
114
115[`Grammar`]: ../pidgin/struct.Grammar.html
116[`rule`]: ../pidgin/struct.Grammar.html#method.rule
117[`matcher`]: ../pidgin/struct.Grammar.html#method.matcher
118[`Matcher`]: ../pidgin/struct.Matcher.html
119[`Match`]: ../pidgin/struct.Match.html
120*/
121
122extern crate regex;
123#[macro_use]
124extern crate lazy_static;
125#[macro_use]
126extern crate serde;
127
128mod grammar;
129#[macro_use]
130#[doc(hidden)]
131pub mod macros;
132mod matching;
133mod pidgin;
134mod util;
135pub use self::grammar::Grammar;
136pub use self::matching::{Match, Matcher};