pidgin/lib.rs
1/*!
2
3This crate provides a library for generating efficient regular expressions that
4represent a non-recursive grammar, and a mechanism to build a parse tree from capturing groups in
5the expression. It uses the [`regex`](https://crates.io/crates/regex) crate for its parsing engine.
6
7# Usage
8
9This crate is [on crates.io](https://crates.io/crates/pidgin) and can be
10used by adding `pidgin` to your dependencies in your project's `Cargo.toml`.
11
12```toml
13[dependencies]
14pidgin = "0.2.0"
15```
16
17and this to your crate root:
18
19```rust
20#[macro_use]
21extern crate pidgin;
22# fn main() {}
23```
24
25# Example: find a date
26
27```rust
28# #![recursion_limit="256"]
29# #[macro_use]
30# extern crate pidgin;
31# fn main() {
32let date = grammar!{
33 (?ibB)
34
35 date -> <weekday> (",") <month> <monthday> (",") <year>
36 date -> <month> <monthday> | <weekday> | <monthday> <month> <year>
37 date -> <month> <monthday> (",") <year>
38 date -> <numeric_date>
39
40 numeric_date -> <year> ("/") <numeric_month> ("/") <numeric_day>
41 numeric_date -> <year> ("-") <numeric_month> ("-") <numeric_day>
42 numeric_date -> <numeric_month> ("/") <numeric_day> ("/") <year>
43 numeric_date -> <numeric_month> ("-") <numeric_day> ("-") <year>
44 numeric_date -> <numeric_day> ("/") <numeric_month> ("/") <year>
45 numeric_date -> <numeric_day> ("-") <numeric_month> ("-") <year>
46
47 year => r(r"\b[12][0-9]{3}|[0-9]{2}\b")
48 weekday => [
49 "Sunday Monday Tuesday Wednesday Thursday Friday Saturday"
50 .split(" ")
51 .into_iter()
52 .flat_map(|s| vec![s, &s[0..2], &s[0..3]])
53 .collect::<Vec<_>>()
54 ]
55 weekday => (?-i) [["M", "T", "W", "R", "F", "S", "U"]]
56 monthday => [(1..=31).into_iter().collect::<Vec<_>>()]
57 numeric_day => [
58 (1..=31)
59 .into_iter()
60 .flat_map(|i| vec![i.to_string(), format!("{:02}", i)])
61 .collect::<Vec<_>>()
62 ]
63 month => [
64 vec![
65 "January",
66 "February",
67 "March",
68 "April",
69 "May",
70 "June",
71 "July",
72 "August",
73 "September",
74 "October",
75 "November",
76 "December",
77 ].into_iter().flat_map(|s| vec![s, &s[0..3]]).collect::<Vec<_>>()
78 ]
79 numeric_month => [
80 (1..=12)
81 .into_iter()
82 .flat_map(|i| vec![i.to_string(), format!("{:02}", i)])
83 .collect::<Vec<_>>()
84 ]
85};
86let matcher = date.matcher().unwrap();
87
88// we let whitespace vary
89assert!(matcher.is_match(" June 6, 1969 "));
90// we made it case-insensitive
91assert!(matcher.is_match("june 6, 1969"));
92// but we want to respect word boundaries
93assert!(!matcher.is_match("jejune 6, 1969"));
94// we can inspect the parse tree
95let m = matcher.parse("2018/10/6").unwrap();
96assert!(m.name("numeric_date").is_some());
97assert_eq!(m.name("year").unwrap().as_str(), "2018");
98let m = matcher.parse("Friday").unwrap();
99assert!(!m.name("numeric_date").is_some());
100assert!(m.name("weekday").is_some());
101// still more crazy things we allow
102assert!(matcher.is_match("F"));
103assert!(matcher.is_match("friday"));
104assert!(matcher.is_match("Fri"));
105// but we said single-letter days had to be capitalized
106assert!(!matcher.is_match("f"));
107# }
108```
109
110This macro is the raison d'etre of pidgin. It gives you a [`Grammar`] which can itself be used in other
111[`Grammar`]s via the `g(grammar)` element; it can serve as a library of [`Grammar`]s via the [`rule`] method;
112or, via its [`matcher`] method, it can give you a [`Matcher`] object which will allow you to
113parse a string to produce a [`Match`] parse tree.
114
115[`Grammar`]: ../pidgin/struct.Grammar.html
116[`rule`]: ../pidgin/struct.Grammar.html#method.rule
117[`matcher`]: ../pidgin/struct.Grammar.html#method.matcher
118[`Matcher`]: ../pidgin/struct.Matcher.html
119[`Match`]: ../pidgin/struct.Match.html
120*/
121
122extern crate regex;
123#[macro_use]
124extern crate lazy_static;
125#[macro_use]
126extern crate serde;
127
128mod grammar;
129#[macro_use]
130#[doc(hidden)]
131pub mod macros;
132mod matching;
133mod pidgin;
134mod util;
135pub use self::grammar::Grammar;
136pub use self::matching::{Match, Matcher};