1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
//! Chomp is a fast monadic-style parser combinator library for the Rust programming language. It was
//! written as the culmination of the experiments detailed in these blog posts:
//!
//! * [Part 1](http://m4rw3r.github.io/parser-combinator-experiments-rust/)
//! * [Part 2](http://m4rw3r.github.io/parser-combinator-experiments-errors)
//! * [Part 3](http://m4rw3r.github.io/parser-combinator-experiments-part-3)
//!
//! For its current capabilities, you will find that Chomp performs consistently as well as, if not
//! better than, optimized C parsers, while being vastly more expressive. For an example that
//! builds a performant HTTP parser out of smaller parsers, see
//! [http_parser.rs](examples/http_parser.rs).
//!
//! # Example
//!
//! ```
//! # #[macro_use] extern crate chomp;
//! # fn main() {
//! use chomp::{Input, U8Result, parse_only};
//! use chomp::{take_while1, token};
//!
//! #[derive(Debug, Eq, PartialEq)]
//! struct Name<'a> {
//!     first: &'a [u8],
//!     last: &'a [u8],
//! }
//!
//! fn name(i: Input<u8>) -> U8Result<Name> {
//!     parse!{i;
//!         let first = take_while1(|c| c != b' ');
//!         token(b' '); // skipping this char
//!         let last = take_while1(|c| c != b'\n');
//!
//!         ret Name{
//!             first: first,
//!             last: last,
//!         }
//!     }
//! }
//!
//! assert_eq!(parse_only(name, "Martin Wernstål\n".as_bytes()), Ok(Name{
//!     first: b"Martin",
//!     last: "Wernstål".as_bytes()
//! }));
//! # }
//! ```
//!
//! # Usage
//!
//! Chomp's functionality is split between three modules:
//!
//! * `parsers` contains the basic parsers used to parse streams of input.
//! * `combinators` contains functions which take parsers and return new ones.
//! * `primitives` contains the building blocks used to make new parsers. This is advanced usage and
//!   is far more involved than using the pre-existing parsers, but is sometimes unavoidable.
//!
//! A parser is, at its simplest, a function that takes a slice of input and returns a
//! `ParseResult<I, T, E>`, where `I`, `T`, and `E` are the input, output, and error types,
//! respectively. Parsers are usually parameterized over values or other parsers as well, so these
//! appear as extra arguments in the parsing function. As an example, here is the signature of the
//! `token` parser, which matches a particular input.
//!
//! ```ignore
//! fn token<I: Copy + Eq>(i: Input<I>, t: I) -> SimpleResult<I, I> {...}
//! ```
//!
//! Notice that the first argument is an `Input<I>`, and the second argument is some `I`.
//! `Input<I>` is just a datatype over the current state of the parser and a slice of input `I`,
//! and prevents the parser writer from accidentally mutating the state of the parser. Later, when
//! we introduce the `parse!` macro, we will see that using a parser in this macro just means
//! supplying all of the arguments but the input, like so:
//!
//! ```ignore
//! token(b'T');
//! ```
//!
//! Note that you cannot do this outside of the `parse!` macro. `SimpleResult<I, T>` is a
//! convenience type alias over `ParseResult<I, T, Error<I>>`, and `Error<I>` is just a convenient
//! "default" error type that will be sufficient for most uses. For more sophisticated usage, one
//! can always write a custom error type.
//!
//! A very useful parser is the `satisfy` parser:
//!
//! ```ignore
//! fn satisfy<I: Copy, F>(i: Input<I>, f: F) -> SimpleResult<I, I>
//!   where F: FnOnce(I) -> bool { ... }
//! ```
//!
//! Besides the input state, `satisfy`'s only parameter is a predicate function; the parser
//! succeeds only if the next piece of input satisfies the supplied predicate. Here's an example
//! that might be used in the `parse!` macro:
//!
//! ```
//! # #[macro_use] extern crate chomp;
//! # fn main() {
//! # use chomp::{Input, satisfy, parse_only};
//! # let r = parse_only(parser!{
//! satisfy(|c| {
//!     match c {
//!         b'c' | b'h' | b'a' | b'r' => true,
//!         _ => false,
//!     }
//! })
//! # }, b"h");
//! # assert_eq!(r, Ok(b'h'));
//! # }
//! ```
//!
//! This parser will only succeed if the character is one of the characters in "char".
//!
//! Lastly, here is the parser combinator `count`, which will attempt to run a parser a number of
//! times on its input.
//!
//! ```ignore
//! pub fn count<'a, I, T, E, F, U>(i: Input<'a, I>, num: usize, p: F) -> ParseResult<'a, I, T, E>
//!   where I: Copy,
//!         U: 'a,
//!         F: FnMut(Input<'a, I>) -> ParseResult<'a, I, U, E>,
//!         T: FromIterator<U> { ... }
//! ```
//!
//! Using parsers is almost entirely done using the `parse!` macro, which enables us to do three
//! distinct things:
//!
//! * Sequence parsers over the remaining input
//! * Store intermediate results into datatypes
//! * Return a datatype at the end, which may be the result of any arbitrary computation over the
//!   intermediate results.
//!
//! In other words, a normal Rust function usually looks something like this:
//!
//! ```
//! # fn launch_missiles() {}
//! # fn read_number() -> u8 { 3 }
//! fn f() -> (u8, u8, u8) {
//!     let a = read_number();
//!     let b = read_number();
//!     launch_missiles();
//!     return (a, b, a + b);
//! }
//! ```
//!
//! A Chomp parser with a similar structure looks like this:
//!
//! ```
//! # #[macro_use] extern crate chomp;
//! # use chomp::{Input, parse_only, satisfy, string, token, U8Result};
//! fn f(i: Input<u8>) -> U8Result<(u8, u8, u8)> {
//!     parse!{i;
//!         let a = digit();
//!         let b = digit();
//!         string(b"missiles");
//!         ret (a, b, a + b)
//!     }
//! }
//!
//! fn digit(i: Input<u8>) -> U8Result<u8> {
//!     satisfy(i, |c| b'0' <= c && c <= b'9').map(|c| c - b'0')
//! }
//! # fn main() {
//! #     let r = parse_only(f, b"33missiles");
//! #     assert_eq!(r, Ok((3, 3, 6)));
//! # }
//! ```
//!
//! Readers familiar with Haskell or F# will recognize this as a "monadic computation" or
//! "computation expression".
//!
//! You use the `parse!` macro as follows:
//!
//! - Write the input parameter first, with a semicolon.
//! - Write any number of valid parser actions or identifier bindings, where:
//!    - a parser action takes the form `parser(params*)`, with the input parameter omitted.
//!    - an identifier binding takes the form `let identifier = parser(params*);`, with the input
//!      parameter omitted.
//! - Write the final line of the macro, which must always be either a parser action, or a return
//!   statement which takes the form `ret expression`. The type of `expression` becomes the return
//!   type of the entire parser, should it succeed.
//!
//! The entire grammar for the macro is listed elsewhere in this documentation.

// External crates. `#[macro_use]` brings the macros exported by `bitflags` into scope
// crate-wide; `conv` is linked without importing any macros.
#[macro_use]
extern crate bitflags;
extern crate conv;

// Private implementation modules. `macros` is declared first and marked `#[macro_use]` so that
// the macros it defines stay in scope for every module declared after it; keep it at the top.
#[macro_use]
mod macros;
mod input;
mod parse;
mod parse_result;

// Public modules.
pub mod ascii;
pub mod buffer;
pub mod parsers;
pub mod combinators;

// Crate-root re-exports, so that combinators, parsers and the core types can be imported
// directly from the crate root (e.g. `use chomp::{take_while1, token};`).
pub use combinators::{
    count,
    option,
    or,
    many,
    many1,
    sep_by,
    sep_by1,
    many_till,
    skip_many,
    skip_many1,
    matched_by,
};
pub use parsers::{
    any,
    eof,
    not_token,
    peek,
    peek_next,
    satisfy,
    satisfy_with,
    scan,
    string,
    run_scanner,
    take,
    take_remainder,
    take_till,
    take_while,
    take_while1,
    token,
};
pub use parsers::Error;
pub use input::Input;
pub use parse::{
    ParseError,
    parse_only,
};
pub use parse_result::{
    ParseResult,
    SimpleResult,
    U8Result,
};

/// Module used to construct fundamental parsers and combinators.
///
/// # Primitive
///
/// Only used by fundamental parsers and combinators.
pub mod primitives {
    // NOTE(review): the `use` paths in this module are written relative to the crate root
    // (2015-edition style), so `input::…` and `parse_result::…` name the private top-level
    // modules, not the `primitives::input` / `primitives::parse_result` submodules declared
    // below. Confirm the crate's edition before migrating these paths.
    pub use input::{
        InputBuffer,
        InputClone,
    };
    pub use parse_result::{
        IntoInner,
        State,
    };

    /// Input utilities.
    ///
    /// # Primitive
    ///
    /// Only used by fundamental parsers and combinators.
    pub mod input {
        pub use input::{DEFAULT, END_OF_INPUT, new};
    }

    /// ParseResult utilities.
    ///
    /// # Primitive
    ///
    /// Only used by fundamental parsers and combinators.
    ///
    /// # Note
    ///
    /// Prefer to use ``Input::ret``, ``Input::err`` or ``Input::incomplete`` instead of using
    /// ``parse_result::new``.
    pub mod parse_result {
        pub use parse_result::new;
    }
}