ingredient/
lib.rs

1use std::collections::HashSet;
2use std::iter::FromIterator;
3
4pub use crate::ingredient::Ingredient;
5use fraction::fraction_number;
6use nom::{
7    branch::alt,
8    bytes::complete::tag,
9    character::complete::{alpha1, char, not_line_ending, space0, space1},
10    combinator::{opt, verify},
11    error::{context, VerboseError},
12    multi::{many1, separated_list1},
13    number::complete::double,
14    sequence::{delimited, tuple},
15    IResult,
16};
17use tracing::info;
18use unit::Measure;
19
20extern crate nom;
21
22#[cfg(feature = "serde-derive")]
23#[macro_use]
24extern crate serde;
25
26mod fraction;
27pub mod ingredient;
28pub mod rich_text;
29pub mod unit;
30pub mod util;
/// Result type shared by every parser in this crate: a nom [`IResult`] whose
/// error type is [`VerboseError`] over the input type `T`, with output `U`.
pub type Res<T, U> = IResult<T, U, VerboseError<T>>;
32
33/// use [IngredientParser] to customize
34pub fn from_str(input: &str) -> Ingredient {
35    (IngredientParser::new(false)).from_str(input)
36}
37
/// Configurable parser for ingredient lines.
///
/// The derived `Default` yields empty `units` / `adjectives` sets and
/// `is_rich_text == false`.
#[derive(Clone, PartialEq, Eq, Debug, Default)]
pub struct IngredientParser {
    /// Custom unit words; handed to `unit::is_valid` when a unit token is checked.
    pub units: HashSet<String>,
    /// Adjective phrases that are moved out of the ingredient name and into its
    /// modifier. Lookups for a leading adjective lowercase the token first.
    pub adjectives: HashSet<String>,
    /// When `true`, the spelled-out numbers (`one`, `a `) are not accepted as amounts.
    pub is_rich_text: bool,
}
44impl IngredientParser {
45    pub fn new(is_rich_text: bool) -> Self {
46        let units: Vec<String> = vec![
47            // non standard units - these aren't really convertible for the most part.
48            // default set
49            "whole", "packet", "sticks", "stick", "cloves", "clove", "bunch", "head", "large",
50            "pinch", "small", "medium", "package", "recipe", "slice", "standard", "can", "leaf",
51            "leaves", "strand",
52        ]
53        .iter()
54        .map(|&s| s.into())
55        .collect();
56        let adjectives: Vec<String> = vec![
57            "chopped",
58            "minced",
59            "diced",
60            "freshly ground",
61            "freshly grated",
62            "finely chopped",
63            "thinly sliced",
64            "sliced",
65            "plain",
66            "to taste",
67        ]
68        .iter()
69        .map(|&s| s.into())
70        .collect();
71        IngredientParser {
72            units: HashSet::from_iter(units.iter().cloned()),
73            adjectives: HashSet::from_iter(adjectives.iter().cloned()),
74            is_rich_text,
75        }
76    }
77    /// wrapper for [self.parse_ingredient]
78    /// ```
79    /// use ingredient::{from_str};
80    /// assert_eq!(from_str("one whole egg").to_string(),"1 whole egg");
81    /// ```
82    pub fn from_str(self, input: &str) -> Ingredient {
83        //todo: add back error handling? can't get this to ever fail since parser is pretty flexible
84        self.parse_ingredient(input).unwrap().1
85    }
86
87    /// Parses one or two amounts, e.g. `12 grams` or `120 grams / 1 cup`. Used by [self.parse_ingredient].
88    /// ```
89    /// use ingredient::{IngredientParser,unit::Measure};
90    /// let ip = IngredientParser::new(false);
91    /// assert_eq!(
92    ///    ip.parse_amount("120 grams"),
93    ///    vec![Measure::parse_new("grams",120.0)]
94    ///  );
95    /// assert_eq!(
96    ///    ip.parse_amount("120 grams / 1 cup"),
97    ///    vec![Measure::parse_new("grams",120.0),Measure::parse_new("cup", 1.0)]
98    ///  );
99    /// assert_eq!(
100    ///    ip.parse_amount("120 grams / 1 cup / 1 whole"),
101    ///    vec![Measure::parse_new("grams",120.0),Measure::parse_new("cup", 1.0),Measure::parse_new("whole", 1.0)]
102    ///  );
103    /// ```
104    #[tracing::instrument(name = "parse_amount")]
105    pub fn parse_amount(&self, input: &str) -> Vec<Measure> {
106        // todo: also can't get this one to fail either
107        self.clone().many_amount(input).expect(input).1
108    }
109
110    /// Parse an ingredient line item, such as `120 grams / 1 cup whole wheat flour, sifted lightly`.
111    ///
112    /// returns an [Ingredient], Can be used as a wrapper to return verbose errors.
113    ///
114    /// supported formats include:
115    /// * 1 g name
116    /// * 1 g / 1g name, modifier
117    /// * 1 g; 1 g name
118    /// * ¼ g name
119    /// * 1/4 g name
120    /// * 1 ¼ g name
121    /// * 1 1/4 g name
122    /// * 1 g (1 g) name
123    /// * 1 g name (about 1 g; 1 g)
124    /// * name
125    /// * 1 name
126    /// ```
127    /// use ingredient::{IngredientParser, ingredient::Ingredient, unit::Measure};
128    /// let ip = IngredientParser::new(false);
129    /// assert_eq!(
130    ///     ip.parse_ingredient("1¼  cups / 155.5 grams flour"),
131    ///     Ok((
132    ///         "",
133    ///         Ingredient {
134    ///             name: "flour".to_string(),
135    ///             amounts: vec![
136    ///                 Measure::parse_new("cups", 1.25),
137    ///                 Measure::parse_new("grams", 155.5),
138    ///             ],
139    ///             modifier: None,
140    ///         }
141    ///     ))
142    /// );
143    /// ```
144    #[tracing::instrument(name = "parse_ingredient")]
145    pub fn parse_ingredient(self, input: &str) -> Res<&str, Ingredient> {
146        context(
147            "ing",
148            tuple((
149                opt(|a| self.clone().many_amount(a)),
150                space0, // space between amount(s) and name
151                opt(tuple((|a| self.clone().adjective(a), space1))), // optional modifier
152                opt(many1(text)), // name, can be multiple words
153                opt(|a| self.clone().amt_parens(a)), // can have some more amounts in parens after the name
154                opt(tag(", ")),                      // comma seperates the modifier
155                not_line_ending, // modifier, can be multiple words and even include numbers, since once we've hit the comma everything is fair game.
156            )),
157        )(input)
158        .map(|(next_input, res)| {
159            let (
160                amounts,
161                _maybespace,
162                adjective,
163                name_chunks,
164                amounts2,
165                _maybecomma,
166                modifier_chunks,
167            ): (
168                Option<Vec<Measure>>,
169                &str,
170                Option<(String, &str)>,
171                Option<Vec<&str>>,
172                Option<Vec<Measure>>,
173                Option<&str>,
174                &str,
175            ) = res;
176            let mut modifiers: String = modifier_chunks.to_owned();
177            if let Some((adjective, _)) = adjective {
178                modifiers.push_str(&adjective);
179            }
180            let mut name: String = name_chunks
181                .unwrap_or(vec![])
182                .join("")
183                .trim_matches(' ')
184                .to_string();
185
186            // if the ingredient name still has adjective in it, remove that
187            self.adjectives.iter().for_each(|f| {
188                if name.contains(f) {
189                    modifiers.push_str(f);
190                    name = name.replace(f, "").trim_matches(' ').to_string();
191                }
192            });
193
194            let mut amounts = match amounts {
195                Some(a) => a,
196                None => vec![],
197            };
198            amounts = match amounts2 {
199                Some(a) => amounts.into_iter().chain(a.into_iter()).collect(),
200                None => amounts,
201            };
202
203            (
204                next_input,
205                Ingredient {
206                    name,
207                    amounts,
208                    modifier: match modifiers.chars().count() {
209                        0 => None,
210                        _ => Some(modifiers.to_string()),
211                    },
212                },
213            )
214        })
215    }
216    fn get_value(self, input: &str) -> Res<&str, (f64, Option<f64>)> {
217        context(
218            "get_value",
219            alt((
220                |a| self.clone().upper_range_only(a),
221                |a| self.clone().num_or_range(a),
222            )),
223        )(input)
224    }
225
226    fn num_or_range(self, input: &str) -> Res<&str, (f64, Option<f64>)> {
227        context(
228            "num_or_range",
229            tuple((
230                |a| self.clone().num(a),
231                opt(|a| self.clone().range_up_num(a)),
232            )),
233        )(input)
234        .map(|(next_input, res)| {
235            let (val, upper_val) = res;
236            let upper = match upper_val {
237                Some(u) => Some(u),
238                None => None,
239            };
240            (next_input, (val, upper))
241        })
242    }
243
244    fn upper_range_only(self, input: &str) -> Res<&str, (f64, Option<f64>)> {
245        context(
246            "upper_range_only",
247            tuple((
248                opt(space0),
249                alt((tag("up to"), tag("at most"))),
250                space0,
251                |a| self.clone().num(a),
252            )),
253        )(input)
254        .map(|(next_input, res)| (next_input, (0.0, Some(res.3))))
255    }
256
257    fn unit(self, input: &str) -> Res<&str, String> {
258        context(
259            "unit",
260            verify(unitamt, |s: &str| unit::is_valid(self.units.clone(), s)),
261        )(input)
262    }
263    fn adjective(self, input: &str) -> Res<&str, String> {
264        context(
265            "adjective",
266            verify(unitamt, |s: &str| {
267                self.adjectives.contains(&s.to_lowercase())
268            }),
269        )(input)
270    }
271
272    // parses a single amount
273    fn amount1(self, input: &str) -> Res<&str, Measure> {
274        let res = context(
275            "amount1",
276            tuple(
277                (
278                    opt(tag("about ")), // todo: add flag for estimates
279                    opt(|a| self.clone().mult_prefix_1(a)),
280                    |a| self.clone().get_value(a), // value
281                    space0,
282                    opt(|a| self.clone().unit(a)), // unit
283                    opt(alt((tag("."), tag(" of")))),
284                ), // 1 gram
285            ),
286        )(input)
287        .map(|(next_input, res)| {
288            let (_prefix, mult, value, _space, unit, _period) = res;
289            let mut v = value.0;
290            if mult.is_some() {
291                v = v * mult.unwrap();
292            }
293            return (
294                next_input,
295                Measure::from_parts(
296                    unit.unwrap_or("whole".to_string())
297                        .to_string()
298                        .to_lowercase()
299                        .as_ref(),
300                    v,
301                    value.1,
302                ),
303            );
304        });
305        res
306    }
307    // parses an amount like `78g to 104g cornmeal`
308    fn amount_with_units_twice(self, input: &str) -> Res<&str, Option<Measure>> {
309        let res = context(
310            "amount_with_units_twice",
311            tuple((
312                opt(tag("about ")),            // todo: add flag for estimates
313                |a| self.clone().get_value(a), // value
314                space0,
315                opt(|a| self.clone().unit(a)), // unit
316                |a| self.clone().range_up_num(a),
317                opt(|a| self.clone().unit(a)),
318                opt(alt((tag("."), tag(" of")))),
319            )),
320        )(input)
321        .map(|(next_input, res)| {
322            let (_prefix, value, _space, unit, upper_val, upper_unit, _period) = res;
323            if upper_unit.is_some() && unit != upper_unit {
324                info!("unit mismatch: {:?} vs {:?}", unit, upper_unit);
325                // panic!("unit mismatch: {:?} vs {:?}", unit, upper_unit)
326                return (next_input, None);
327            }
328            // let upper = match value.1 {
329            //     Some(u) => Some(u),
330            //     None => upper_val,
331            //      match upper_val {
332            //         Some(u) => Some(u),
333            //         None => None,
334            //     },
335            // };
336            let upper = Some(upper_val);
337            return (
338                next_input,
339                Some(Measure::from_parts(
340                    unit.unwrap_or("whole".to_string())
341                        .to_string()
342                        .to_lowercase()
343                        .as_ref(),
344                    value.0,
345                    upper,
346                )),
347            );
348        });
349        res
350    }
351    // parses 1-n amounts, e.g. `12 grams` or `120 grams / 1 cup`
352    #[tracing::instrument(name = "many_amount")]
353    fn many_amount(self, input: &str) -> Res<&str, Vec<Measure>> {
354        context(
355            "many_amount",
356            separated_list1(
357                alt((tag("; "), tag(" / "), tag(" "), tag(", "), tag("/"))),
358                alt((
359                    |a| self.clone().plus_amount(a).map(|(a, b)| (a, vec![b])),
360                    |a| {
361                        self.clone().amount_with_units_twice(a).map(|(a, b)| {
362                            (
363                                a,
364                                match b {
365                                    Some(a) => vec![a],
366                                    None => vec![],
367                                },
368                            )
369                        })
370                    }, // regular amount
371                    |a| self.clone().amt_parens(a), // amoiunt with parens
372                    |a| self.clone().amount1(a).map(|(a, b)| (a, vec![b])), // regular amount
373                )),
374            ),
375        )(input)
376        .map(|(next_input, res)| {
377            // let (a, b) = res;
378            (next_input, res.into_iter().flatten().collect())
379        })
380    }
381
382    fn amt_parens(self, input: &str) -> Res<&str, Vec<Measure>> {
383        context(
384            "amt_parens",
385            delimited(char('('), |a| self.clone().many_amount(a), char(')')),
386        )(input)
387    }
388    /// handles vulgar fraction, or just a number
389    fn num(self, input: &str) -> Res<&str, f64> {
390        if self.is_rich_text {
391            context("num", alt((fraction_number, double)))(input)
392        } else {
393            context("num", alt((fraction_number, text_number, double)))(input)
394        }
395    }
396    fn mult_prefix_1(self, input: &str) -> Res<&str, f64> {
397        context(
398            "mult_prefix_1",
399            tuple((|a| self.clone().num(a), space1, tag("x"), space1)),
400        )(input)
401        .map(|(next_input, res)| {
402            let (num, _, _, _) = res;
403            (next_input, num)
404        })
405    }
406    fn range_up_num(self, input: &str) -> Res<&str, f64> {
407        context(
408            "range_up_num",
409            alt((
410                tuple((
411                    space0,
412                    alt((tag("-"), tag("–"))), // second dash is an unusual variant
413                    space0,
414                    |a| self.clone().num(a),
415                )),
416                tuple((
417                    space1,
418                    alt((tag("to"), tag("through"), tag("or"))),
419                    space1,
420                    |a| self.clone().num(a),
421                )),
422            )),
423        )(input)
424        .map(|(next_input, (_space1, _, _space2, num))| (next_input, num))
425    }
426    fn plus_amount(self, input: &str) -> Res<&str, Measure> {
427        context(
428            "plus_num",
429            tuple((
430                |a| self.clone().amount1(a),
431                space1,
432                tag("plus"),
433                space1,
434                |a| self.clone().amount1(a),
435            )),
436        )(input)
437        .map(|(next_input, (a, _space1, _, _, b))| {
438            let c = a.add(b).unwrap();
439            return (next_input, c);
440        })
441    }
442}
443
444fn text(input: &str) -> Res<&str, &str> {
445    alt((
446        alpha1,
447        space1,
448        tag("-"),
449        tag("—"),
450        tag("-"),
451        tag("'"),
452        tag("’"),
453        tag("."),
454        tag("è"),
455        tag("î"),
456        tag("ó"),
457        tag("é"),
458        // tag("\""),
459    ))(input)
460}
461fn unitamt(input: &str) -> Res<&str, String> {
462    nom::multi::many0(alt((alpha1, tag("°"), tag("\""))))(input)
463        .map(|(next_input, res)| (next_input, res.join("")))
464}
465
466fn text_number(input: &str) -> Res<&str, f64> {
467    context("text_number", alt((tag("one"), tag("a "))))(input)
468        .map(|(next_input, _)| (next_input, 1.0))
469}
470
#[cfg(test)]
mod tests {
    use std::convert::TryFrom;

    use super::*;

    /// Parses `input` as amounts with a default, non-rich-text parser.
    fn amounts(input: &str) -> Vec<Measure> {
        IngredientParser::new(false).parse_amount(input)
    }

    #[test]
    fn test_amount() {
        assert_eq!(amounts("350 °"), vec![Measure::parse_new("°", 350.0)]);
        assert_eq!(amounts("350 °F"), vec![Measure::parse_new("°f", 350.0)]);
    }

    #[test]
    fn test_amount_range() {
        assert_eq!(
            amounts("2¼-2.5 cups"),
            vec![Measure::parse_new_with_upper("cups", 2.25, 2.5)]
        );

        let expected = Ingredient {
            name: "flour".to_string(),
            amounts: vec![Measure::parse_new_with_upper("cups", 1.0, 2.0)],
            modifier: None,
        };
        assert_eq!(Ingredient::try_from("1-2 cups flour"), Ok(expected));

        assert_eq!(
            amounts("2 ¼ - 2.5 cups").first().unwrap().to_string(),
            "2.25 - 2.5 cups"
        );
        assert_eq!(
            amounts("2 to 4 days"),
            vec![Measure::parse_new_with_upper("days", 2.0, 4.0)]
        );

        // #30
        assert_eq!(
            amounts("up to 4 days"),
            vec![Measure::parse_new_with_upper("days", 0.0, 4.0)]
        );
    }

    #[test]
    fn test_ingredient_parse() {
        let expected = Ingredient {
            name: "flour".to_string(),
            amounts: vec![Measure::parse_new("cups", 12.0)],
            modifier: None,
        };
        assert_eq!(Ingredient::try_from("12 cups flour"), Ok(expected));
    }

    #[test]
    fn test_stringy() {
        assert_eq!(
            format!("res: {}", from_str("12 cups flour")),
            "res: 12 cups flour"
        );
        assert_eq!(from_str("one whole egg").to_string(), "1 whole egg");
        assert_eq!(from_str("a tsp flour").to_string(), "1 tsp flour");
    }

    #[test]
    fn test_with_parens() {
        let rendered = from_str("1 cup (125.5 grams) AP flour, sifted").to_string();
        assert_eq!(rendered, "1 cup / 125.5 g AP flour, sifted");
    }
}