1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
use serde_derive::Deserialize;

/// Error returned when fetching fails.
///
/// This is an opaque wrapper around an internal `anyhow::Error`; the wrapped value is private and
/// cannot be inspected directly by callers.
///
/// It currently implements `Debug`, `Display` and, through the `thiserror::Error` derive,
/// `std::error::Error`. Because of `#[error(transparent)]` the `Display` output is forwarded
/// to the wrapped error. New traits/methods may be implemented in the future.
#[derive(thiserror::Error, Debug)]
#[error(transparent)]
pub struct Error (anyhow::Error);

/// Food within daily menu.
///
/// Both fields are plain text copied from the data embedded in the restaurant page; nothing is
/// parsed or normalized.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MenuItem {
    /// Description of the food.
    pub description: String,
    /// Food price, as display text.
    ///
    /// Note that sometimes the price may be empty!
    pub price: String,
}

/// Daily menu of a restaurant.
///
/// This is a menu for a specific day.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Menu {
    /// Date of the menu, as the textual heading taken from the page (it is not parsed).
    pub date: String,
    /// Food items offered on this day.
    pub items: Vec<MenuItem>,
}

/// Fetches daily manu of given restaurant.
///
/// You can get restaurant name by visiting it at Zomato using browser and copying it from the URL.
///
/// This returns heap-allocated menu, because iterator would require self-referential return value.
/// If you happen to have some clever idea to work around this, I'll happily accept a PR.
pub async fn get_daily_menu(city: &str, restaurant: &str) -> Result<Vec<Menu>, Error> {
    get_daily_menu_internal(city, restaurant).await.map_err(Error)
}

/// A single dish as it appears in the JSON embedded in the restaurant page.
///
/// `get_daily_menu_internal` converts each of these into a public `MenuItem`.
#[derive(Deserialize, Debug)]
struct InternalMenuItem {
    /// Name of the dish; becomes `MenuItem::description`.
    name: String,
    /// Price text, read from the `displayPrice` JSON key; becomes `MenuItem::price`.
    #[serde(rename = "displayPrice")]
    price: String,
}

/// One day's menu as it appears in the JSON embedded in the restaurant page.
///
/// `get_daily_menu_internal` converts each of these into a public `Menu`.
#[derive(Deserialize)]
struct DailyMenu {
    /// Dishes listed for this day; become `Menu::items`.
    dishes: Vec<InternalMenuItem>,
    /// Heading text of the day, read from the `timeHeading` JSON key; becomes `Menu::date`.
    #[serde(rename = "timeHeading")]
    date: String,
}

/// The `sections` object of a restaurant page entry.
///
/// Only the daily-menu section is declared here, so it is the only one deserialized.
#[derive(Deserialize)]
struct Sections {
    /// List of per-day menus, read from the `SECTION_DAILY_MENU` JSON key.
    #[serde(rename = "SECTION_DAILY_MENU")]
    daily_menu: Vec<DailyMenu>,
}

/// Value type of the `Pages::restaurant` map.
///
/// NOTE(review): the name suggests the exact meaning of this JSON object was not known to the
/// author; only its `sections` member is read.
#[derive(Deserialize)]
struct UnknownObject {
    /// Page sections, of which only the daily menu is used.
    sections: Sections,
}

/// The `pages` object of the embedded page state.
#[derive(Deserialize)]
struct Pages {
    /// Map read from the `restaurant` JSON key. `get_daily_menu_internal` uses only the first
    /// entry the map yields and ignores the key.
    ///
    /// NOTE(review): keys are presumably restaurant identifiers - confirm against real data.
    restaurant: std::collections::HashMap<String, UnknownObject>,
}

/// Root of the JSON document that the page passes to `JSON.parse` when assigning
/// `window.__PRELOADED_STATE__`.
///
/// Only the `pages` member is declared and read.
#[derive(Deserialize)]
struct Data {
    /// Per-page state; the restaurant page data lives in here.
    pages: Pages,
}

// We use internal function with `anyhow::Error` for convenience and it gets translated into our
// `Error` in the public function. This allows us to maintain ability to extend error type with
// information, while making it easy to write the initial version of library.
/// Downloads the daily-menu page of a restaurant and extracts the menus from it.
///
/// The page is requested from `https://www.zomato.com/{city}/{restaurant}/daily-menu`. The menus
/// are not read from the visible HTML; instead the function looks for the `<script>` element that
/// assigns `window.__PRELOADED_STATE__ = JSON.parse("...")`, decodes the JavaScript string
/// literal back into JSON text and deserializes that.
///
/// # Errors
///
/// Returns an error when the HTTP client cannot be built, the request fails, the body is not
/// valid UTF-8, no script with the preloaded state is found (`data not found`), the JSON does not
/// have the expected shape (`failed to parse json`) or it contains no restaurant entry
/// (`missing restaurant`).
async fn get_daily_menu_internal(city: &str, restaurant: &str) -> Result<Vec<Menu>, anyhow::Error> {
    use scraper::Selector;
    use anyhow::Context;

    // Text that directly precedes the string literal holding the page state.
    const STATE_MARKER: &str = "window.__PRELOADED_STATE__ = JSON.parse(\"";
    // Text that closes the `JSON.parse("...")` call.
    const STATE_TERMINATOR: &str = "\")\n";

    let url = format!("https://www.zomato.com/{}/{}/daily-menu", city, restaurant);
    // Verbose connection logging is opt-in through the `debug-log` cargo feature.
    let verbose = cfg!(feature = "debug-log");
    let req_builder = reqwest::Client::builder()
        .connection_verbose(verbose)
        .build()?
        .request(reqwest::Method::GET, &url)
        // I found that zomato server has some problems when some headers are passed,
        // so I copied everything from Mozilla Firefox.
        .header("User-Agent", "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0")
        .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
        // This seems to be important
        .header("Accept-Encoding", "identity")
        // keep-alive must be lower case - not Keep-Alive!
        .header("Connection", "keep-alive")
        .header("DNT", "1")
        .header("Upgrade-Insecure-Requests", "1")
        .header("Cache-Control", "max-age=0")
        .header("Accept-Language", "en-US,en;q=0.5");

    let response = req_builder.send()
        .await?
        .bytes()
        .await?;
    let response_decoded = std::str::from_utf8(&response)?;
    let html = scraper::Html::parse_document(response_decoded);

    let script_selector = Selector::parse("script").expect("`script` is a valid CSS selector");
    // Take the text that follows the marker in the first script containing it. `nth(1)` is
    // `Some` exactly when the marker occurs in the script text.
    let json_with_tail = html
        .select(&script_selector)
        .filter_map(|script| script.text().next())
        .find_map(|text| text.split(STATE_MARKER).nth(1))
        .ok_or_else(|| anyhow::anyhow!("data not found"))?;
    // `split` always yields at least one piece, so the fallback is never actually taken.
    let json_escaped = json_with_tail
        .split(STATE_TERMINATOR)
        .next()
        .unwrap_or(json_with_tail);

    let json = unescape_js_string_literal(json_escaped);
    let data = serde_json::from_str::<Data>(&json).context("failed to parse json")?;
    let (_, page) = data
        .pages
        .restaurant
        .into_iter()
        .next()
        .ok_or_else(|| anyhow::anyhow!("missing restaurant"))?;

    let result = page
        .sections
        .daily_menu
        .into_iter()
        .map(|menu| Menu {
            date: menu.date,
            items: menu
                .dishes
                .into_iter()
                .map(|item| MenuItem {
                    description: item.name,
                    price: item.price,
                })
                .collect(),
        })
        .collect();

    Ok(result)
}

/// Undoes the JavaScript string-literal escaping of `\"` and `\\`.
///
/// The input is scanned left to right so that an escaped backslash followed by an escaped quote
/// (`\\\"`) correctly turns into `\"`, i.e. a quote escaped at the JSON level. Every other escape
/// sequence (for example `\n`, `\/` or `\uXXXX`) is copied through unchanged and left for the
/// JSON parser to interpret. A trailing lone backslash is kept as is.
fn unescape_js_string_literal(escaped: &str) -> String {
    let mut unescaped = String::with_capacity(escaped.len());
    let mut chars = escaped.chars();
    while let Some(current) = chars.next() {
        if current != '\\' {
            unescaped.push(current);
            continue;
        }
        match chars.next() {
            Some('"') => unescaped.push('"'),
            Some('\\') => unescaped.push('\\'),
            Some(other) => {
                unescaped.push('\\');
                unescaped.push(other);
            }
            None => unescaped.push('\\'),
        }
    }
    unescaped
}