geosuggest_utils/
lib.rs

1#![doc = include_str!("../README.md")]
2use anyhow::Result;
3use geosuggest_core::EngineData;
4use std::collections::HashMap;
5use std::io::{Cursor, Read};
6
7#[cfg(feature = "tracing")]
8use std::time::Instant;
9
10use geosuggest_core::{
11    index::{IndexData, SourceFileContentOptions},
12    EngineMetadata, EngineSourceMetadata,
13};
14use serde::Serialize;
15
16#[derive(Serialize, Clone)]
17pub struct SourceItem<'a> {
18    pub url: &'a str,
19    pub filename: &'a str,
20}
21
22#[derive(Serialize, Clone)]
23pub struct IndexUpdaterSettings<'a> {
24    pub http_timeout_ms: u64,
25    pub cities: SourceItem<'a>,
26    pub names: Option<SourceItem<'a>>,
27    pub countries_url: Option<&'a str>,
28    pub admin1_codes_url: Option<&'a str>,
29    pub admin2_codes_url: Option<&'a str>,
30    pub filter_languages: Vec<&'a str>,
31}
32
33impl Default for IndexUpdaterSettings<'_> {
34    fn default() -> Self {
35        IndexUpdaterSettings {
36            http_timeout_ms: 300_000,
37            cities: SourceItem {
38                url: "https://download.geonames.org/export/dump/cities5000.zip",
39                filename: "cities5000.txt",
40            },
41            names: Some(SourceItem {
42                url: "https://download.geonames.org/export/dump/alternateNamesV2.zip",
43                filename: "alternateNamesV2.txt",
44            }),
45            countries_url: Some("https://download.geonames.org/export/dump/countryInfo.txt"),
46            admin1_codes_url: Some(
47                "https://download.geonames.org/export/dump/admin1CodesASCII.txt",
48            ),
49            admin2_codes_url: Some("https://download.geonames.org/export/dump/admin2Codes.txt"),
50            filter_languages: Vec::new(),
51            // max_payload_size: 200 * 1024 * 1024,
52        }
53    }
54}
55
56pub struct IndexUpdater<'a> {
57    http_client: reqwest::Client,
58    settings: IndexUpdaterSettings<'a>,
59}
60
61impl<'a> IndexUpdater<'a> {
62    pub fn new(settings: IndexUpdaterSettings<'a>) -> Result<Self> {
63        Ok(IndexUpdater {
64            http_client: reqwest::ClientBuilder::new()
65                .timeout(std::time::Duration::from_millis(settings.http_timeout_ms))
66                .build()?,
67            settings,
68        })
69    }
70
71    pub async fn has_updates(&self, metadata: &EngineMetadata) -> Result<bool> {
72        #[cfg(feature = "tracing")]
73        tracing::info!("Check updates");
74        if metadata.source.etag.is_empty() {
75            #[cfg(feature = "tracing")]
76            tracing::info!("Engine hasn't source ETAGs");
77            return Ok(true);
78        }
79
80        let mut requests = vec![self.get_etag(self.settings.cities.url)];
81        let mut results = vec!["cities"];
82        if let Some(item) = &self.settings.names {
83            requests.push(self.get_etag(item.url));
84            results.push("names");
85        }
86        if let Some(url) = self.settings.countries_url {
87            requests.push(self.get_etag(url));
88            results.push("countries");
89        }
90        if let Some(url) = self.settings.admin1_codes_url {
91            requests.push(self.get_etag(url));
92            results.push("admin1_codes");
93        }
94        let responses = futures::future::join_all(requests).await;
95        let results: HashMap<_, _> = results.into_iter().zip(responses.into_iter()).collect();
96
97        for (entry, etag) in results {
98            let current_etag = metadata
99                .source
100                .etag
101                .get(entry)
102                .map(AsRef::as_ref)
103                .unwrap_or("");
104            let new_etag = etag?;
105            if current_etag != new_etag {
106                #[cfg(feature = "tracing")]
107                tracing::info!("New version of {entry}");
108                return Ok(true);
109            }
110        }
111
112        Ok(false)
113    }
114
115    pub async fn get_etag(&self, url: &str) -> Result<String> {
116        let response = self.http_client.head(url).send().await?;
117        #[cfg(feature = "tracing")]
118        tracing::info!("Try HEAD {url}");
119
120        Ok(response
121            .headers()
122            .get(reqwest::header::ETAG)
123            .and_then(|v| v.to_str().ok())
124            .map(String::from)
125            .unwrap_or_default())
126    }
127
128    pub async fn fetch(&self, url: &str, filename: Option<&str>) -> Result<(String, Vec<u8>)> {
129        let response = self.http_client.get(url).send().await?;
130        #[cfg(feature = "tracing")]
131        tracing::info!("Try GET {url}");
132
133        if !response.status().is_success() {
134            anyhow::bail!("GET {url} return status {}", response.status())
135        }
136
137        let etag = response
138            .headers()
139            .get(reqwest::header::ETAG)
140            .and_then(|v| v.to_str().ok())
141            .map(String::from)
142            .unwrap_or_default();
143
144        let content = response.bytes().await?.to_vec();
145        #[cfg(feature = "tracing")]
146        tracing::info!("Downloaded {url} size: {}", content.len());
147
148        let content = if let Some(filename) = filename {
149            #[cfg(feature = "tracing")]
150            tracing::info!("Unzip {filename}");
151            let cursor = Cursor::new(content);
152            let mut archive = zip::read::ZipArchive::new(cursor)?;
153            let mut file = archive
154                .by_name(filename)
155                .map_err(|e| anyhow::anyhow!("On get file {filename} from archive: {e}"))?;
156            let mut buf = Vec::new();
157            file.read_to_end(&mut buf)?;
158            buf
159        } else {
160            content
161        };
162
163        Ok((etag, content))
164    }
165
166    pub async fn build(self) -> Result<EngineData> {
167        let mut requests = vec![self.fetch(
168            self.settings.cities.url,
169            Some(self.settings.cities.filename),
170        )];
171        let mut results = vec!["cities"];
172        if let Some(item) = &self.settings.names {
173            requests.push(self.fetch(item.url, Some(item.filename)));
174            results.push("names");
175        }
176        if let Some(url) = self.settings.countries_url {
177            requests.push(self.fetch(url, None));
178            results.push("countries");
179        }
180        if let Some(url) = self.settings.admin1_codes_url {
181            requests.push(self.fetch(url, None));
182            results.push("admin1_codes");
183        }
184        if let Some(url) = self.settings.admin2_codes_url {
185            requests.push(self.fetch(url, None));
186            results.push("admin2_codes");
187        }
188        let responses = futures::future::join_all(requests).await;
189        let mut results: HashMap<_, _> = results.into_iter().zip(responses.into_iter()).collect();
190
191        let etag = results
192            .iter()
193            .filter_map(|(k, v)| {
194                let Ok((etag, _)) = v else { return None };
195                Some(((*k).to_string(), etag.to_string()))
196            })
197            .collect();
198
199        #[cfg(feature = "tracing")]
200        tracing::info!("Try to build index...");
201
202        #[cfg(feature = "tracing")]
203        let now = Instant::now();
204
205        let data = IndexData::new_from_files_content(SourceFileContentOptions {
206            cities: String::from_utf8(
207                results
208                    .remove(&"cities")
209                    .ok_or_else(|| anyhow::anyhow!("Cities file required"))?
210                    .map_err(|e| anyhow::anyhow!("On fetch cities file: {e}"))?
211                    .1, // .ok_or_else(|| anyhow::anyhow!("Cities file required"))?,
212            )?,
213            names: if let Some(c) = results.remove(&"names") {
214                Some(String::from_utf8(c?.1)?)
215            } else {
216                None
217            },
218            countries: if let Some(c) = results.remove(&"countries") {
219                Some(String::from_utf8(c?.1)?)
220            } else {
221                None
222            },
223            admin1_codes: if let Some(c) = results.remove(&"admin1_codes") {
224                Some(String::from_utf8(c?.1)?)
225            } else {
226                None
227            },
228            admin2_codes: if let Some(c) = results.remove(&"admin2_codes") {
229                Some(String::from_utf8(c?.1)?)
230            } else {
231                None
232            },
233            filter_languages: self.settings.filter_languages.clone(),
234        })
235        .map_err(|e| anyhow::anyhow!("Failed to build index: {e}"))?;
236
237        let mut engine_data = EngineData::try_from(data)?;
238
239        engine_data.metadata = Some(EngineMetadata {
240            source: EngineSourceMetadata {
241                cities: self.settings.cities.url.to_owned(),
242                names: self.settings.names.as_ref().map(|v| v.url.to_owned()),
243                countries: self.settings.countries_url.map(String::from),
244                admin1_codes: self.settings.admin1_codes_url.map(String::from),
245                admin2_codes: self.settings.admin2_codes_url.map(String::from),
246                filter_languages: self
247                    .settings
248                    .filter_languages
249                    .into_iter()
250                    .map(String::from)
251                    .collect::<Vec<_>>(),
252                etag,
253            },
254            ..Default::default()
255        });
256
257        #[cfg(feature = "tracing")]
258        tracing::info!("Engine data ready. took {}ms", now.elapsed().as_millis());
259
260        Ok(engine_data)
261    }
262}