1#![doc = include_str!("../README.md")]
2use anyhow::Result;
3use geosuggest_core::EngineData;
4use std::collections::HashMap;
5use std::io::{Cursor, Read};
6
7#[cfg(feature = "tracing")]
8use std::time::Instant;
9
10use geosuggest_core::{
11 index::{IndexData, SourceFileContentOptions},
12 EngineMetadata, EngineSourceMetadata,
13};
14use serde::Serialize;
15
16#[derive(Serialize, Clone)]
17pub struct SourceItem<'a> {
18 pub url: &'a str,
19 pub filename: &'a str,
20}
21
22#[derive(Serialize, Clone)]
23pub struct IndexUpdaterSettings<'a> {
24 pub http_timeout_ms: u64,
25 pub cities: SourceItem<'a>,
26 pub names: Option<SourceItem<'a>>,
27 pub countries_url: Option<&'a str>,
28 pub admin1_codes_url: Option<&'a str>,
29 pub admin2_codes_url: Option<&'a str>,
30 pub filter_languages: Vec<&'a str>,
31}
32
33impl Default for IndexUpdaterSettings<'_> {
34 fn default() -> Self {
35 IndexUpdaterSettings {
36 http_timeout_ms: 300_000,
37 cities: SourceItem {
38 url: "https://download.geonames.org/export/dump/cities5000.zip",
39 filename: "cities5000.txt",
40 },
41 names: Some(SourceItem {
42 url: "https://download.geonames.org/export/dump/alternateNamesV2.zip",
43 filename: "alternateNamesV2.txt",
44 }),
45 countries_url: Some("https://download.geonames.org/export/dump/countryInfo.txt"),
46 admin1_codes_url: Some(
47 "https://download.geonames.org/export/dump/admin1CodesASCII.txt",
48 ),
49 admin2_codes_url: Some("https://download.geonames.org/export/dump/admin2Codes.txt"),
50 filter_languages: Vec::new(),
51 }
53 }
54}
55
56pub struct IndexUpdater<'a> {
57 http_client: reqwest::Client,
58 settings: IndexUpdaterSettings<'a>,
59}
60
61impl<'a> IndexUpdater<'a> {
62 pub fn new(settings: IndexUpdaterSettings<'a>) -> Result<Self> {
63 Ok(IndexUpdater {
64 http_client: reqwest::ClientBuilder::new()
65 .timeout(std::time::Duration::from_millis(settings.http_timeout_ms))
66 .build()?,
67 settings,
68 })
69 }
70
71 pub async fn has_updates(&self, metadata: &EngineMetadata) -> Result<bool> {
72 #[cfg(feature = "tracing")]
73 tracing::info!("Check updates");
74 if metadata.source.etag.is_empty() {
75 #[cfg(feature = "tracing")]
76 tracing::info!("Engine hasn't source ETAGs");
77 return Ok(true);
78 }
79
80 let mut requests = vec![self.get_etag(self.settings.cities.url)];
81 let mut results = vec!["cities"];
82 if let Some(item) = &self.settings.names {
83 requests.push(self.get_etag(item.url));
84 results.push("names");
85 }
86 if let Some(url) = self.settings.countries_url {
87 requests.push(self.get_etag(url));
88 results.push("countries");
89 }
90 if let Some(url) = self.settings.admin1_codes_url {
91 requests.push(self.get_etag(url));
92 results.push("admin1_codes");
93 }
94 let responses = futures::future::join_all(requests).await;
95 let results: HashMap<_, _> = results.into_iter().zip(responses.into_iter()).collect();
96
97 for (entry, etag) in results {
98 let current_etag = metadata
99 .source
100 .etag
101 .get(entry)
102 .map(AsRef::as_ref)
103 .unwrap_or("");
104 let new_etag = etag?;
105 if current_etag != new_etag {
106 #[cfg(feature = "tracing")]
107 tracing::info!("New version of {entry}");
108 return Ok(true);
109 }
110 }
111
112 Ok(false)
113 }
114
115 pub async fn get_etag(&self, url: &str) -> Result<String> {
116 let response = self.http_client.head(url).send().await?;
117 #[cfg(feature = "tracing")]
118 tracing::info!("Try HEAD {url}");
119
120 Ok(response
121 .headers()
122 .get(reqwest::header::ETAG)
123 .and_then(|v| v.to_str().ok())
124 .map(String::from)
125 .unwrap_or_default())
126 }
127
128 pub async fn fetch(&self, url: &str, filename: Option<&str>) -> Result<(String, Vec<u8>)> {
129 let response = self.http_client.get(url).send().await?;
130 #[cfg(feature = "tracing")]
131 tracing::info!("Try GET {url}");
132
133 if !response.status().is_success() {
134 anyhow::bail!("GET {url} return status {}", response.status())
135 }
136
137 let etag = response
138 .headers()
139 .get(reqwest::header::ETAG)
140 .and_then(|v| v.to_str().ok())
141 .map(String::from)
142 .unwrap_or_default();
143
144 let content = response.bytes().await?.to_vec();
145 #[cfg(feature = "tracing")]
146 tracing::info!("Downloaded {url} size: {}", content.len());
147
148 let content = if let Some(filename) = filename {
149 #[cfg(feature = "tracing")]
150 tracing::info!("Unzip {filename}");
151 let cursor = Cursor::new(content);
152 let mut archive = zip::read::ZipArchive::new(cursor)?;
153 let mut file = archive
154 .by_name(filename)
155 .map_err(|e| anyhow::anyhow!("On get file {filename} from archive: {e}"))?;
156 let mut buf = Vec::new();
157 file.read_to_end(&mut buf)?;
158 buf
159 } else {
160 content
161 };
162
163 Ok((etag, content))
164 }
165
166 pub async fn build(self) -> Result<EngineData> {
167 let mut requests = vec![self.fetch(
168 self.settings.cities.url,
169 Some(self.settings.cities.filename),
170 )];
171 let mut results = vec!["cities"];
172 if let Some(item) = &self.settings.names {
173 requests.push(self.fetch(item.url, Some(item.filename)));
174 results.push("names");
175 }
176 if let Some(url) = self.settings.countries_url {
177 requests.push(self.fetch(url, None));
178 results.push("countries");
179 }
180 if let Some(url) = self.settings.admin1_codes_url {
181 requests.push(self.fetch(url, None));
182 results.push("admin1_codes");
183 }
184 if let Some(url) = self.settings.admin2_codes_url {
185 requests.push(self.fetch(url, None));
186 results.push("admin2_codes");
187 }
188 let responses = futures::future::join_all(requests).await;
189 let mut results: HashMap<_, _> = results.into_iter().zip(responses.into_iter()).collect();
190
191 let etag = results
192 .iter()
193 .filter_map(|(k, v)| {
194 let Ok((etag, _)) = v else { return None };
195 Some(((*k).to_string(), etag.to_string()))
196 })
197 .collect();
198
199 #[cfg(feature = "tracing")]
200 tracing::info!("Try to build index...");
201
202 #[cfg(feature = "tracing")]
203 let now = Instant::now();
204
205 let data = IndexData::new_from_files_content(SourceFileContentOptions {
206 cities: String::from_utf8(
207 results
208 .remove(&"cities")
209 .ok_or_else(|| anyhow::anyhow!("Cities file required"))?
210 .map_err(|e| anyhow::anyhow!("On fetch cities file: {e}"))?
211 .1, )?,
213 names: if let Some(c) = results.remove(&"names") {
214 Some(String::from_utf8(c?.1)?)
215 } else {
216 None
217 },
218 countries: if let Some(c) = results.remove(&"countries") {
219 Some(String::from_utf8(c?.1)?)
220 } else {
221 None
222 },
223 admin1_codes: if let Some(c) = results.remove(&"admin1_codes") {
224 Some(String::from_utf8(c?.1)?)
225 } else {
226 None
227 },
228 admin2_codes: if let Some(c) = results.remove(&"admin2_codes") {
229 Some(String::from_utf8(c?.1)?)
230 } else {
231 None
232 },
233 filter_languages: self.settings.filter_languages.clone(),
234 })
235 .map_err(|e| anyhow::anyhow!("Failed to build index: {e}"))?;
236
237 let mut engine_data = EngineData::try_from(data)?;
238
239 engine_data.metadata = Some(EngineMetadata {
240 source: EngineSourceMetadata {
241 cities: self.settings.cities.url.to_owned(),
242 names: self.settings.names.as_ref().map(|v| v.url.to_owned()),
243 countries: self.settings.countries_url.map(String::from),
244 admin1_codes: self.settings.admin1_codes_url.map(String::from),
245 admin2_codes: self.settings.admin2_codes_url.map(String::from),
246 filter_languages: self
247 .settings
248 .filter_languages
249 .into_iter()
250 .map(String::from)
251 .collect::<Vec<_>>(),
252 etag,
253 },
254 ..Default::default()
255 });
256
257 #[cfg(feature = "tracing")]
258 tracing::info!("Engine data ready. took {}ms", now.elapsed().as_millis());
259
260 Ok(engine_data)
261 }
262}