geosuggest_core/
index.rs

1use itertools::Itertools;
2use rayon::prelude::*;
3use std::collections::{HashMap, HashSet};
4use std::error::Error;
5
6#[cfg(feature = "oaph")]
7use oaph::schemars::{self, JsonSchema};
8
9use kiddo::immutable::float::kdtree::ImmutableKdTree;
10use rkyv::collections::swiss_table::ArchivedHashMap;
11use rkyv::option::ArchivedOption;
12use rkyv::rend::{f32_le, u32_le};
13use rkyv::string::ArchivedString;
14use serde::ser::{SerializeMap, Serializer};
15
16#[cfg(feature = "tracing")]
17use std::time::Instant;
18
/// Returns `content` without the lines that begin with `#`.
///
/// The surviving lines are glued back together with `\n`, so the result never
/// ends with a newline. Only a `#` in the very first column marks a comment;
/// lines with leading whitespace before `#` are kept.
pub fn skip_comment_lines(content: &str) -> String {
    let kept: Vec<&str> = content
        .lines()
        .filter(|line| !line.starts_with('#'))
        .collect();

    kept.join("\n")
}
22
/// Splits `content` on line boundaries into at most `n` parts of roughly
/// equal line count.
///
/// * `n == 0` or `n == 1` returns the content untouched as a single part.
/// * Otherwise every part holds `ceil(line_count / n)` lines (the last part
///   may be shorter), re-joined with `\n`. Fewer than `n` parts are returned
///   when there are fewer lines than `n`; empty content yields no parts.
///
/// Line order is preserved across the returned parts.
fn split_content_to_n_parts(content: &str, n: usize) -> Vec<String> {
    if n <= 1 {
        return vec![content.to_owned()];
    }

    let lines: Vec<&str> = content.lines().collect();

    // `chunks` takes the *length* of each chunk, not the number of chunks, so
    // the requested part count has to be converted to a per-part line count.
    // `max(1)` keeps `chunks` from panicking on a zero size for empty content.
    let lines_per_part = ((lines.len() + n - 1) / n).max(1);

    lines
        .chunks(lines_per_part)
        .map(|part| part.join("\n"))
        .collect()
}
31
/// Locations of the source files consumed by `IndexData::new_from_files`.
///
/// Every path that is supplied is read fully into memory and passed on as a
/// `SourceFileContentOptions`.
pub struct SourceFileOptions<'a, P: AsRef<std::path::Path>> {
    /// Path to the cities file (required).
    pub cities: P,
    /// Optional path to the alternate names file.
    pub names: Option<P>,
    /// Optional path to the country info file.
    pub countries: Option<P>,
    /// Optional path to the first-level administrative division codes file.
    pub admin1_codes: Option<P>,
    /// Optional path to the second-level administrative division codes file.
    pub admin2_codes: Option<P>,
    /// Language codes to keep from the alternate names file. A name row is
    /// skipped unless its `isolanguage` value is in this list, so an empty
    /// list keeps no alternate names.
    pub filter_languages: Vec<&'a str>,
}
40
/// In-memory contents of the source files consumed by
/// `IndexData::new_from_files_content`.
///
/// All contents are parsed as tab-delimited rows without a header line.
pub struct SourceFileContentOptions<'a> {
    /// Contents of the cities file (required).
    pub cities: String,
    /// Optional contents of the alternate names file.
    pub names: Option<String>,
    /// Optional contents of the country info file; lines starting with `#`
    /// are dropped before parsing.
    pub countries: Option<String>,
    /// Optional contents of the first-level administrative division codes file.
    pub admin1_codes: Option<String>,
    /// Optional contents of the second-level administrative division codes file.
    pub admin2_codes: Option<String>,
    /// Language codes to keep from the alternate names file. A name row is
    /// skipped unless its `isolanguage` value is in this list, so an empty
    /// list keeps no alternate names.
    pub filter_languages: Vec<&'a str>,
}
49
/// The complete search index built from the geonames source files.
///
/// Derives the rkyv traits so the whole index can be archived and restored.
#[derive(Clone, rkyv::Deserialize, rkyv::Serialize, rkyv::Archive)]
pub struct IndexData {
    /// Searchable values. One city contributes several entries: its name, its
    /// ascii name (when different) and each of its alternate names.
    pub entries: Vec<Entry>,
    /// City records keyed by geoname id.
    pub geonames: HashMap<u32, CitiesRecord>,
    /// Country code -> geoname id of the city whose feature code is `PPLC`.
    pub capitals: HashMap<String, u32>,
    /// Country information keyed by country code.
    pub country_info_by_code: HashMap<String, CountryRecord>,
    /// Two-dimensional tree over the `[latitude, longitude]` points of the
    /// city records.
    pub tree: ImmutableKdTree<f32, u32, 2, 32>,
    /// Maps the position of a point in the slice `tree` was built from to the
    /// geoname id of the corresponding city.
    pub tree_index_to_geonameid: HashMap<usize, u32>,
}
59
/// A single searchable string that points at a city record.
#[derive(Clone, rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)]
pub struct Entry {
    /// Geoname id of the city this entry refers to.
    pub id: u32,
    /// Searchable value; stored lowercased when the index is built.
    pub value: String,
    /// Geoname id of the city's country, when country info was loaded and the
    /// city's country code was found in it.
    pub country_id: Option<u32>,
}
66
// Row of the admin1 codes file, columns in order:
// code, name, name ascii, geonameid
//
// Rows are read without a header line, so the field order below must follow
// the column order of the file.
#[derive(Debug, Clone, serde::Deserialize)]
struct Admin1CodeRecordRaw {
    // Looked up as "<country code>.<admin1 code>" when cities are processed.
    code: String,
    name: String,
    // Not read by the index builder; presumably kept so the columns stay aligned.
    _asciiname: String,
    geonameid: u32,
}
75
// Row of the admin2 codes file, columns in order:
// code, name, name ascii, geonameid
//
// Rows are read without a header line, so the field order below must follow
// the column order of the file.
#[derive(Debug, Clone, serde::Deserialize)]
struct Admin2CodeRecordRaw {
    // Looked up as "<country code>.<admin1 code>.<admin2 code>" when cities
    // are processed.
    code: String,
    name: String,
    // Not read by the index builder; presumably kept so the columns stay aligned.
    _asciiname: String,
    geonameid: u32,
}
84
/// Administrative division (first or second level) attached to a city.
///
/// The `#[rkyv(attr(serde(...)))]` attributes put a serde `serialize_with` on
/// the matching field of the generated archived type; the helper functions
/// they name are defined outside this part of the file.
#[derive(Debug, Clone, serde::Serialize, rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)]
#[cfg_attr(feature = "oaph", derive(JsonSchema))]
#[rkyv(derive(serde::Serialize, Debug))]
pub struct AdminDivision {
    /// Geoname id of the division.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_u32")))]
    pub id: u32,
    /// Division code as it appears in the admin codes file.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub code: String,
    /// Division name as it appears in the admin codes file.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub name: String,
}
96
97// The main 'geoname' table has the following fields :
98// ---------------------------------------------------
99// geonameid         : integer id of record in geonames database
100// name              : name of geographical point (utf8) varchar(200)
101// asciiname         : name of geographical point in plain ascii characters, varchar(200)
102// alternatenames    : alternatenames, comma separated, ascii names automatically transliterated, convenience attribute from alternatename table, varchar(10000)
103// latitude          : latitude in decimal degrees (wgs84)
104// longitude         : longitude in decimal degrees (wgs84)
105// feature class     : see http://www.geonames.org/export/codes.html, char(1)
106// feature code      : see http://www.geonames.org/export/codes.html, varchar(10)
107// country code      : ISO-3166 2-letter country code, 2 characters
108// cc2               : alternate country codes, comma separated, ISO-3166 2-letter country code, 200 characters
109// admin1 code       : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
110// admin2 code       : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
111// admin3 code       : code for third level administrative division, varchar(20)
112// admin4 code       : code for fourth level administrative division, varchar(20)
113// population        : bigint (8 byte int)
114// elevation         : in meters, integer
115// dem               : digital elevation model, srtm3 or gtopo30, average elevation of 3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, integer. srtm processed by cgiar/ciat.
116// timezone          : the iana timezone id (see file timeZone.txt) varchar(40)
117// modification date : date of last modification in yyyy-MM-dd format
118
/// One row of the cities file, in the column order of the 'geoname' table
/// described above.
///
/// Rows are read without a header line, so the field order must follow the
/// column order of the file. Underscore-prefixed fields are not read by the
/// index builder; presumably they exist only to keep the columns aligned.
#[derive(Debug, serde::Deserialize)]
struct CitiesRecordRaw {
    geonameid: u32,
    name: String,
    asciiname: String,
    // Comma separated list; each item becomes its own searchable entry.
    alternatenames: String,
    latitude: f32,
    longitude: f32,
    _feature_class: String,
    // Decides whether the row is indexed and whether it is a capital (`PPLC`).
    feature_code: String,
    country_code: String,
    _cc2: String,
    admin1_code: String,
    admin2_code: String,
    _admin3_code: String,
    _admin4_code: String,
    population: u32,
    _elevation: String,
    _dem: String,
    timezone: String,
    _modification_date: String,
}
141
// CountryInfo
143// http://download.geonames.org/export/dump/countryInfo.txt
144// ISO	ISO3	ISO-Numeric	fips	Country	Capital	Area(in sq km)	Population	Continent	tld	CurrencyCode	CurrencyName	Phone	Postal Code Format	Postal Code Regex	Languages	geonameid	neighbours	EquivalentFipsCode
/// One row of the country info file.
///
/// Rows are read without a header line, so the field order must follow the
/// column order of the file. The `#[rkyv(attr(serde(...)))]` attributes put a
/// serde `serialize_with` on the matching field of the generated archived
/// type; the helper functions they name are defined outside this part of the
/// file.
#[derive(Debug, Clone, serde::Deserialize, rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)]
#[rkyv(derive(serde::Serialize, Debug))]
pub struct CountryRecordRaw {
    /// `ISO` column; used as the key when countries are indexed by code.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub iso: String,
    /// `ISO3` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub iso3: String,
    /// `ISO-Numeric` column, kept as text.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub iso_numeric: String,
    /// `fips` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub fips: String,
    /// `Country` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub name: String,
    /// `Capital` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub capital: String,
    /// `Area(in sq km)` column, kept as text.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub area: String,
    /// `Population` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_u32")))]
    pub population: u32,
    /// `Continent` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub continent: String,
    /// `tld` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub tld: String,
    /// `CurrencyCode` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub currency_code: String,
    /// `CurrencyName` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub currency_name: String,
    /// `Phone` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub phone: String,
    /// `Postal Code Format` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub postal_code_format: String,
    /// `Postal Code Regex` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub postal_code_regex: String,
    /// `Languages` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub languages: String,
    /// `geonameid` column; used to find the country's alternate names.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_u32")))]
    pub geonameid: u32,
    /// `neighbours` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub neighbours: String,
    /// `EquivalentFipsCode` column.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub equivalent_fips_code: String,
}
187
/// Country information stored in the index: the raw country info row plus
/// optional translated names.
// NOTE(review): the translation maps are presumably keyed by language code,
// like the per-city name maps; the code that fills them is outside this view.
#[derive(Debug, Clone, rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)]
#[rkyv(derive(Debug, serde::Serialize))]
pub struct CountryRecord {
    /// geonames country info
    pub info: CountryRecordRaw,

    /// Country name translation
    #[rkyv(attr(serde(serialize_with = "serialize_archived_optional_map")))]
    pub names: Option<HashMap<String, String>>,

    /// Capital name translation
    #[rkyv(attr(serde(serialize_with = "serialize_archived_optional_map")))]
    pub capital_names: Option<HashMap<String, String>>,
}
202
203// The table 'alternate names' :
204// -----------------------------
205// alternateNameId   : the id of this alternate name, int
206// geonameid         : geonameId referring to id in table 'geoname', int
207// isolanguage       : iso 639 language code 2- or 3-characters; 4-characters 'post' for postal codes and 'iata','icao' and faac for airport codes, fr_1793 for French Revolution names,  abbr for abbreviation, link to a website (mostly to wikipedia), wkdt for the wikidataid, varchar(7)
208// alternate name    : alternate name or name variant, varchar(400)
209// isPreferredName   : '1', if this alternate name is an official/preferred name
210// isShortName       : '1', if this is a short name like 'California' for 'State of California'
211// isColloquial      : '1', if this alternate name is a colloquial or slang term. Example: 'Big Apple' for 'New York'.
212// isHistoric        : '1', if this alternate name is historic and was used in the past. Example 'Bombay' for 'Mumbai'.
213// from		  : from period when the name was used
214// to		  : to period when the name was used
/// One row of the alternate names file, in the column order of the
/// 'alternate names' table described above.
///
/// Rows are read without a header line, so the field order must follow the
/// column order of the file. The `is_*` flags are kept as text and compared
/// against the literal `"1"`.
#[derive(Debug, serde::Deserialize)]
struct AlternateNamesRaw {
    _alternate_name_id: u32,
    // Id of the city, country or admin division this name belongs to.
    geonameid: u32,
    // Matched against the configured `filter_languages`.
    isolanguage: String,
    alternate_name: String,
    // A preferred name already stored for a language is not overwritten.
    is_preferred_name: String,
    // Short names of cities are skipped unless they are also preferred.
    is_short_name: String,
    // Colloquial names are skipped.
    is_colloquial: String,
    // Historic names are skipped.
    is_historic: String,
    _from: String,
    _to: String,
}
228
/// Compact country description embedded in every city record.
///
/// Built from a `CountryRecordRaw` through the `From<&CountryRecordRaw>` impl.
#[cfg_attr(feature = "oaph", derive(JsonSchema))]
#[derive(Debug, Clone, serde::Serialize, rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)]
#[rkyv(derive(serde::Serialize, Debug))]
pub struct Country {
    /// Geoname id of the country.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_u32")))]
    pub id: u32,
    /// Country code (the `iso` value of the country info row).
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub code: String,
    /// Country name as given in the country info row.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub name: String,
}
240
241impl From<&CountryRecordRaw> for Country {
242    fn from(c: &CountryRecordRaw) -> Self {
243        Country {
244            id: c.geonameid,
245            code: c.iso.clone(),
246            name: c.name.clone(),
247        }
248    }
249}
250
/// City record stored in the index and returned to callers.
///
/// All `*_names` maps go from language code to the name in that language and
/// are only present when an alternate names file was supplied and contained
/// matching rows. The `#[rkyv(attr(serde(...)))]` attributes put a serde
/// `serialize_with` on the matching field of the generated archived type; the
/// helper functions they name are defined outside this part of the file.
#[cfg_attr(feature = "oaph", derive(JsonSchema))]
#[derive(Debug, Clone, serde::Serialize, rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)]
#[rkyv(derive(serde::Serialize, Debug))]
pub struct CitiesRecord {
    /// Geoname id of the city.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_u32")))]
    pub id: u32,
    /// City name as given in the cities file.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub name: String,
    /// Latitude as given in the cities file.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_f32")))]
    pub latitude: f32,
    /// Longitude as given in the cities file.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_f32")))]
    pub longitude: f32,
    /// Country of the city, when country info was loaded and the city's
    /// country code was found in it.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_option")))]
    pub country: Option<Country>,
    /// First-level administrative division, looked up by
    /// `<country code>.<admin1 code>`.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_option")))]
    pub admin_division: Option<AdminDivision>,
    /// Second-level administrative division, looked up by
    /// `<country code>.<admin1 code>.<admin2 code>`.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_option")))]
    pub admin2_division: Option<AdminDivision>,
    /// Timezone value as given in the cities file.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_string")))]
    pub timezone: String,
    /// Translated names of the city itself.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_optional_map")))]
    pub names: Option<HashMap<String, String>>,
    // todo try reuse country info
    /// Translated names of the city's country.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_optional_map")))]
    pub country_names: Option<HashMap<String, String>>,
    /// Translated names of the first-level administrative division.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_optional_map")))]
    pub admin1_names: Option<HashMap<String, String>>,
    /// Translated names of the second-level administrative division.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_optional_map")))]
    pub admin2_names: Option<HashMap<String, String>>,
    /// Population as given in the cities file.
    #[rkyv(attr(serde(serialize_with = "serialize_archived_u32")))]
    pub population: u32,
}
283
284impl IndexData {
285    pub fn new_from_files<P: AsRef<std::path::Path>>(
286        SourceFileOptions {
287            cities,
288            names,
289            countries,
290            filter_languages,
291            admin1_codes,
292            admin2_codes,
293        }: SourceFileOptions<P>,
294    ) -> Result<Self, Box<dyn Error>> {
295        Self::new_from_files_content(SourceFileContentOptions {
296            cities: std::fs::read_to_string(cities)?,
297            names: if let Some(p) = names {
298                Some(std::fs::read_to_string(p)?)
299            } else {
300                None
301            },
302            countries: if let Some(p) = countries {
303                Some(std::fs::read_to_string(p)?)
304            } else {
305                None
306            },
307            admin1_codes: if let Some(p) = admin1_codes {
308                Some(std::fs::read_to_string(p)?)
309            } else {
310                None
311            },
312            admin2_codes: if let Some(p) = admin2_codes {
313                Some(std::fs::read_to_string(p)?)
314            } else {
315                None
316            },
317            filter_languages,
318        })
319    }
320    pub fn new_from_files_content(
321        SourceFileContentOptions {
322            cities,
323            names,
324            countries,
325            filter_languages,
326            admin1_codes,
327            admin2_codes,
328        }: SourceFileContentOptions,
329    ) -> Result<Self, Box<dyn Error>> {
330        #[cfg(feature = "tracing")]
331        let now = Instant::now();
332
333        let records = split_content_to_n_parts(&cities, rayon::current_num_threads())
334            .par_iter()
335            .map(|chunk| {
336                let mut rdr = csv::ReaderBuilder::new()
337                    .has_headers(false)
338                    .delimiter(b'\t')
339                    .from_reader(chunk.as_bytes());
340
341                rdr.deserialize()
342                    .filter_map(|row| {
343                        let record: CitiesRecordRaw = row.ok()?;
344                        Some(record)
345                    })
346                    .collect::<Vec<CitiesRecordRaw>>()
347            })
348            .reduce(Vec::new, |mut m1, ref mut m2| {
349                m1.append(m2);
350                m1
351            });
352
353        let mut geonames: Vec<CitiesRecord> = Vec::with_capacity(records.len());
354        let mut entries: Vec<Entry> = Vec::with_capacity(
355            records.len()
356                * if filter_languages.is_empty() {
357                    1
358                } else {
359                    filter_languages.len()
360                },
361        );
362
363        #[cfg(feature = "tracing")]
364        tracing::info!(
365            "Engine read {} cities took {}ms",
366            records.len(),
367            now.elapsed().as_millis(),
368        );
369
370        // load country info
371        let country_by_code: Option<HashMap<String, CountryRecordRaw>> = match countries {
372            Some(contents) => {
373                #[cfg(feature = "tracing")]
374                let now = Instant::now();
375
376                let contents = skip_comment_lines(&contents);
377
378                let mut rdr = csv::ReaderBuilder::new()
379                    .has_headers(false)
380                    .delimiter(b'\t')
381                    .from_reader(contents.as_bytes());
382
383                let countries = rdr
384                    .deserialize()
385                    .filter_map(|row| {
386                        let record: CountryRecordRaw = row
387                            .map_err(|e| {
388                                #[cfg(feature = "tracing")]
389                                tracing::error!("On read country row: {e}");
390
391                                e
392                            })
393                            .ok()?;
394                        Some((record.iso.clone(), record))
395                    })
396                    .collect::<HashMap<String, CountryRecordRaw>>();
397
398                #[cfg(feature = "tracing")]
399                tracing::info!(
400                    "Engine read {} countries took {}ms",
401                    countries.len(),
402                    now.elapsed().as_millis(),
403                );
404
405                Some(countries)
406            }
407            None => None,
408        };
409
410        // load admin1 code info
411        let admin1_by_code: Option<HashMap<String, AdminDivision>> = match admin1_codes {
412            Some(contents) => {
413                #[cfg(feature = "tracing")]
414                let now = Instant::now();
415
416                let mut rdr = csv::ReaderBuilder::new()
417                    .has_headers(false)
418                    .delimiter(b'\t')
419                    .from_reader(contents.as_bytes());
420
421                let admin_division = rdr
422                    .deserialize()
423                    .filter_map(|row| {
424                        let record: Admin1CodeRecordRaw = row.ok()?;
425                        Some((
426                            record.code.clone(),
427                            AdminDivision {
428                                id: record.geonameid,
429                                code: record.code,
430                                name: record.name,
431                            },
432                        ))
433                    })
434                    .collect::<HashMap<String, AdminDivision>>();
435
436                #[cfg(feature = "tracing")]
437                tracing::info!(
438                    "Engine read {} admin1 codes took {}ms",
439                    admin_division.len(),
440                    now.elapsed().as_millis(),
441                );
442
443                Some(admin_division)
444            }
445            None => None,
446        };
447
448        // load admin2 code info
449        let admin2_by_code: Option<HashMap<String, AdminDivision>> = match admin2_codes {
450            Some(contents) => {
451                #[cfg(feature = "tracing")]
452                let now = Instant::now();
453
454                let mut rdr = csv::ReaderBuilder::new()
455                    .has_headers(false)
456                    .delimiter(b'\t')
457                    .from_reader(contents.as_bytes());
458
459                let admin_division = rdr
460                    .deserialize()
461                    .filter_map(|row| {
462                        let record: Admin2CodeRecordRaw = row.ok()?;
463                        Some((
464                            record.code.clone(),
465                            AdminDivision {
466                                id: record.geonameid,
467                                code: record.code,
468                                name: record.name,
469                            },
470                        ))
471                    })
472                    .collect::<HashMap<String, AdminDivision>>();
473
474                #[cfg(feature = "tracing")]
475                tracing::info!(
476                    "Engine read {} admin2 codes took {}ms",
477                    admin_division.len(),
478                    now.elapsed().as_millis(),
479                );
480
481                Some(admin_division)
482            }
483            None => None,
484        };
485
486        let mut names_by_id: Option<HashMap<u32, HashMap<String, String>>> = match names {
487            Some(contents) => {
488                #[cfg(feature = "tracing")]
489                let now = Instant::now();
490
491                // collect ids for cities
492                let city_geoids = records
493                    .iter()
494                    .map(|item| item.geonameid)
495                    .collect::<HashSet<u32>>();
496
497                let country_geoids = if let Some(ref country_by_code) = country_by_code {
498                    country_by_code
499                        .values()
500                        .map(|item| item.geonameid)
501                        .collect::<HashSet<u32>>()
502                } else {
503                    HashSet::<u32>::new()
504                };
505
506                let admin1_geoids = if let Some(ref admin_codes) = admin1_by_code {
507                    admin_codes
508                        .values()
509                        .map(|item| item.id)
510                        .collect::<HashSet<u32>>()
511                } else {
512                    HashSet::<u32>::new()
513                };
514
515                let admin2_geoids = if let Some(ref admin_codes) = admin2_by_code {
516                    admin_codes
517                        .values()
518                        .map(|item| item.id)
519                        .collect::<HashSet<u32>>()
520                } else {
521                    HashSet::<u32>::new()
522                };
523
524                // TODO: split to N parts can split one geonameid and build not accurate index
525                // use rayon::current_num_threads() instead of 1
526                let names_by_id = split_content_to_n_parts(&contents, 1)
527                    .par_iter()
528                    .map(move |chunk| {
529                        let mut rdr = csv::ReaderBuilder::new()
530                            .has_headers(false)
531                            .delimiter(b'\t')
532                            .from_reader(chunk.as_bytes());
533
534                        let mut names_by_id: HashMap<u32, HashMap<String, AlternateNamesRaw>> =
535                            HashMap::new();
536
537                        for row in rdr.deserialize() {
538                            let record: AlternateNamesRaw = if let Ok(r) = row {
539                                r
540                            } else {
541                                continue;
542                            };
543
544                            let is_city_name = city_geoids.contains(&record.geonameid);
545                            let mut skip = !is_city_name;
546
547                            if skip {
548                                skip = !country_geoids.contains(&record.geonameid)
549                            }
550
551                            if skip {
552                                skip = !admin1_geoids.contains(&record.geonameid)
553                            }
554
555                            if skip {
556                                skip = !admin2_geoids.contains(&record.geonameid)
557                            }
558
559                            // entry not used
560                            if skip {
561                                continue;
562                            }
563
564                            // skip short not preferred names for cities
565                            if is_city_name
566                                && record.is_short_name == "1"
567                                && record.is_preferred_name != "1"
568                            {
569                                continue;
570                            }
571
572                            if record.is_colloquial == "1" {
573                                continue;
574                            }
575                            if record.is_historic == "1" {
576                                continue;
577                            }
578
579                            // filter by languages
580                            if !filter_languages.contains(&record.isolanguage.as_str()) {
581                                continue;
582                            }
583
584                            let lang = record.isolanguage.to_owned();
585
586                            if let Some(item) = names_by_id.get_mut(&record.geonameid) {
587                                // don't overwrite preferred name
588                                let is_current_preferred_name = item
589                                    .get(&record.isolanguage)
590                                    .map(|i| i.is_preferred_name == "1")
591                                    .unwrap_or(false);
592
593                                if !is_current_preferred_name {
594                                    item.insert(lang, record);
595                                }
596                            } else {
597                                let mut map: HashMap<String, AlternateNamesRaw> = HashMap::new();
598                                let geonameid = record.geonameid;
599                                map.insert(lang.to_owned(), record);
600                                names_by_id.insert(geonameid, map);
601                            }
602                        }
603
604                        // convert names to simple struct
605                        let result: HashMap<u32, HashMap<String, String>> =
606                            names_by_id.iter().fold(HashMap::new(), |mut acc, c| {
607                                let (geonameid, names) = c;
608                                acc.insert(
609                                    *geonameid,
610                                    names.iter().fold(
611                                        HashMap::new(),
612                                        |mut accn: HashMap<String, String>, n| {
613                                            let (isolanguage, n) = n;
614                                            accn.insert(
615                                                isolanguage.to_owned(),
616                                                n.alternate_name.to_owned(),
617                                            );
618                                            accn
619                                        },
620                                    ),
621                                );
622                                acc
623                            });
624                        result
625                    })
626                    .reduce(HashMap::new, |mut m1, m2| {
627                        m1.extend(m2);
628                        m1
629                    });
630
631                #[cfg(feature = "tracing")]
632                tracing::info!(
633                    "Engine read {} names took {}ms",
634                    records.len(),
635                    now.elapsed().as_millis(),
636                );
637
638                Some(names_by_id)
639            }
640            None => None,
641        };
642
643        let mut capitals: HashMap<String, u32> =
644            HashMap::with_capacity(if let Some(items) = &country_by_code {
645                items.len()
646            } else {
647                0
648            });
649
650        for record in records {
651            // INCLUDE:
652            // PPL	populated place	a city, town, village, or other agglomeration of buildings where people live and work
653            // PPLA	seat of a first-order administrative division	seat of a first-order administrative division (PPLC takes precedence over PPLA)
654            // PPLA2	seat of a second-order administrative division
655            // PPLA3	seat of a third-order administrative division
656            // PPLA4	seat of a fourth-order administrative division
657            // PPLA5	seat of a fifth-order administrative division
658            // PPLC	capital of a political entity
659            // PPLS	populated places	cities, towns, villages, or other agglomerations of buildings where people live and work
660            // PPLG	seat of government of a political entity
661            // PPLCH	historical capital of a political entity	a former capital of a political entity
662            //
663            // EXCLUDE:
664            // PPLF farm village	a populated place where the population is largely engaged in agricultural activities
665            // PPLL	populated locality	an area similar to a locality but with a small group of dwellings or other buildings
666            // PPLQ	abandoned populated place
667            // PPLW	destroyed populated place	a village, town or city destroyed by a natural disaster, or by war
668            // PPLX	section of populated place
669            // STLMT israeli settlement
670
671            let feature_code = record.feature_code.as_str();
672            match feature_code {
673                "PPLA3" | "PPLA4" | "PPLA5" | "PPLF" | "PPLL" | "PPLQ" | "PPLW" | "PPLX"
674                | "STLMT" => continue,
675                _ => {}
676            };
677
678            let is_capital = feature_code == "PPLC";
679
680            let country_id = country_by_code
681                .as_ref()
682                .and_then(|m| m.get(&record.country_code).map(|c| c.geonameid));
683
684            entries.push(Entry {
685                id: record.geonameid,
686                value: record.name.to_lowercase().to_owned(),
687                country_id,
688            });
689
690            if record.name != record.asciiname {
691                entries.push(Entry {
692                    id: record.geonameid,
693                    value: record.asciiname.to_lowercase().to_owned(),
694                    country_id,
695                });
696            }
697
698            for altname in record.alternatenames.split(',') {
699                entries.push(Entry {
700                    id: record.geonameid,
701                    value: altname.to_lowercase(),
702                    country_id,
703                });
704            }
705
706            let country = if let Some(ref c) = country_by_code {
707                if is_capital {
708                    capitals.insert(record.country_code.to_string(), record.geonameid);
709                }
710                c.get(&record.country_code).cloned()
711            } else {
712                None
713            };
714
715            let country_names = if let Some(ref c) = country {
716                match names_by_id {
717                    Some(ref names) => names.get(&c.geonameid).cloned(),
718                    None => None,
719                }
720            } else {
721                None
722            };
723
724            let admin_division = if let Some(ref a) = admin1_by_code {
725                a.get(&format!("{}.{}", record.country_code, record.admin1_code))
726                    .cloned()
727            } else {
728                None
729            };
730
731            let admin1_names = if let Some(ref a) = admin_division {
732                match names_by_id {
733                    Some(ref names) => names.get(&a.id).cloned(),
734                    None => None,
735                }
736            } else {
737                None
738            };
739
740            let admin2_division = if let Some(ref a) = admin2_by_code {
741                a.get(&format!(
742                    "{}.{}.{}",
743                    record.country_code, record.admin1_code, record.admin2_code
744                ))
745                .cloned()
746            } else {
747                None
748            };
749
750            let admin2_names = if let Some(ref a) = admin2_division {
751                match names_by_id {
752                    Some(ref names) => names.get(&a.id).cloned(),
753                    None => None,
754                }
755            } else {
756                None
757            };
758            geonames.push(CitiesRecord {
759                id: record.geonameid,
760                name: record.name,
761                country: country.as_ref().map(Country::from),
762                admin_division,
763                admin2_division,
764                latitude: record.latitude,
765                longitude: record.longitude,
766                timezone: record.timezone,
767                names: match names_by_id {
768                    Some(ref mut names) => {
769                        if is_capital {
770                            names.get(&record.geonameid).cloned()
771                        } else {
772                            // don't hold unused data
773                            names.remove(&record.geonameid)
774                        }
775                    }
776                    None => None,
777                },
778                country_names,
779                admin1_names,
780                admin2_names,
781                population: record.population,
782            });
783        }
784
785        geonames.sort_unstable_by_key(|item| item.id);
786        geonames.dedup_by_key(|item| item.id);
787
788        let tree_index_to_geonameid = HashMap::from_iter(
789            geonames
790                .iter()
791                .enumerate()
792                .map(|(index, item)| (index, item.id)),
793        );
794        let tree = ImmutableKdTree::new_from_slice(
795            geonames
796                .iter()
797                .map(|item| [item.latitude, item.longitude])
798                .collect::<Vec<_>>()
799                .as_slice(),
800        );
801
802        let data = IndexData {
803            tree,
804            tree_index_to_geonameid,
805            entries,
806            geonames: HashMap::from_iter(geonames.into_iter().map(|item| (item.id, item))),
807            country_info_by_code: if let Some(country_by_code) = country_by_code {
808                HashMap::from_iter(country_by_code.into_iter().map(|(code, country)| {
809                    let country_record = CountryRecord {
810                        names: names_by_id
811                            .as_ref()
812                            .and_then(|names| names.get(&country.geonameid).cloned()),
813                        capital_names: match names_by_id {
814                            Some(ref names) => {
815                                if let Some(city_id) = capitals.get(&country.iso) {
816                                    names.get(city_id).cloned()
817                                } else {
818                                    None
819                                }
820                            }
821                            None => None,
822                        },
823                        info: country,
824                    };
825
826                    (code, country_record)
827                }))
828            } else {
829                HashMap::new()
830            },
831            capitals,
832        };
833
834        #[cfg(feature = "tracing")]
835        tracing::info!(
836            "Index data ready (entries {}, geonames {}, capitals {}). took {}ms",
837            data.entries.len(),
838            data.geonames.len(),
839            data.capitals.len(),
840            now.elapsed().as_millis()
841        );
842        Ok(data)
843    }
844}
845
846fn serialize_archived_string<S>(value: &ArchivedString, s: S) -> Result<S::Ok, S::Error>
847where
848    S: Serializer,
849{
850    s.serialize_str(value.as_str())
851}
852
853fn serialize_archived_u32<S>(value: &u32_le, s: S) -> Result<S::Ok, S::Error>
854where
855    S: Serializer,
856{
857    s.serialize_u32(value.to_native())
858}
859
860fn serialize_archived_f32<S>(value: &f32_le, s: S) -> Result<S::Ok, S::Error>
861where
862    S: Serializer,
863{
864    s.serialize_f32(value.to_native())
865}
866
867fn serialize_archived_option<S, T>(value: &ArchivedOption<T>, s: S) -> Result<S::Ok, S::Error>
868where
869    S: Serializer,
870    T: serde::Serialize,
871{
872    if let Some(v) = value.as_ref() {
873        s.serialize_some(v)
874    } else {
875        s.serialize_none()
876    }
877}
878
879fn serialize_archived_optional_map<S>(
880    value: &ArchivedOption<ArchivedHashMap<ArchivedString, ArchivedString>>,
881    s: S,
882) -> Result<S::Ok, S::Error>
883where
884    S: Serializer,
885{
886    if let Some(v) = value.as_ref() {
887        let mut map = s.serialize_map(v.len().into())?;
888        for (key, value) in v.iter() {
889            map.serialize_entry(key.as_str(), value.as_str())?;
890        }
891        map.end()
892    } else {
893        s.serialize_none()
894    }
895}