use std::io::BufRead;
use std::path::Path;
use crate::data::Data;
use crate::error::{Error, Result};
/// Download URL of the `cities500.zip` archive fetched by `download_all`.
const GEONAMES_URL: &str =
"https://download.geonames.org/export/dump/cities500.zip";
/// File name handed to `ensure_cached_path` for the downloaded archive.
const GEONAMES_FILENAME: &str = "cities500.zip";
/// Thirty days expressed in seconds; handed to `ensure_cached_path` for all three downloads.
/// NOTE(review): presumably the maximum age of a cached file before it is fetched again —
/// confirm against `io_utils::ensure_cached_path`.
const GEONAMES_TTL: std::time::Duration =
std::time::Duration::from_secs(30 * 24 * 60 * 60);
/// Download URL of the admin1 code list parsed by `parse_admin1_txt`.
const GEONAMES_ADMIN1_URL: &str = "https://download.geonames.org/export/dump/admin1CodesASCII.txt";
/// File name handed to `ensure_cached_path` for the admin1 code list.
const GEONAMES_ADMIN1_FILENAME: &str = "admin1CodesASCII.txt";
/// Download URL of the country list parsed by `parse_country_info_txt`.
const GEONAMES_COUNTRY_URL: &str = "https://download.geonames.org/export/dump/countryInfo.txt";
/// File name handed to `ensure_cached_path` for the country list.
const GEONAMES_COUNTRY_FILENAME: &str = "countryInfo.txt";
/// One place record, as produced by `parse_line` from a tab-separated dump line and as
/// stored in the `geonames` SQLite table.
///
/// Column numbers below are the zero-based positions `parse_line` reads from.
#[derive(Debug, Default, Clone)]
pub struct GeoName {
/// Numeric identifier (column 0); lines whose id is not an integer are rejected.
pub id: i64,
/// Place name (column 1).
pub name: String,
/// Latitude (column 4); `parse_line` stores `0.0` when the column does not parse.
pub latitude: f64,
/// Longitude (column 5); `parse_line` stores `0.0` when the column does not parse.
pub longitude: f64,
/// Feature class (column 6), e.g. `"P"` in the sample line used by the tests.
pub feature_class: String,
/// Feature code (column 7), e.g. `"PPL"` in the sample line used by the tests.
pub feature_code: String,
/// Country code (column 8); `geonames_countries` is looked up by this value in `lookup_country`.
pub country_code: String,
/// First-level administrative code (column 10); combined with the country code by `lookup_admin1`.
pub admin1_code: String,
/// Second-level administrative code (column 11).
pub admin2_code: String,
/// Population (column 14); `parse_line` stores `0` when the column does not parse.
pub population: i64,
/// Time zone name (column 17), e.g. `"America/Los_Angeles"`.
pub timezone: String,
/// Modification date text (column 18), stored verbatim, e.g. `"2019-09-05"`.
pub date_modified: String,
}
/// One first-level administrative division, as produced by `parse_admin1_txt` and stored
/// in the `geonames_admin1` table.
///
/// Derives the same traits as `GeoName` so values can be logged, copied and defaulted.
#[derive(Debug, Default, Clone)]
pub struct Admin1 {
    /// Lookup key (column 0 of the admin1 file); `lookup_admin1` queries it as
    /// `"<country_code>.<admin1_code>"`, e.g. `"US.CA"`.
    pub key: String,
    /// Division name (column 1 of the admin1 file), e.g. `"California"`.
    pub name: String,
}
/// One country record, as produced by `parse_country_info_txt` and stored in the
/// `geonames_countries` table.
///
/// Derives the same traits as `GeoName` so values can be logged, copied and defaulted.
#[derive(Debug, Default, Clone)]
pub struct CountryInfo {
    /// Country code used as the primary key (column 0 of the country file), e.g. `"US"`.
    pub iso: String,
    /// Country name (column 4 of the country file), e.g. `"United States"`.
    pub country_name: String,
    /// Two-letter continent code (column 8 of the country file), e.g. `"NA"`;
    /// `continent_name` maps it to a display name.
    pub continent_code: String,
}
/// Schema batch executed by `write_sqlite` after it has dropped the previous data tables.
///
/// Sets `PRAGMA synchronous=NORMAL` and creates (if absent) the `settings` key/value table,
/// the `geonames` table with an index on `country_code`, and the `geonames_admin1` and
/// `geonames_countries` lookup tables. Column names mirror the fields of `GeoName`,
/// `Admin1` and `CountryInfo`.
const GEONAMES_DDL: &str = r#"PRAGMA synchronous=NORMAL;
CREATE TABLE IF NOT EXISTS settings (
"key" TEXT PRIMARY KEY NOT NULL,
"value" TEXT NOT NULL DEFAULT ''
);
CREATE TABLE IF NOT EXISTS geonames (
"id" INTEGER PRIMARY KEY NOT NULL,
"name" TEXT NOT NULL DEFAULT '',
"latitude" REAL NOT NULL DEFAULT 0.0,
"longitude" REAL NOT NULL DEFAULT 0.0,
"feature_class" TEXT NOT NULL DEFAULT '',
"feature_code" TEXT NOT NULL DEFAULT '',
"country_code" TEXT NOT NULL DEFAULT '',
"admin1_code" TEXT NOT NULL DEFAULT '',
"admin2_code" TEXT NOT NULL DEFAULT '',
"population" INTEGER NOT NULL DEFAULT 0,
"timezone" TEXT NOT NULL DEFAULT '',
"date_modified" TEXT NOT NULL DEFAULT ''
);
CREATE INDEX IF NOT EXISTS geonames_country ON geonames("country_code");
CREATE TABLE IF NOT EXISTS geonames_admin1 (
"key" TEXT PRIMARY KEY NOT NULL,
"name" TEXT NOT NULL DEFAULT ''
);
CREATE TABLE IF NOT EXISTS geonames_countries (
"iso" TEXT PRIMARY KEY NOT NULL,
"country_name" TEXT NOT NULL DEFAULT '',
"continent_code" TEXT NOT NULL DEFAULT ''
);
"#;
/// Upsert for one `geonames` row; the twelve positional parameters follow the field order
/// of `GeoName` (see the `params!` list in `write_sqlite`).
const GEONAMES_INSERT: &str = r#"INSERT OR REPLACE INTO geonames (
"id", "name", "latitude", "longitude", "feature_class", "feature_code",
"country_code", "admin1_code", "admin2_code", "population", "timezone", "date_modified"
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)"#;
/// Upsert for one `geonames_admin1` row: `?1` is the key, `?2` the name.
const GEONAMES_ADMIN1_INSERT: &str =
r#"INSERT OR REPLACE INTO geonames_admin1 ("key", "name") VALUES (?1, ?2)"#;
/// Upsert for one `geonames_countries` row: `?1` iso code, `?2` country name,
/// `?3` continent code.
const GEONAMES_COUNTRY_INSERT: &str = r#"INSERT OR REPLACE INTO geonames_countries (
"iso", "country_name", "continent_code"
) VALUES (?1, ?2, ?3)"#;
/// Maps a two-letter continent code to its English display name.
///
/// Returns the empty string for any code that is not in the table; matching is
/// case-sensitive.
fn continent_name(code: &str) -> &'static str {
    const CONTINENTS: [(&str, &str); 7] = [
        ("AF", "Africa"),
        ("AN", "Antarctica"),
        ("AS", "Asia"),
        ("EU", "Europe"),
        ("NA", "North America"),
        ("OC", "Oceania"),
        ("SA", "South America"),
    ];
    CONTINENTS
        .iter()
        .find(|(abbreviation, _)| *abbreviation == code)
        .map(|(_, full_name)| *full_name)
        .unwrap_or("")
}
/// Parses one tab-separated dump line into a [`GeoName`].
///
/// Returns `None` when the line has fewer than 19 columns or when the first column is not
/// an integer id. Latitude, longitude and population fall back to `0.0` / `0` when their
/// column does not parse; all other columns are copied verbatim.
fn parse_line(line: &str) -> Option<GeoName> {
    // At most 20 pieces are produced, so anything past column 18 stays in one trailing chunk.
    let columns: Vec<&str> = line.splitn(20, '\t').collect();
    match columns.as_slice() {
        // Columns 0..=18; the `_` positions are columns this parser does not keep.
        [raw_id, name, _, _, lat, lon, class, code, country, _, admin1, admin2, _, _, people, _, _, zone, modified, ..] =>
        {
            let id: i64 = raw_id.parse().ok()?;
            Some(GeoName {
                id,
                name: String::from(*name),
                latitude: lat.parse().unwrap_or(0.0),
                longitude: lon.parse().unwrap_or(0.0),
                feature_class: String::from(*class),
                feature_code: String::from(*code),
                country_code: String::from(*country),
                admin1_code: String::from(*admin1),
                admin2_code: String::from(*admin2),
                population: people.parse().unwrap_or(0),
                timezone: String::from(*zone),
                date_modified: String::from(*modified),
            })
        }
        _ => None,
    }
}
/// Parses the contents of a places dump into a list of [`GeoName`] records.
///
/// Lines that cannot be read as text, empty lines, lines starting with `#`, and lines
/// rejected by `parse_line` are skipped silently.
pub fn parse_txt(bytes: &[u8]) -> Vec<GeoName> {
    let mut places = Vec::new();
    for entry in std::io::BufReader::new(bytes).lines() {
        let text = match entry {
            Ok(text) => text,
            Err(_) => continue,
        };
        if text.is_empty() || text.starts_with('#') {
            continue;
        }
        if let Some(place) = parse_line(&text) {
            places.push(place);
        }
    }
    places
}
/// Parses the contents of the admin1 code file into a list of [`Admin1`] records.
///
/// The first tab-separated column becomes the key and the second the name. Lines that
/// cannot be read as text, empty lines, lines starting with `#`, and lines with fewer than
/// two columns are skipped silently.
pub fn parse_admin1_txt(bytes: &[u8]) -> Vec<Admin1> {
    let mut divisions = Vec::new();
    for entry in std::io::BufReader::new(bytes).lines() {
        let text = match entry {
            Ok(text) => text,
            Err(_) => continue,
        };
        if text.is_empty() || text.starts_with('#') {
            continue;
        }
        let mut columns = text.splitn(4, '\t');
        if let (Some(key), Some(name)) = (columns.next(), columns.next()) {
            divisions.push(Admin1 {
                key: key.to_string(),
                name: name.to_string(),
            });
        }
    }
    divisions
}
/// Parses the contents of the country info file into a list of [`CountryInfo`] records.
///
/// Columns 0, 4 and 8 become the iso code, country name and continent code. Lines that
/// cannot be read as text, empty lines, lines starting with `#`, and lines with fewer than
/// nine columns are skipped silently.
pub fn parse_country_info_txt(bytes: &[u8]) -> Vec<CountryInfo> {
    let mut countries = Vec::new();
    for entry in std::io::BufReader::new(bytes).lines() {
        let text = match entry {
            Ok(text) => text,
            Err(_) => continue,
        };
        if text.is_empty() || text.starts_with('#') {
            continue;
        }
        let mut columns = text.split('\t');
        let iso = columns.next(); // column 0
        let country_name = columns.nth(3); // skips columns 1-3, yields column 4
        let continent_code = columns.nth(3); // skips columns 5-7, yields column 8
        if let (Some(iso), Some(country_name), Some(continent_code)) =
            (iso, country_name, continent_code)
        {
            countries.push(CountryInfo {
                iso: iso.to_string(),
                country_name: country_name.to_string(),
                continent_code: continent_code.to_string(),
            });
        }
    }
    countries
}
/// Converts a [`GeoName`] into the crate's generic [`Data`] record.
///
/// The id becomes a `https://sws.geonames.org/<id>/` URI, the type is `"Place"`, the place
/// name is used for both `name` and `title`, and the country code is copied to `country`.
/// Every other `Data` field keeps its default value.
fn from_geoname(g: &GeoName) -> Data {
    let id = format!("https://sws.geonames.org/{}/", g.id);
    let name = g.name.clone();
    let title = g.name.clone();
    let country = g.country_code.clone();
    Data {
        id,
        type_: String::from("Place"),
        name,
        title,
        country,
        ..Data::default()
    }
}
pub fn write_sqlite(
list: &[GeoName],
admin1_list: &[Admin1],
country_list: &[CountryInfo],
path: &Path,
date: Option<&str>,
) -> Result<()> {
use rusqlite::{params, Connection};
if let Some(parent) = path.parent() {
if !parent.as_os_str().is_empty() && !parent.exists() {
std::fs::create_dir_all(parent)
.map_err(|e| Error::Parse(format!("failed to create directory: {}", e)))?;
}
}
let conn = Connection::open(path)
.map_err(|e| Error::Parse(format!("failed to open sqlite '{}': {}", path.display(), e)))?;
conn.execute_batch(
"CREATE TABLE IF NOT EXISTS settings (\
\"key\" TEXT PRIMARY KEY NOT NULL, \
\"value\" TEXT NOT NULL DEFAULT ''\
); \
DELETE FROM settings WHERE key = 'geonames_date';",
)
.map_err(|e| Error::Parse(e.to_string()))?;
conn.execute_batch(
"DROP TABLE IF EXISTS geonames_fts; \
DROP TABLE IF EXISTS geonames; \
DROP TABLE IF EXISTS geonames_admin1; \
DROP TABLE IF EXISTS geonames_countries;",
)
.map_err(|e| Error::Parse(e.to_string()))?;
conn.execute_batch(GEONAMES_DDL)
.map_err(|e| Error::Parse(e.to_string()))?;
{
let tx = conn
.unchecked_transaction()
.map_err(|e| Error::Parse(e.to_string()))?;
{
let mut stmt = conn
.prepare(GEONAMES_INSERT)
.map_err(|e| Error::Parse(e.to_string()))?;
for g in list {
stmt.execute(params![
g.id,
g.name,
g.latitude,
g.longitude,
g.feature_class,
g.feature_code,
g.country_code,
g.admin1_code,
g.admin2_code,
g.population,
g.timezone,
g.date_modified,
])
.map_err(|e| Error::Parse(e.to_string()))?;
}
}
{
let mut stmt = conn
.prepare(GEONAMES_ADMIN1_INSERT)
.map_err(|e| Error::Parse(e.to_string()))?;
for a in admin1_list {
stmt.execute(params![a.key, a.name])
.map_err(|e| Error::Parse(e.to_string()))?;
}
}
{
let mut stmt = conn
.prepare(GEONAMES_COUNTRY_INSERT)
.map_err(|e| Error::Parse(e.to_string()))?;
for c in country_list {
stmt.execute(params![c.iso, c.country_name, c.continent_code])
.map_err(|e| Error::Parse(e.to_string()))?;
}
}
tx.commit().map_err(|e| Error::Parse(e.to_string()))?;
}
if let Some(d) = date {
conn.execute(
"INSERT OR REPLACE INTO settings (key, value) VALUES ('geonames_date', ?1)",
[d],
)
.map_err(|e| Error::Parse(e.to_string()))?;
}
Ok(())
}
pub fn fetch_sqlite(id: i64, db_path: &Path) -> Result<Data> {
use rusqlite::Connection;
let conn = Connection::open(db_path)
.map_err(|e| Error::Parse(format!("failed to open sqlite '{}': {}", db_path.display(), e)))?;
let result = conn.query_row(
"SELECT id, name, latitude, longitude, feature_class, feature_code, \
country_code, admin1_code, admin2_code, population, timezone, date_modified \
FROM geonames WHERE id = ?1",
[id],
|row| {
Ok(GeoName {
id: row.get(0)?,
name: row.get(1)?,
latitude: row.get(2)?,
longitude: row.get(3)?,
feature_class: row.get(4)?,
feature_code: row.get(5)?,
country_code: row.get(6)?,
admin1_code: row.get(7)?,
admin2_code: row.get(8)?,
population: row.get(9)?,
timezone: row.get(10)?,
date_modified: row.get(11)?,
})
},
);
match result {
Ok(g) => Ok(from_geoname(&g)),
Err(rusqlite::Error::QueryReturnedNoRows) => {
Err(Error::Parse(format!("GeoNames id {} not found", id)))
}
Err(e) => Err(Error::Parse(e.to_string())),
}
}
pub fn fetch_geoname_raw(id: i64, db_path: &Path) -> Result<GeoName> {
use rusqlite::Connection;
let conn = Connection::open(db_path)
.map_err(|e| Error::Parse(format!("failed to open sqlite '{}': {}", db_path.display(), e)))?;
let result = conn.query_row(
"SELECT id, name, latitude, longitude, feature_class, feature_code, \
country_code, admin1_code, admin2_code, population, timezone, date_modified \
FROM geonames WHERE id = ?1",
[id],
|row| {
Ok(GeoName {
id: row.get(0)?,
name: row.get(1)?,
latitude: row.get(2)?,
longitude: row.get(3)?,
feature_class: row.get(4)?,
feature_code: row.get(5)?,
country_code: row.get(6)?,
admin1_code: row.get(7)?,
admin2_code: row.get(8)?,
population: row.get(9)?,
timezone: row.get(10)?,
date_modified: row.get(11)?,
})
},
);
match result {
Ok(g) => Ok(g),
Err(rusqlite::Error::QueryReturnedNoRows) => {
Err(Error::Parse(format!("GeoNames id {} not found", id)))
}
Err(e) => Err(Error::Parse(e.to_string())),
}
}
pub fn lookup_admin1(country_code: &str, admin1_code: &str, db_path: &Path) -> Result<Option<String>> {
use rusqlite::Connection;
let conn = Connection::open(db_path)
.map_err(|e| Error::Parse(format!("failed to open sqlite '{}': {}", db_path.display(), e)))?;
let key = format!("{}.{}", country_code, admin1_code);
match conn.query_row(
"SELECT name FROM geonames_admin1 WHERE key = ?1",
[&key],
|row| row.get::<_, String>(0),
) {
Ok(name) => Ok(Some(name)),
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
Err(e) => Err(Error::Parse(e.to_string())),
}
}
pub fn lookup_country(country_code: &str, db_path: &Path) -> Result<Option<(String, String, String)>> {
use rusqlite::Connection;
let conn = Connection::open(db_path)
.map_err(|e| Error::Parse(format!("failed to open sqlite '{}': {}", db_path.display(), e)))?;
match conn.query_row(
"SELECT country_name, continent_code FROM geonames_countries WHERE iso = ?1",
[country_code],
|row| {
let country_name: String = row.get(0)?;
let continent_code: String = row.get(1)?;
Ok((country_name, continent_code))
},
) {
Ok((country_name, continent_code)) => {
let cont_name = continent_name(&continent_code).to_string();
Ok(Some((country_name, continent_code, cont_name)))
}
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
Err(e) => Err(Error::Parse(e.to_string())),
}
}
pub fn fetch_installed_geonames_date(db_path: &Path) -> Result<Option<String>> {
if !db_path.exists() {
return Ok(None);
}
use rusqlite::Connection;
let conn = Connection::open(db_path).map_err(|e| Error::Parse(e.to_string()))?;
match conn.query_row(
"SELECT value FROM settings WHERE key = 'geonames_date'",
[],
|row| row.get::<_, String>(0),
) {
Ok(v) => Ok(Some(v)),
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
Err(e) => Err(Error::Parse(e.to_string())),
}
}
pub fn download_all() -> Result<(Vec<GeoName>, Vec<Admin1>, Vec<CountryInfo>, bool)> {
let (zip_path, cache1) =
crate::io_utils::ensure_cached_path(GEONAMES_URL, "geonames", GEONAMES_FILENAME, GEONAMES_TTL)
.map_err(|e| Error::Http(e.to_string()))?;
let zip_bytes = std::fs::read(&zip_path)
.map_err(|e| Error::Http(format!("reading cached zip: {}", e)))?;
let txt_bytes = crate::io_utils::unzip_first_txt(&zip_bytes)
.map_err(|e| Error::Parse(e.to_string()))?;
let list = parse_txt(&txt_bytes);
let (admin1_path, cache2) =
crate::io_utils::ensure_cached_path(GEONAMES_ADMIN1_URL, "geonames", GEONAMES_ADMIN1_FILENAME, GEONAMES_TTL)
.map_err(|e| Error::Http(e.to_string()))?;
let admin1_bytes = std::fs::read(&admin1_path)
.map_err(|e| Error::Http(format!("reading admin1 file: {}", e)))?;
let admin1_list = parse_admin1_txt(&admin1_bytes);
let (country_path, cache3) =
crate::io_utils::ensure_cached_path(GEONAMES_COUNTRY_URL, "geonames", GEONAMES_COUNTRY_FILENAME, GEONAMES_TTL)
.map_err(|e| Error::Http(e.to_string()))?;
let country_bytes = std::fs::read(&country_path)
.map_err(|e| Error::Http(format!("reading country info file: {}", e)))?;
let country_list = parse_country_info_txt(&country_bytes);
Ok((list, admin1_list, country_list, cache1 && cache2 && cache3))
}
/// Unit tests for the parsers and the SQLite round trip.
///
/// Database tests write to the platform temp directory under a name that includes the test
/// tag and the process id, so they run on any OS, do not collide with a concurrent test
/// run, and remove their file when done.
#[cfg(test)]
mod tests {
    use super::*;

    /// A complete 19-column places line (Pasadena, California).
    const SAMPLE_LINE: &str =
        "5381396\tPasadena\tPasadena\t\t34.14778\t-118.14452\tP\tPPL\tUS\t\tCA\t037\t\t\t141371\t\t236\tAmerica/Los_Angeles\t2019-09-05";

    /// A complete country info line for the United States, newline-terminated.
    const COUNTRY_LINE: &str =
        "US\tUSA\t840\tUS\tUnited States\tWashington\t9629091\t310232863\tNA\t.us\tUSD\tDollar\t1\t#####-####\t^\\d{5}(-\\d{4})?$\ten-US\t6252001\tCA,MX,CU\n";

    /// Returns a database path in the platform temp directory that is unique to `tag` and
    /// to this test process, deleting any stale file left at that path.
    fn fresh_db_path(tag: &str) -> std::path::PathBuf {
        let file_name = format!("geonames-{}-{}.sqlite3", tag, std::process::id());
        let path = std::env::temp_dir().join(file_name);
        if path.exists() {
            std::fs::remove_file(&path).unwrap();
        }
        path
    }

    /// Best-effort removal of a test database; a failure here must not fail the test.
    fn cleanup(path: &std::path::Path) {
        let _ = std::fs::remove_file(path);
    }

    #[test]
    fn test_parse_line_basic() {
        let g = parse_line(SAMPLE_LINE).unwrap();
        assert_eq!(g.id, 5381396);
        assert_eq!(g.name, "Pasadena");
        assert_eq!(g.feature_class, "P");
        assert_eq!(g.feature_code, "PPL");
        assert_eq!(g.country_code, "US");
        assert_eq!(g.admin1_code, "CA");
        assert_eq!(g.population, 141371);
        assert!((g.latitude - 34.14778).abs() < 1e-5);
        assert!((g.longitude - -118.14452).abs() < 1e-5);
        assert_eq!(g.timezone, "America/Los_Angeles");
        assert_eq!(g.date_modified, "2019-09-05");
    }

    #[test]
    fn test_from_geoname() {
        let g = parse_line(SAMPLE_LINE).unwrap();
        let data = from_geoname(&g);
        assert_eq!(data.id, "https://sws.geonames.org/5381396/");
        assert_eq!(data.type_, "Place");
        assert_eq!(data.name, "Pasadena");
        assert_eq!(data.country, "US");
    }

    #[test]
    fn test_parse_txt_empty() {
        let list = parse_txt(b"");
        assert!(list.is_empty());
    }

    #[test]
    fn test_parse_txt_skips_short_lines() {
        let bad = b"123\tonly two fields";
        let list = parse_txt(bad);
        assert!(list.is_empty());
    }

    #[test]
    fn test_parse_admin1_txt() {
        let data = b"US.CA\tCalifornia\tCalifornia\t5332921\nUS.NY\tNew York\tNew York\t5128638\n";
        let list = parse_admin1_txt(data);
        assert_eq!(list.len(), 2);
        assert_eq!(list[0].key, "US.CA");
        assert_eq!(list[0].name, "California");
    }

    #[test]
    fn test_parse_admin1_txt_skips_comments() {
        let data = b"# comment\nUS.CA\tCalifornia\tCalifornia\t5332921\n";
        let list = parse_admin1_txt(data);
        assert_eq!(list.len(), 1);
    }

    #[test]
    fn test_parse_country_info_txt() {
        let list = parse_country_info_txt(COUNTRY_LINE.as_bytes());
        assert_eq!(list.len(), 1);
        assert_eq!(list[0].iso, "US");
        assert_eq!(list[0].country_name, "United States");
        assert_eq!(list[0].continent_code, "NA");
    }

    #[test]
    fn test_parse_country_info_txt_skips_comments() {
        let data = format!("# This is a comment\n{}", COUNTRY_LINE);
        let list = parse_country_info_txt(data.as_bytes());
        assert_eq!(list.len(), 1);
    }

    #[test]
    fn test_continent_name() {
        assert_eq!(continent_name("NA"), "North America");
        assert_eq!(continent_name("EU"), "Europe");
        assert_eq!(continent_name("AS"), "Asia");
        assert_eq!(continent_name("AF"), "Africa");
        assert_eq!(continent_name("SA"), "South America");
        assert_eq!(continent_name("OC"), "Oceania");
        assert_eq!(continent_name("AN"), "Antarctica");
        assert_eq!(continent_name("XX"), "");
    }

    #[test]
    fn test_write_and_fetch_sqlite() {
        let g = parse_line(SAMPLE_LINE).unwrap();
        let db_path = fresh_db_path("test");
        let admin1 = vec![Admin1 {
            key: "US.CA".to_string(),
            name: "California".to_string(),
        }];
        let country = vec![CountryInfo {
            iso: "US".to_string(),
            country_name: "United States".to_string(),
            continent_code: "NA".to_string(),
        }];
        write_sqlite(&[g], &admin1, &country, &db_path, Some("2026-07-02")).unwrap();
        let data = fetch_sqlite(5381396, &db_path).unwrap();
        assert_eq!(data.name, "Pasadena");
        assert_eq!(data.country, "US");
        let date = fetch_installed_geonames_date(&db_path).unwrap();
        assert_eq!(date, Some("2026-07-02".to_string()));
        cleanup(&db_path);
    }

    #[test]
    fn test_fetch_sqlite_not_found() {
        let db_path = fresh_db_path("notfound-test");
        let g = parse_line(SAMPLE_LINE).unwrap();
        write_sqlite(&[g], &[], &[], &db_path, None).unwrap();
        let result = fetch_sqlite(9999999, &db_path);
        assert!(matches!(result, Err(Error::Parse(_))));
        cleanup(&db_path);
    }

    #[test]
    fn test_lookup_admin1() {
        let db_path = fresh_db_path("admin1-test");
        let admin1 = vec![Admin1 {
            key: "US.CA".to_string(),
            name: "California".to_string(),
        }];
        write_sqlite(&[], &admin1, &[], &db_path, None).unwrap();
        let result = lookup_admin1("US", "CA", &db_path).unwrap();
        assert_eq!(result, Some("California".to_string()));
        let missing = lookup_admin1("US", "ZZ", &db_path).unwrap();
        assert_eq!(missing, None);
        cleanup(&db_path);
    }

    #[test]
    fn test_lookup_country() {
        let db_path = fresh_db_path("country-test");
        let country = vec![CountryInfo {
            iso: "US".to_string(),
            country_name: "United States".to_string(),
            continent_code: "NA".to_string(),
        }];
        write_sqlite(&[], &[], &country, &db_path, None).unwrap();
        let result = lookup_country("US", &db_path).unwrap();
        assert_eq!(
            result,
            Some((
                "United States".to_string(),
                "NA".to_string(),
                "North America".to_string()
            ))
        );
        let missing = lookup_country("ZZ", &db_path).unwrap();
        assert_eq!(missing, None);
        cleanup(&db_path);
    }
}