Skip to main content

exiftool_rs/
geolocation.rs

1//! Geolocation reverse geocoding using ExifTool's Geolocation.dat.
2//!
3//! Loads the binary database and finds the nearest city to a GPS coordinate.
4
5use std::path::Path;
6
/// A city entry from the geolocation database, with all list indices resolved to text.
///
/// Values of this type are produced by [`GeolocationDb::find_nearest`]. The type derives
/// `PartialEq` so results can be compared directly (for example in tests or when
/// de-duplicating lookups).
#[derive(Debug, Clone, PartialEq)]
pub struct City {
    /// City name as stored in the database (UTF-8, lossily decoded).
    pub name: String,
    /// Two-character country code taken from the database's country list.
    pub country_code: String,
    /// Country name taken from the database's country list.
    pub country: String,
    /// Region name; empty when the record's region index is out of range.
    pub region: String,
    /// Subregion name; empty when the record's subregion index is out of range.
    pub subregion: String,
    /// Time zone name; empty when the record's time zone index is out of range.
    pub timezone: String,
    /// Approximate population, decoded from the database's two-digit mantissa and exponent.
    pub population: u64,
    /// Latitude in degrees, rounded to four decimal places.
    pub lat: f64,
    /// Longitude in degrees, rounded to four decimal places.
    pub lon: f64,
}
20
/// The geolocation database: the packed city records plus the string lists they index into.
///
/// Derives `Debug` and `Clone` so the public type can be logged and duplicated like any
/// other value type.
#[derive(Debug, Clone)]
pub struct GeolocationDb {
    /// City records in file order.
    cities: Vec<CityRecord>,
    /// Country list as `(code, name)` pairs, indexed by `CityRecord::country_idx`.
    countries: Vec<(String, String)>,
    /// Region names, indexed by `CityRecord::region_idx`.
    regions: Vec<String>,
    /// Subregion names, indexed by `CityRecord::subregion_idx`.
    subregions: Vec<String>,
    /// Time zone names, indexed by `CityRecord::tz_idx`.
    timezones: Vec<String>,
}

/// Raw binary city record (13 bytes + name), kept in its packed form until it is looked up.
#[derive(Debug, Clone)]
struct CityRecord {
    /// Latitude as a 20-bit value; 0..2^20 maps linearly onto -90..+90 degrees.
    lat_raw: u32,
    /// Longitude as a 20-bit value; 0..2^20 maps linearly onto -180..+180 degrees.
    lon_raw: u32,
    /// Index into the country list (top byte of the 32-bit code word).
    country_idx: u8,
    /// The full 32-bit code word; the population digits are extracted from it on lookup.
    pop_code: u32,
    /// Index into the region list (low 12 bits of the code word).
    region_idx: u16,
    /// Index into the subregion list.
    subregion_idx: u16,
    /// Index into the time zone list (9 bits).
    tz_idx: u16,
    /// City name (UTF-8, lossily decoded).
    name: String,
}
41
42impl GeolocationDb {
43    /// Load the database from ExifTool's Geolocation.dat file.
44    pub fn load<P: AsRef<Path>>(path: P) -> Option<Self> {
45        let data = std::fs::read(path.as_ref()).ok()?;
46        Self::parse(&data)
47    }
48
49    /// Try to find the database in common locations.
50    pub fn load_default() -> Option<Self> {
51        // Try relative to executable, then common ExifTool locations
52        let candidates = [
53            "Geolocation.dat",
54            "../exiftool/lib/Image/ExifTool/Geolocation.dat",
55            "/usr/share/exiftool/Geolocation.dat",
56            "/usr/local/share/exiftool/Geolocation.dat",
57        ];
58
59        for path in &candidates {
60            if let Some(db) = Self::load(path) {
61                return Some(db);
62            }
63        }
64
65        // Try relative to the executable path
66        if let Ok(exe) = std::env::current_exe() {
67            if let Some(dir) = exe.parent() {
68                let p = dir.join("Geolocation.dat");
69                if let Some(db) = Self::load(&p) {
70                    return Some(db);
71                }
72            }
73        }
74
75        None
76    }
77
78    /// Parse the binary database.
79    fn parse(data: &[u8]) -> Option<Self> {
80        // Find first newline (end of header line)
81        let header_end = data.iter().position(|&b| b == b'\n')?;
82        let header = std::str::from_utf8(&data[..header_end]).ok()?;
83
84        // Validate header: "GeolocationX.XX\tNNNN"
85        if !header.starts_with("Geolocation") {
86            return None;
87        }
88        let tab_pos = header.find('\t')?;
89        let city_count: usize = header[tab_pos + 1..].parse().ok()?;
90
91        // Skip comment line
92        let mut pos = header_end + 1;
93        if pos < data.len() && data[pos] == b'#' {
94            while pos < data.len() && data[pos] != b'\n' {
95                pos += 1;
96            }
97            pos += 1;
98        }
99
100        // Read city records
101        let mut cities = Vec::with_capacity(city_count);
102        loop {
103            if pos + 6 > data.len() {
104                break;
105            }
106            // Check for section separator: \0\0\0\0\xNN\n
107            if data[pos] == 0 && data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0 {
108                pos += 6; // Skip separator
109                break;
110            }
111
112            // Need at least 14 bytes (13 binary + 1 for name + newline)
113            if pos + 14 > data.len() {
114                break;
115            }
116
117            // Parse 13-byte binary header
118            let lt = u16::from_be_bytes([data[pos], data[pos + 1]]);
119            let f = data[pos + 2];
120            let ln = u16::from_be_bytes([data[pos + 3], data[pos + 4]]);
121            let code = u32::from_be_bytes([data[pos + 5], data[pos + 6], data[pos + 7], data[pos + 8]]);
122            let sn = u16::from_be_bytes([data[pos + 9], data[pos + 10]]);
123            let tn = data[pos + 11];
124            let fn_byte = data[pos + 12];
125
126            let lat_raw = ((lt as u32) << 4) | ((f >> 4) as u32);
127            let lon_raw = ((ln as u32) << 4) | ((f & 0x0F) as u32);
128            let country_idx = (code >> 24) as u8;
129            let region_idx = (code & 0x0FFF) as u16;
130            let subregion_idx = sn & 0x7FFF;
131
132            // Timezone: 9-bit index
133            let tz_high = (fn_byte >> 7) as u16; // v1.03: bit 7 of feature byte
134            let tz_idx = (tz_high << 8) | (tn as u16);
135
136            // Find city name (UTF-8 until newline)
137            let name_start = pos + 13;
138            let name_end = data[name_start..].iter().position(|&b| b == b'\n')
139                .map(|p| name_start + p)
140                .unwrap_or(data.len());
141
142            let name = String::from_utf8_lossy(&data[name_start..name_end]).to_string();
143
144            cities.push(CityRecord {
145                lat_raw,
146                lon_raw,
147                country_idx,
148                pop_code: code,
149                region_idx,
150                subregion_idx,
151                tz_idx,
152                name,
153            });
154
155            pos = name_end + 1;
156        }
157
158        // Read string lists
159        // Countries: "CCCountryName\n" (2-char code + name on same line)
160        let countries = read_country_list(&data, &mut pos);
161        skip_separator(&data, &mut pos);
162        let regions = read_string_list(&data, &mut pos);
163        skip_separator(&data, &mut pos);
164        let subregions = read_string_list(&data, &mut pos);
165        skip_separator(&data, &mut pos);
166        let timezones = read_string_list(&data, &mut pos);
167
168        Some(GeolocationDb {
169            cities,
170            countries,
171            regions,
172            subregions,
173            timezones,
174        })
175    }
176
177    /// Find the nearest city to the given coordinates.
178    pub fn find_nearest(&self, lat: f64, lon: f64) -> Option<City> {
179        if self.cities.is_empty() {
180            return None;
181        }
182
183        let mut best_idx = 0;
184        let mut best_dist = f64::MAX;
185
186        for (i, city) in self.cities.iter().enumerate() {
187            let clat = city.lat_raw as f64 * 180.0 / 1048576.0 - 90.0;
188            let clon = city.lon_raw as f64 * 360.0 / 1048576.0 - 180.0;
189
190            let dlat = (lat - clat) * std::f64::consts::PI / 180.0;
191            let dlon = (lon - clon) * std::f64::consts::PI / 180.0;
192
193            // Simplified distance (no need for Haversine for nearest-city search)
194            let cos_lat = ((lat + clat) / 2.0 * std::f64::consts::PI / 180.0).cos();
195            let dist = dlat * dlat + (dlon * cos_lat) * (dlon * cos_lat);
196
197            if dist < best_dist {
198                best_dist = dist;
199                best_idx = i;
200            }
201        }
202
203        Some(self.get_city(best_idx))
204    }
205
206    /// Get a city entry by index.
207    fn get_city(&self, idx: usize) -> City {
208        let rec = &self.cities[idx];
209
210        let lat = rec.lat_raw as f64 * 180.0 / 1048576.0 - 90.0;
211        let lon = rec.lon_raw as f64 * 360.0 / 1048576.0 - 180.0;
212
213        let (country_code, country) = self.countries.get(rec.country_idx as usize)
214            .cloned()
215            .unwrap_or_else(|| ("??".into(), "Unknown".into()));
216
217        let region = self.regions.get(rec.region_idx as usize)
218            .cloned()
219            .unwrap_or_default();
220
221        let subregion = self.subregions.get(rec.subregion_idx as usize)
222            .cloned()
223            .unwrap_or_default();
224
225        let timezone = self.timezones.get(rec.tz_idx as usize)
226            .cloned()
227            .unwrap_or_default();
228
229        // Decode population: N.Fe+0E
230        let e = (rec.pop_code >> 20) & 0x0F;
231        let n = (rec.pop_code >> 16) & 0x0F;
232        let f = (rec.pop_code >> 12) & 0x0F;
233        let pop_str = format!("{}.{}e+0{}", n, f, e);
234        let population: u64 = pop_str.parse::<f64>().unwrap_or(0.0) as u64;
235
236        City {
237            name: rec.name.clone(),
238            country_code,
239            country,
240            region,
241            subregion,
242            timezone,
243            population,
244            lat: (lat * 10000.0).round() / 10000.0,
245            lon: (lon * 10000.0).round() / 10000.0,
246        }
247    }
248
249    /// Number of cities in the database.
250    pub fn len(&self) -> usize {
251        self.cities.len()
252    }
253}
254
/// Read newline-terminated strings from `data` starting at `*pos`.
///
/// Reading stops when fewer than six bytes remain or when the next four bytes
/// are all zero (the start of a section separator). On return `*pos` points at
/// the first unread byte; a separator, if present, is left unconsumed. Invalid
/// UTF-8 is decoded lossily.
fn read_string_list(data: &[u8], pos: &mut usize) -> Vec<String> {
    const SEPARATOR_LEN: usize = 6;

    let mut entries = Vec::new();
    while *pos + SEPARATOR_LEN <= data.len() {
        let rest = &data[*pos..];
        if rest.starts_with(&[0, 0, 0, 0]) {
            break;
        }

        // The entry runs up to the next newline, or to the end of the data.
        let line_len = rest
            .iter()
            .position(|&b| b == b'\n')
            .unwrap_or(rest.len());
        entries.push(String::from_utf8_lossy(&rest[..line_len]).into_owned());

        // Step past the entry, and past its newline when there is one.
        let consumed_newline = line_len < rest.len();
        *pos += line_len + usize::from(consumed_newline);
    }
    entries
}
278
/// Read the country list from `data` starting at `*pos`.
///
/// Each entry is one line of the form `CCCountryName\n`: a two-byte country code
/// immediately followed by the country name. Reading stops when fewer than six
/// bytes remain or when the next four bytes are all zero (the start of a section
/// separator). On return `*pos` points at the first unread byte; a separator, if
/// present, is left unconsumed.
///
/// The code/name split is done on the raw bytes *before* UTF-8 decoding, so
/// malformed input can never cause a slice at a non-character boundary. Every
/// line produces exactly one entry — a line shorter than two bytes yields a
/// short code and an empty name — because city records refer to countries by
/// position and dropping a line would shift every later index.
fn read_country_list(data: &[u8], pos: &mut usize) -> Vec<(String, String)> {
    const SEPARATOR_LEN: usize = 6;
    const CODE_LEN: usize = 2;

    let mut list = Vec::new();
    while *pos + SEPARATOR_LEN <= data.len() {
        let rest = &data[*pos..];
        if rest.starts_with(&[0, 0, 0, 0]) {
            break;
        }

        // The line runs up to the next newline, or to the end of the data.
        let line_len = rest
            .iter()
            .position(|&b| b == b'\n')
            .unwrap_or(rest.len());
        let line = &rest[..line_len];
        // Step past the line, and past its newline when there is one.
        *pos += line_len + usize::from(line_len < rest.len());

        let (code, name) = line.split_at(line.len().min(CODE_LEN));
        list.push((
            String::from_utf8_lossy(code).into_owned(),
            String::from_utf8_lossy(name).into_owned(),
        ));
    }
    list
}
304
/// Advance `*pos` past a 6-byte section separator, if one starts here.
///
/// Only the first byte is inspected: when at least six bytes remain and the byte
/// at `*pos` is zero, `*pos` moves forward by six. Otherwise `*pos` is unchanged.
fn skip_separator(data: &[u8], pos: &mut usize) {
    const SEPARATOR_LEN: usize = 6;

    match data.get(*pos..) {
        Some(rest) if rest.len() >= SEPARATOR_LEN && rest[0] == 0 => *pos += SEPARATOR_LEN,
        _ => {}
    }
}
309}