Skip to main content

exiftool_rs/
geolocation.rs

1//! Geolocation reverse geocoding using ExifTool's Geolocation.dat.
2//!
3//! Loads the binary database and finds the nearest city to a GPS coordinate.
4
5use std::path::Path;
6
/// A city entry from the geolocation database.
///
/// This is the fully resolved, caller-facing form of a record: country,
/// region, subregion and time zone indices have already been looked up and
/// replaced by their text.
#[derive(Debug, Clone, PartialEq)]
pub struct City {
    /// City name.
    pub name: String,
    /// Country code (`"??"` when the record's country index has no entry).
    pub country_code: String,
    /// Country name (`"Unknown"` when the record's country index has no entry).
    pub country: String,
    /// Region name, empty when the record's region index has no entry.
    pub region: String,
    /// Subregion name, empty when the record's subregion index has no entry.
    pub subregion: String,
    /// Time zone name, empty when the record's time zone index has no entry.
    pub timezone: String,
    /// Approximate population, decoded from the database's `N.Fe+0E` digits.
    pub population: u64,
    /// Latitude in degrees, rounded to four decimal places.
    pub lat: f64,
    /// Longitude in degrees, rounded to four decimal places.
    pub lon: f64,
}
20
/// The geolocation database.
///
/// Holds the raw city records together with the string tables that the
/// records reference by index.
#[derive(Debug, Clone)]
pub struct GeolocationDb {
    /// City records in file order.
    cities: Vec<CityRecord>,
    /// Country table, indexed by `CityRecord::country_idx`.
    countries: Vec<(String, String)>, // (code, name)
    /// Region table, indexed by `CityRecord::region_idx`.
    regions: Vec<String>,
    /// Subregion table, indexed by `CityRecord::subregion_idx`.
    subregions: Vec<String>,
    /// Time zone table, indexed by `CityRecord::tz_idx`.
    timezones: Vec<String>,
}

/// Raw binary city record (13 bytes + name).
#[derive(Debug, Clone)]
struct CityRecord {
    /// Latitude as a 20-bit value; degrees = `raw * 180 / 2^20 - 90`.
    lat_raw: u32, // 20-bit
    /// Longitude as a 20-bit value; degrees = `raw * 360 / 2^20 - 180`.
    lon_raw: u32, // 20-bit
    /// Index into the country table.
    country_idx: u8,
    /// The record's full packed 32-bit word; the population digits are
    /// extracted from bits 12..=23 when a `City` is built.
    pop_code: u32,
    /// Index into the region table.
    region_idx: u16,
    /// Index into the subregion table.
    subregion_idx: u16,
    /// Index into the time zone table.
    tz_idx: u16,
    /// City name.
    name: String,
}
41
42impl GeolocationDb {
43    /// Load the database from ExifTool's Geolocation.dat file.
44    pub fn load<P: AsRef<Path>>(path: P) -> Option<Self> {
45        let data = std::fs::read(path.as_ref()).ok()?;
46        Self::parse(&data)
47    }
48
49    /// Try to find the database in common locations.
50    pub fn load_default() -> Option<Self> {
51        // Try relative to executable, then common ExifTool locations
52        let candidates = [
53            "Geolocation.dat",
54            "../exiftool/lib/Image/ExifTool/Geolocation.dat",
55            "/usr/share/exiftool/Geolocation.dat",
56            "/usr/local/share/exiftool/Geolocation.dat",
57        ];
58
59        for path in &candidates {
60            if let Some(db) = Self::load(path) {
61                return Some(db);
62            }
63        }
64
65        // Try relative to the executable path
66        if let Ok(exe) = std::env::current_exe() {
67            if let Some(dir) = exe.parent() {
68                let p = dir.join("Geolocation.dat");
69                if let Some(db) = Self::load(&p) {
70                    return Some(db);
71                }
72            }
73        }
74
75        None
76    }
77
78    /// Parse the binary database.
79    fn parse(data: &[u8]) -> Option<Self> {
80        // Find first newline (end of header line)
81        let header_end = data.iter().position(|&b| b == b'\n')?;
82        let header = std::str::from_utf8(&data[..header_end]).ok()?;
83
84        // Validate header: "GeolocationX.XX\tNNNN"
85        if !header.starts_with("Geolocation") {
86            return None;
87        }
88        let tab_pos = header.find('\t')?;
89        let city_count: usize = header[tab_pos + 1..].parse().ok()?;
90
91        // Skip comment line
92        let mut pos = header_end + 1;
93        if pos < data.len() && data[pos] == b'#' {
94            while pos < data.len() && data[pos] != b'\n' {
95                pos += 1;
96            }
97            pos += 1;
98        }
99
100        // Read city records
101        let mut cities = Vec::with_capacity(city_count);
102        loop {
103            if pos + 6 > data.len() {
104                break;
105            }
106            // Check for section separator: \0\0\0\0\xNN\n
107            if data[pos] == 0 && data[pos + 1] == 0 && data[pos + 2] == 0 && data[pos + 3] == 0 {
108                pos += 6; // Skip separator
109                break;
110            }
111
112            // Need at least 14 bytes (13 binary + 1 for name + newline)
113            if pos + 14 > data.len() {
114                break;
115            }
116
117            // Parse 13-byte binary header
118            let lt = u16::from_be_bytes([data[pos], data[pos + 1]]);
119            let f = data[pos + 2];
120            let ln = u16::from_be_bytes([data[pos + 3], data[pos + 4]]);
121            let code =
122                u32::from_be_bytes([data[pos + 5], data[pos + 6], data[pos + 7], data[pos + 8]]);
123            let sn = u16::from_be_bytes([data[pos + 9], data[pos + 10]]);
124            let tn = data[pos + 11];
125            let fn_byte = data[pos + 12];
126
127            let lat_raw = ((lt as u32) << 4) | ((f >> 4) as u32);
128            let lon_raw = ((ln as u32) << 4) | ((f & 0x0F) as u32);
129            let country_idx = (code >> 24) as u8;
130            let region_idx = (code & 0x0FFF) as u16;
131            let subregion_idx = sn & 0x7FFF;
132
133            // Timezone: 9-bit index
134            let tz_high = (fn_byte >> 7) as u16; // v1.03: bit 7 of feature byte
135            let tz_idx = (tz_high << 8) | (tn as u16);
136
137            // Find city name (UTF-8 until newline)
138            let name_start = pos + 13;
139            let name_end = data[name_start..]
140                .iter()
141                .position(|&b| b == b'\n')
142                .map(|p| name_start + p)
143                .unwrap_or(data.len());
144
145            let name =
146                crate::encoding::decode_utf8_or_latin1(&data[name_start..name_end]).to_string();
147
148            cities.push(CityRecord {
149                lat_raw,
150                lon_raw,
151                country_idx,
152                pop_code: code,
153                region_idx,
154                subregion_idx,
155                tz_idx,
156                name,
157            });
158
159            pos = name_end + 1;
160        }
161
162        // Read string lists
163        // Countries: "CCCountryName\n" (2-char code + name on same line)
164        let countries = read_country_list(data, &mut pos);
165        skip_separator(data, &mut pos);
166        let regions = read_string_list(data, &mut pos);
167        skip_separator(data, &mut pos);
168        let subregions = read_string_list(data, &mut pos);
169        skip_separator(data, &mut pos);
170        let timezones = read_string_list(data, &mut pos);
171
172        Some(GeolocationDb {
173            cities,
174            countries,
175            regions,
176            subregions,
177            timezones,
178        })
179    }
180
181    /// Find the nearest city to the given coordinates.
182    pub fn find_nearest(&self, lat: f64, lon: f64) -> Option<City> {
183        if self.cities.is_empty() {
184            return None;
185        }
186
187        let mut best_idx = 0;
188        let mut best_dist = f64::MAX;
189
190        for (i, city) in self.cities.iter().enumerate() {
191            let clat = city.lat_raw as f64 * 180.0 / 1048576.0 - 90.0;
192            let clon = city.lon_raw as f64 * 360.0 / 1048576.0 - 180.0;
193
194            let dlat = (lat - clat) * std::f64::consts::PI / 180.0;
195            let dlon = (lon - clon) * std::f64::consts::PI / 180.0;
196
197            // Simplified distance (no need for Haversine for nearest-city search)
198            let cos_lat = ((lat + clat) / 2.0 * std::f64::consts::PI / 180.0).cos();
199            let dist = dlat * dlat + (dlon * cos_lat) * (dlon * cos_lat);
200
201            if dist < best_dist {
202                best_dist = dist;
203                best_idx = i;
204            }
205        }
206
207        Some(self.get_city(best_idx))
208    }
209
210    /// Get a city entry by index.
211    fn get_city(&self, idx: usize) -> City {
212        let rec = &self.cities[idx];
213
214        let lat = rec.lat_raw as f64 * 180.0 / 1048576.0 - 90.0;
215        let lon = rec.lon_raw as f64 * 360.0 / 1048576.0 - 180.0;
216
217        let (country_code, country) = self
218            .countries
219            .get(rec.country_idx as usize)
220            .cloned()
221            .unwrap_or_else(|| ("??".into(), "Unknown".into()));
222
223        let region = self
224            .regions
225            .get(rec.region_idx as usize)
226            .cloned()
227            .unwrap_or_default();
228
229        let subregion = self
230            .subregions
231            .get(rec.subregion_idx as usize)
232            .cloned()
233            .unwrap_or_default();
234
235        let timezone = self
236            .timezones
237            .get(rec.tz_idx as usize)
238            .cloned()
239            .unwrap_or_default();
240
241        // Decode population: N.Fe+0E
242        let e = (rec.pop_code >> 20) & 0x0F;
243        let n = (rec.pop_code >> 16) & 0x0F;
244        let f = (rec.pop_code >> 12) & 0x0F;
245        let pop_str = format!("{}.{}e+0{}", n, f, e);
246        let population: u64 = pop_str.parse::<f64>().unwrap_or(0.0) as u64;
247
248        City {
249            name: rec.name.clone(),
250            country_code,
251            country,
252            region,
253            subregion,
254            timezone,
255            population,
256            lat: (lat * 10000.0).round() / 10000.0,
257            lon: (lon * 10000.0).round() / 10000.0,
258        }
259    }
260
261    /// Number of cities in the database.
262    pub fn len(&self) -> usize {
263        self.cities.len()
264    }
265
266    /// Returns true if the database contains no cities.
267    pub fn is_empty(&self) -> bool {
268        self.cities.is_empty()
269    }
270}
271
272fn read_string_list(data: &[u8], pos: &mut usize) -> Vec<String> {
273    let mut list = Vec::new();
274    loop {
275        if *pos + 6 > data.len() {
276            break;
277        }
278        // Check for separator
279        if data[*pos] == 0
280            && *pos + 3 < data.len()
281            && data[*pos + 1] == 0
282            && data[*pos + 2] == 0
283            && data[*pos + 3] == 0
284        {
285            break;
286        }
287        // Read until newline
288        let start = *pos;
289        while *pos < data.len() && data[*pos] != b'\n' {
290            *pos += 1;
291        }
292        let s = crate::encoding::decode_utf8_or_latin1(&data[start..*pos]).to_string();
293        list.push(s);
294        if *pos < data.len() {
295            *pos += 1; // Skip newline
296        }
297    }
298    list
299}
300
301fn read_country_list(data: &[u8], pos: &mut usize) -> Vec<(String, String)> {
302    let mut list = Vec::new();
303    loop {
304        if *pos + 6 > data.len() {
305            break;
306        }
307        if data[*pos] == 0 && data[*pos + 1] == 0 && data[*pos + 2] == 0 && data[*pos + 3] == 0 {
308            break;
309        }
310        // Read line: "CCCountryName\n"
311        let start = *pos;
312        while *pos < data.len() && data[*pos] != b'\n' {
313            *pos += 1;
314        }
315        let line = crate::encoding::decode_utf8_or_latin1(&data[start..*pos]).to_string();
316        if *pos < data.len() {
317            *pos += 1;
318        }
319
320        if line.len() >= 2 {
321            let code = line[..2].to_string();
322            let name = line[2..].to_string();
323            list.push((code, name));
324        }
325    }
326    list
327}
328
/// Advance `*pos` past a 6-byte section separator, if one appears to start
/// at `*pos`.
///
/// Only the first byte is inspected: when at least six bytes remain and the
/// byte at `*pos` is zero, `*pos` moves forward by six. Otherwise `*pos` is
/// left untouched.
fn skip_separator(data: &[u8], pos: &mut usize) {
    let start = *pos;
    let end = start + 6;
    if end > data.len() {
        return;
    }
    if data[start] == 0 {
        *pos = end;
    }
}