1#[macro_use]
55extern crate serde_derive;
56extern crate rayon;
57extern crate redis;
58extern crate sift4;
59use rayon::iter::{IntoParallelIterator, ParallelIterator};
60use sift4::*;
61use std::cmp::Ordering;
62use std::error::Error;
63use std::fs;
64
65pub mod countries;
67
68pub mod regions;
70
71use self::countries::*;
72use self::regions::*;
73
74#[derive(Debug)]
76pub struct CityData {
77 names: Vec<String>,
78 countries: Vec<Country>,
79 regions: Vec<Region>,
80 latitudes: Vec<f32>,
81 longitudes: Vec<f32>,
82}
83
84#[derive(Debug)]
87pub struct City<'a> {
88 name: &'a str,
89 country: Country,
90 region: Region,
91 latitude: f32,
92 longitude: f32,
93}
94
/// A geographic position expressed as a latitude/longitude pair in degrees.
#[derive(Clone, Copy, Debug)]
pub struct Coordinate {
    /// Latitude in degrees.
    latitude: f32,
    /// Longitude in degrees.
    longitude: f32,
}

impl Coordinate {
    /// Creates a coordinate from a latitude and a longitude.
    ///
    /// The values are stored as given; no range validation is performed.
    pub fn new(latitude: f32, longitude: f32) -> Self {
        Self {
            latitude,
            longitude,
        }
    }
}
111
112#[derive(Debug, Serialize, Deserialize)]
115pub struct FuzzyResult {
116 city: String,
117 latitude: f32,
118 longitude: f32,
119 score: f32,
120}
121
122impl FuzzyResult {
123 pub fn new(city_data: City, score: f32) -> FuzzyResult {
126 let City {
127 name,
128 country,
129 region,
130 latitude,
131 longitude,
132 } = city_data;
133 let city = format!("{}, {}, {}", name, region, country);
134 FuzzyResult {
135 city,
136 latitude,
137 longitude,
138 score,
139 }
140 }
141}
142
143impl CityData {
144 pub fn new() -> Self {
147 CityData {
148 names: Vec::new(),
149 countries: Vec::new(),
150 regions: Vec::new(),
151 latitudes: Vec::new(),
152 longitudes: Vec::new(),
153 }
154 }
155
156 pub fn populate_from_file(&mut self, filename: &str) -> Result<(), Box<dyn Error>> {
165 let buffer = fs::read_to_string(filename)?;
166 let mut lines = buffer.lines();
167
168 lines.next();
170
171 for line in lines {
172 if let [name, country, region, latitude, longitude] =
173 line.split(',').collect::<Vec<&str>>()[..]
174 {
175 let latitude: f32 = latitude.parse()?;
176 let longitude: f32 = longitude.parse()?;
177
178 let country = match country {
179 "US" => Country::US,
180 "CA" => Country::CA,
181 _ => continue,
182 };
183
184 let region = match country {
185 Country::US => CityData::us_match(region),
186 Country::CA => CityData::ca_match(region),
187 };
188
189 self.add_city(name, country, region, latitude, longitude);
190 };
191 }
192
193 Ok(())
194 }
195
196 fn ca_match(region: &str) -> Region {
199 match region {
200 "01" => Region::Province(CAProvince::AB),
201 "02" => Region::Province(CAProvince::BC),
202 "03" => Region::Province(CAProvince::MB),
203 "04" => Region::Province(CAProvince::NB),
204 "05" => Region::Province(CAProvince::NL),
205 "07" => Region::Province(CAProvince::NS),
206 "08" => Region::Province(CAProvince::ON),
207 "09" => Region::Province(CAProvince::PE),
208 "10" => Region::Province(CAProvince::QC),
209 "11" => Region::Province(CAProvince::SK),
210 "12" => Region::Territory(CATerritory::YT),
211 "13" => Region::Territory(CATerritory::NT),
212 "14" => Region::Territory(CATerritory::NU),
213 _ => Region::None,
214 }
215 }
216
217 fn us_match(region: &str) -> Region {
218 match region {
219 "AL" => Region::State(USState::AL),
220 "AK" => Region::State(USState::AK),
221 "AZ" => Region::State(USState::AZ),
222 "AR" => Region::State(USState::AR),
223 "CA" => Region::State(USState::CA),
224 "CO" => Region::State(USState::CO),
225 "CT" => Region::State(USState::CT),
226 "DE" => Region::State(USState::DE),
227 "FL" => Region::State(USState::FL),
228 "GA" => Region::State(USState::GA),
229 "HI" => Region::State(USState::HI),
230 "ID" => Region::State(USState::ID),
231 "IL" => Region::State(USState::IL),
232 "IN" => Region::State(USState::IN),
233 "IA" => Region::State(USState::IA),
234 "KS" => Region::State(USState::KS),
235 "KY" => Region::State(USState::KY),
236 "LA" => Region::State(USState::LA),
237 "ME" => Region::State(USState::ME),
238 "MD" => Region::State(USState::MD),
239 "MA" => Region::State(USState::MA),
240 "MI" => Region::State(USState::MI),
241 "MN" => Region::State(USState::MN),
242 "MS" => Region::State(USState::MS),
243 "MO" => Region::State(USState::MO),
244 "MT" => Region::State(USState::MT),
245 "NE" => Region::State(USState::NE),
246 "NV" => Region::State(USState::NV),
247 "NH" => Region::State(USState::NH),
248 "NJ" => Region::State(USState::NJ),
249 "NM" => Region::State(USState::NM),
250 "NY" => Region::State(USState::NY),
251 "NC" => Region::State(USState::NC),
252 "ND" => Region::State(USState::ND),
253 "OH" => Region::State(USState::OH),
254 "OK" => Region::State(USState::OK),
255 "OR" => Region::State(USState::OR),
256 "PA" => Region::State(USState::PA),
257 "RI" => Region::State(USState::RI),
258 "SC" => Region::State(USState::SC),
259 "SD" => Region::State(USState::SD),
260 "TN" => Region::State(USState::TN),
261 "TX" => Region::State(USState::TX),
262 "UT" => Region::State(USState::UT),
263 "VT" => Region::State(USState::VT),
264 "VA" => Region::State(USState::VA),
265 "WA" => Region::State(USState::WA),
266 "WV" => Region::State(USState::WV),
267 "WI" => Region::State(USState::WI),
268 "WY" => Region::State(USState::WY),
269 _ => Region::None,
270 }
271 }
272
273 fn add_city(
274 &mut self,
275 name: &str,
276 country: Country,
277 region: Region,
278 latitude: f32,
279 longitude: f32,
280 ) {
281 self.names.push(name.to_string());
282 self.countries.push(country);
283 self.regions.push(region);
284 self.latitudes.push(latitude);
285 self.longitudes.push(longitude);
286 }
287
288 pub fn get_city(&self, idx: usize) -> City {
311 City {
312 name: &self.names[idx],
313 country: self.countries[idx],
314 region: self.regions[idx],
315 latitude: self.latitudes[idx],
316 longitude: self.longitudes[idx],
317 }
318 }
319
320 pub fn total_score(&self, term: &str, idx: usize, loc: Option<Coordinate>) -> f32 {
338 let city = &self.names[idx];
339 let latitude = self.latitudes[idx];
340 let longitude = self.longitudes[idx];
341 let city_loc = Coordinate {
342 latitude,
343 longitude,
344 };
345
346 let str_dist = sift4(city, term) as f32;
347 let mut str_score = if str_dist >= term.len() as f32 {
348 0.0
349 } else {
350 (term.len() as f32 - str_dist) / term.len() as f32
351 };
352
353 if str_score == 0.0 {
354 return 0.0;
355 };
356
357 if city.chars().next().unwrap() != term.chars().next().unwrap() {
359 if str_score < 0.1 {
360 str_score = 0.0;
361 } else {
362 str_score -= 0.1;
363 }
364 }
365
366 let mut dist_score = str_score;
367
368 if let Some(loc2) = loc {
369 let phys_dist = CityData::find_distance_earth(city_loc, loc2);
370 dist_score = CityData::dist_score(phys_dist);
371 };
372
373 (str_score * 5.0 + dist_score * 3.0) / 8.0
374 }
375
376 pub fn find_distance_earth(loc1: Coordinate, loc2: Coordinate) -> f32 {
391 const R: f32 = 6372.8;
392 let Coordinate {
393 latitude: mut lat1,
394 longitude: mut long1,
395 } = loc1;
396 let Coordinate {
397 latitude: mut lat2,
398 longitude: long2,
399 } = loc2;
400 long1 -= long2;
401 long1 = long1.to_radians();
402 lat1 = lat1.to_radians();
403 lat2 = lat2.to_radians();
404 let dz: f32 = lat1.sin() - lat2.sin();
405 let dx: f32 = long1.cos() * lat1.cos() - lat2.cos();
406 let dy: f32 = long1.sin() * lat1.cos();
407 ((dx * dx + dy * dy + dz * dz).sqrt() / 2.0).asin() * 2.0 * R
408 }
409
410 fn dist_score(dist: f32) -> f32 {
413 if dist < 92.0 {
414 1.0
415 } else {
416 92.0 / (dist.powf(2.0) - (91.9 as f32).powf(2.0))
417 }
418 }
419
420 pub fn search(&self, term: &str, loc: Option<Coordinate>) -> Vec<FuzzyResult> {
445 let mut found: Vec<(usize, f32)> = (0..self.names.len())
446 .into_par_iter()
447 .map(|i| (i, self.total_score(term, i, loc)))
448 .filter(|(_, score)| score > &0.5)
449 .collect();
450
451 found.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
452
453 found
454 .iter()
455 .map(|result| FuzzyResult::new(self.get_city(result.0), result.1))
456 .collect()
457 }
458}
459
#[cfg(test)]
mod tests {
    use super::*;

    /// CSV fixture shared by the file-backed tests.
    const FIXTURE: &str = "data/cities_canada-usa-filtered.csv";

    /// Loads the fixture into a fresh store, panicking if it cannot be read
    /// or parsed.
    fn load_fixture() -> CityData {
        let mut cities = CityData::new();
        cities.populate_from_file(FIXTURE).unwrap();
        cities
    }

    /// Builds a store holding exactly one US city.
    fn single_us_city(name: &str, state: USState, latitude: f32, longitude: f32) -> CityData {
        let mut cities = CityData::new();
        cities.add_city(name, Country::US, Region::State(state), latitude, longitude);
        cities
    }

    #[test]
    fn test_citydata_struct_nyc() {
        let cities = single_us_city("New York City", USState::NY, 40.7128, 74.0060);
        let expected = "City { name: \"New York City\", country: US, region: State(NY), latitude: 40.7128, longitude: 74.006 }";
        assert_eq!(format!("{:?}", cities.get_city(0)), expected);
    }

    #[test]
    fn test_citydata_struct_sf() {
        let cities = single_us_city("San Francisco", USState::CA, 37.7749, 122.4194);
        let expected = "City { name: \"San Francisco\", country: US, region: State(CA), latitude: 37.7749, longitude: 122.4194 }";
        assert_eq!(format!("{:?}", cities.get_city(0)), expected);
    }

    #[test]
    fn test_populate_from_file() {
        let cities = load_fixture();
        let first = format!("{:?}", cities.get_city(0));
        assert_eq!(
            first,
            "City { name: \"Abbotsford\", country: CA, region: Province(BC), latitude: 49.05798, longitude: -122.25257 }"
        );
    }

    #[test]
    fn test_str_dist() {
        let distance = sift4("Londo", "London");
        assert_eq!(distance, 1);
    }

    #[test]
    fn test_phys_dist() {
        let sf = Coordinate::new(37.774929, -122.419416);
        let nyc = Coordinate::new(40.730610, -73.935242);
        assert_eq!(CityData::find_distance_earth(sf, nyc), 4135.694);
    }

    #[test]
    fn test_dist_score() {
        let score = CityData::dist_score(4135.694);
        assert_eq!(score, 0.0000053815274);
    }

    #[test]
    fn test_total_score_no_gps() {
        let cities = load_fixture();
        let score = cities.total_score("Abbotsfor", 0, None);
        assert_eq!(score, 0.88888896);
    }

    #[test]
    fn test_search_with_gps() {
        let cities = load_fixture();
        let london = Coordinate::new(42.98339, -81.23304);
        let results = cities.search("London", Some(london));
        let expected = "[FuzzyResult { city: \"London, ON, CA\", latitude: 42.98339, longitude: -81.23304, score: 1.0 }, FuzzyResult { city: \"London, OH, US\", latitude: 39.88645, longitude: -83.44825, score: 0.6252391 }, FuzzyResult { city: \"London, KY, US\", latitude: 37.12898, longitude: -84.08326, score: 0.6250727 }, FuzzyResult { city: \"Lemont, IL, US\", latitude: 41.67364, longitude: -88.00173, score: 0.52094036 }, FuzzyResult { city: \"Brant, ON, CA\", latitude: 43.1334, longitude: -80.34967, score: 0.5208334 }]";
        assert_eq!(format!("{:?}", results), expected);
    }
}