1use serde::Deserialize;
2use sha2::{Digest, Sha256};
3
4use crate::{Error, Gtfs, RawGtfs};
5use std::collections::HashMap;
6use std::convert::TryFrom;
7use std::fs::File;
8use std::io::Read;
9use std::path::Path;
10use web_time::Instant;
11
/// Builder-style set of options that control how a GTFS feed is loaded.
///
/// Obtain the defaults with [`Default::default`], adjust them with the
/// chainable setters, then call one of the `read*` methods (or `raw()`).
pub struct GtfsReader {
    /// Whether `stop_times.txt` is parsed. When `false`, the readers use an
    /// empty list instead of opening the file. Defaults to `true`.
    pub read_stop_times: bool,
    /// Whether `shapes.txt` is parsed. When `false`, archive loading
    /// substitutes an empty list for it. Defaults to `true`.
    pub read_shapes: bool,
    /// When `true`, `unknown_to_default()` is invoked on the freshly read
    /// `RawGtfs` (presumably mapping unrecognised enumeration values to their
    /// defaults; see that method). Defaults to `false`.
    ///
    /// The misspelling (`unkown`) is part of the public API and is kept for
    /// backward compatibility.
    pub unkown_enum_as_default: bool,
    /// When `true`, CSV values are read with `csv::Trim::Fields`; when
    /// `false`, with `csv::Trim::None`. Header names are trimmed in either
    /// case. Defaults to `true`.
    pub trim_fields: bool,
}

// Written by hand instead of being generated by a derive macro so the default
// values are visible (and greppable) right next to the fields they apply to.
impl Default for GtfsReader {
    fn default() -> Self {
        Self {
            read_stop_times: true,
            read_shapes: true,
            unkown_enum_as_default: false,
            trim_fields: true,
        }
    }
}
53
54impl GtfsReader {
55 pub fn read_stop_times(mut self, read_stop_times: bool) -> Self {
60 self.read_stop_times = read_stop_times;
61 self
62 }
63
64 pub fn read_shapes(mut self, read_shapes: bool) -> Self {
67 self.read_shapes = read_shapes;
68 self
69 }
70
71 pub fn unkown_enum_as_default(mut self, unkown_enum_as_default: bool) -> Self {
78 self.unkown_enum_as_default = unkown_enum_as_default;
79 self
80 }
81
82 pub fn trim_fields(mut self, trim_fields: bool) -> Self {
87 self.trim_fields = trim_fields;
88 self
89 }
90
91 #[cfg(not(target_arch = "wasm32"))]
96 pub fn read(self, gtfs: &str) -> Result<Gtfs, Error> {
97 self.raw().read(gtfs).and_then(Gtfs::try_from)
98 }
99
100 pub fn read_from_path<P>(self, path: P) -> Result<Gtfs, Error>
102 where
103 P: AsRef<Path>,
104 {
105 self.raw().read_from_path(path).and_then(Gtfs::try_from)
106 }
107
108 #[cfg(all(feature = "read-url", not(target_arch = "wasm32")))]
112 pub fn read_from_url<U: reqwest::IntoUrl>(self, url: U) -> Result<Gtfs, Error> {
113 self.raw().read_from_url(url).and_then(Gtfs::try_from)
114 }
115
116 #[cfg(feature = "read-url")]
120 pub async fn read_from_url_async<U: reqwest::IntoUrl>(self, url: U) -> Result<Gtfs, Error> {
121 self.raw()
122 .read_from_url_async(url)
123 .await
124 .and_then(Gtfs::try_from)
125 }
126
127 pub fn raw(self) -> RawGtfsReader {
139 RawGtfsReader { reader: self }
140 }
141}
142
/// Reader whose read methods return a `RawGtfs`. It is created by
/// `GtfsReader::raw` and carries the options chosen on that `GtfsReader`.
143pub struct RawGtfsReader {
    /// Parsing options consulted by the read methods of this type.
147 reader: GtfsReader,
148}
149
150impl RawGtfsReader {
151 fn read_from_directory(&self, p: &std::path::Path) -> Result<RawGtfs, Error> {
152 let start_of_read_instant = Instant::now();
153 let files = std::fs::read_dir(p)?
156 .filter_map(|d| {
157 d.ok().and_then(|e| {
158 e.path()
159 .strip_prefix(p)
160 .ok()
161 .and_then(|f| f.to_str().map(|s| s.to_owned()))
162 })
163 })
164 .collect();
165
166 let mut result = RawGtfs {
167 trips: self.read_objs_from_path(p.join("trips.txt")),
168 calendar: self.read_objs_from_optional_path(p, "calendar.txt"),
169 calendar_dates: self.read_objs_from_optional_path(p, "calendar_dates.txt"),
170 stops: self.read_objs_from_path(p.join("stops.txt")),
171 routes: self.read_objs_from_path(p.join("routes.txt")),
172 stop_times: if self.reader.read_stop_times {
173 self.read_objs_from_path(p.join("stop_times.txt"))
174 } else {
175 Ok(Vec::new())
176 },
177 agencies: self.read_objs_from_path(p.join("agency.txt")),
178 shapes: self.read_objs_from_optional_path(p, "shapes.txt"),
179 fare_attributes: self.read_objs_from_optional_path(p, "fare_attributes.txt"),
180 fare_rules: self.read_objs_from_optional_path(p, "fare_rules.txt"),
181 fare_products: self.read_objs_from_optional_path(p, "fare_products.txt"),
182 fare_media: self.read_objs_from_optional_path(p, "fare_media.txt"),
183 rider_categories: self.read_objs_from_optional_path(p, "rider_categories.txt"),
184 frequencies: self.read_objs_from_optional_path(p, "frequencies.txt"),
185 transfers: self.read_objs_from_optional_path(p, "transfers.txt"),
186 pathways: self.read_objs_from_optional_path(p, "pathways.txt"),
187 feed_info: self.read_objs_from_optional_path(p, "feed_info.txt"),
188 read_duration: start_of_read_instant.elapsed(),
189 translations: self.read_objs_from_optional_path(p, "translations.txt"),
190 ticketing_deep_links: self.read_objs_from_optional_path(p, "ticketing_deep_links.txt"),
191 ticketing_identifiers: self
192 .read_objs_from_optional_path(p, "ticketing_identifiers.txt"),
193 files,
194 source_format: crate::SourceFormat::Directory,
195 sha256: None,
196 };
197
198 if self.reader.unkown_enum_as_default {
199 result.unknown_to_default();
200 }
201 Ok(result)
202 }
203
204 #[cfg(not(target_arch = "wasm32"))]
207 pub fn read(self, gtfs: &str) -> Result<RawGtfs, Error> {
208 #[cfg(feature = "read-url")]
209 if gtfs.starts_with("http") {
210 return self.read_from_url(gtfs);
211 }
212 self.read_from_path(gtfs)
213 }
214
/// Downloads the archive at `url` with the blocking `reqwest` client, buffers
/// the whole body in memory and parses it with `read_from_reader`.
///
/// # Errors
/// Returns an error when the request fails, when the server answers with a
/// client or server error status, when the body cannot be read, or when
/// `read_from_reader` fails.
#[cfg(all(feature = "read-url", not(target_arch = "wasm32")))]
pub fn read_from_url<U: reqwest::IntoUrl>(self, url: U) -> Result<RawGtfs, Error> {
    // Reject 4xx/5xx answers here; otherwise the error page would be handed to
    // the zip parser and surface as a misleading archive error.
    let mut res = reqwest::blocking::get(url)?.error_for_status()?;
    let mut body = Vec::new();
    res.read_to_end(&mut body)?;
    // `read_from_reader` needs `Seek`, hence the in-memory cursor.
    self.read_from_reader(std::io::Cursor::new(body))
}
224
/// Asynchronously downloads the archive at `url`, buffers the whole body in
/// memory and parses it with `read_from_reader`.
///
/// # Errors
/// Returns an error when the request fails, when the server answers with a
/// client or server error status, when the body cannot be received, or when
/// `read_from_reader` fails.
#[cfg(feature = "read-url")]
pub async fn read_from_url_async<U: reqwest::IntoUrl>(self, url: U) -> Result<RawGtfs, Error> {
    // Reject 4xx/5xx answers here; otherwise the error page would be handed to
    // the zip parser and surface as a misleading archive error.
    let res = reqwest::get(url)
        .await?
        .error_for_status()?
        .bytes()
        .await?;
    // `read_from_reader` needs `Seek`, hence the in-memory cursor.
    self.read_from_reader(std::io::Cursor::new(res))
}
232
233 pub fn read_from_path<P>(&self, path: P) -> Result<RawGtfs, Error>
235 where
236 P: AsRef<Path>,
237 {
238 let p = path.as_ref();
239 if p.is_file() {
240 let reader = File::open(p)?;
241 self.read_from_reader(reader)
242 } else if p.is_dir() {
243 self.read_from_directory(p)
244 } else {
245 Err(Error::NotFileNorDirectory(format!("{}", p.display())))
246 }
247 }
248
249 pub fn read_from_reader<T: std::io::Read + std::io::Seek>(
250 &self,
251 reader: T,
252 ) -> Result<RawGtfs, Error> {
253 let start_of_read_instant = Instant::now();
254 let mut hasher = Sha256::new();
255 let mut buf_reader = std::io::BufReader::new(reader);
256 let _n = std::io::copy(&mut buf_reader, &mut hasher)?;
257 let hash = hasher.finalize();
258 let mut archive = zip::ZipArchive::new(buf_reader)?;
259 let mut file_mapping = HashMap::new();
260 let mut files = Vec::new();
261
262 for i in 0..archive.len() {
263 let archive_file = archive.by_index(i)?;
264 files.push(archive_file.name().to_owned());
265
266 for gtfs_file in &[
267 "agency.txt",
268 "calendar.txt",
269 "calendar_dates.txt",
270 "routes.txt",
271 "stops.txt",
272 "stop_times.txt",
273 "trips.txt",
274 "fare_attributes.txt",
275 "fare_rules.txt",
276 "fare_products.txt",
277 "fare_media.txt",
278 "rider_categories.txt",
279 "frequencies.txt",
280 "transfers.txt",
281 "pathways.txt",
282 "feed_info.txt",
283 "shapes.txt",
284 ] {
285 let path = std::path::Path::new(archive_file.name());
286 if path.file_name() == Some(std::ffi::OsStr::new(gtfs_file)) {
287 file_mapping.insert(gtfs_file, i);
288 break;
289 }
290 }
291 }
292
293 let mut result = RawGtfs {
294 agencies: self.read_file(&file_mapping, &mut archive, "agency.txt"),
295 calendar: self.read_optional_file(&file_mapping, &mut archive, "calendar.txt"),
296 calendar_dates: self.read_optional_file(
297 &file_mapping,
298 &mut archive,
299 "calendar_dates.txt",
300 ),
301 routes: self.read_file(&file_mapping, &mut archive, "routes.txt"),
302 stops: self.read_file(&file_mapping, &mut archive, "stops.txt"),
303 stop_times: if self.reader.read_stop_times {
304 self.read_file(&file_mapping, &mut archive, "stop_times.txt")
305 } else {
306 Ok(Vec::new())
307 },
308 trips: self.read_file(&file_mapping, &mut archive, "trips.txt"),
309 fare_attributes: self.read_optional_file(
310 &file_mapping,
311 &mut archive,
312 "fare_attributes.txt",
313 ),
314 fare_rules: self.read_optional_file(&file_mapping, &mut archive, "fare_rules.txt"),
315 fare_products: self.read_optional_file(
316 &file_mapping,
317 &mut archive,
318 "fare_products.txt",
319 ),
320 fare_media: self.read_optional_file(&file_mapping, &mut archive, "fare_media.txt"),
321 rider_categories: self.read_optional_file(
322 &file_mapping,
323 &mut archive,
324 "rider_categories.txt",
325 ),
326 frequencies: self.read_optional_file(&file_mapping, &mut archive, "frequencies.txt"),
327 transfers: self.read_optional_file(&file_mapping, &mut archive, "transfers.txt"),
328 pathways: self.read_optional_file(&file_mapping, &mut archive, "pathways.txt"),
329 feed_info: self.read_optional_file(&file_mapping, &mut archive, "feed_info.txt"),
330 shapes: if self.reader.read_shapes {
331 self.read_optional_file(&file_mapping, &mut archive, "shapes.txt")
332 } else {
333 Some(Ok(Vec::new()))
334 },
335 translations: self.read_optional_file(&file_mapping, &mut archive, "translations.txt"),
336 ticketing_deep_links: self.read_optional_file(
337 &file_mapping,
338 &mut archive,
339 "ticketing_deep_links.txt",
340 ),
341 ticketing_identifiers: self.read_optional_file(
342 &file_mapping,
343 &mut archive,
344 "ticketing_identifiers.txt",
345 ),
346 read_duration: start_of_read_instant.elapsed(),
347 files,
348 source_format: crate::SourceFormat::Zip,
349 sha256: Some(format!("{hash:x}")),
350 };
351
352 if self.reader.unkown_enum_as_default {
353 result.unknown_to_default();
354 }
355 Ok(result)
356 }
357
358 fn read_objs<T, O>(&self, mut reader: T, file_name: &str) -> Result<Vec<O>, Error>
359 where
360 for<'de> O: Deserialize<'de>,
361 T: std::io::Read,
362 {
363 let mut bom = [0; 3];
364 reader
365 .read_exact(&mut bom)
366 .map_err(|e| Error::NamedFileIO {
367 file_name: file_name.to_owned(),
368 source: Box::new(e),
369 })?;
370
371 let chained = if bom != [0xefu8, 0xbbu8, 0xbfu8] {
372 bom.chain(reader)
373 } else {
374 [].chain(reader)
375 };
376
377 let mut reader = csv::ReaderBuilder::new()
378 .flexible(true)
379 .trim(if self.reader.trim_fields {
380 csv::Trim::Fields
381 } else {
382 csv::Trim::None
383 })
384 .from_reader(chained);
385 let headers = reader
387 .headers()
388 .map_err(|e| Error::CSVError {
389 file_name: file_name.to_owned(),
390 source: e,
391 line_in_error: None,
392 })?
393 .clone()
394 .into_iter()
395 .map(|x| x.trim())
396 .collect::<csv::StringRecord>();
397
398 let mut rec = csv::StringRecord::new();
400 let mut objs = Vec::new();
401
402 while reader.read_record(&mut rec).map_err(|e| Error::CSVError {
404 file_name: file_name.to_owned(),
405 source: e,
406 line_in_error: None,
407 })? {
408 let obj = rec
409 .deserialize(Some(&headers))
410 .map_err(|e| Error::CSVError {
411 file_name: file_name.to_owned(),
412 source: e,
413 line_in_error: Some(crate::error::LineError {
414 headers: headers.into_iter().map(String::from).collect(),
415 values: rec.into_iter().map(String::from).collect(),
416 }),
417 })?;
418 objs.push(obj);
419 }
420 Ok(objs)
421 }
422
423 fn read_objs_from_path<O>(&self, path: std::path::PathBuf) -> Result<Vec<O>, Error>
424 where
425 for<'de> O: Deserialize<'de>,
426 {
427 let file_name = path
428 .file_name()
429 .and_then(|f| f.to_str())
430 .unwrap_or("invalid_file_name")
431 .to_string();
432 if path.exists() {
433 File::open(path)
434 .map_err(|e| Error::NamedFileIO {
435 file_name: file_name.to_owned(),
436 source: Box::new(e),
437 })
438 .and_then(|r| self.read_objs(r, &file_name))
439 } else {
440 Err(Error::MissingFile(file_name))
441 }
442 }
443
444 fn read_objs_from_optional_path<O>(
445 &self,
446 dir_path: &std::path::Path,
447 file_name: &str,
448 ) -> Option<Result<Vec<O>, Error>>
449 where
450 for<'de> O: Deserialize<'de>,
451 {
452 File::open(dir_path.join(file_name))
453 .ok()
454 .map(|r| self.read_objs(r, file_name))
455 }
456
457 fn read_file<O, T>(
458 &self,
459 file_mapping: &HashMap<&&str, usize>,
460 archive: &mut zip::ZipArchive<T>,
461 file_name: &str,
462 ) -> Result<Vec<O>, Error>
463 where
464 for<'de> O: Deserialize<'de>,
465 T: std::io::Read + std::io::Seek,
466 {
467 self.read_optional_file(file_mapping, archive, file_name)
468 .unwrap_or_else(|| Err(Error::MissingFile(file_name.to_owned())))
469 }
470
471 fn read_optional_file<O, T>(
472 &self,
473 file_mapping: &HashMap<&&str, usize>,
474 archive: &mut zip::ZipArchive<T>,
475 file_name: &str,
476 ) -> Option<Result<Vec<O>, Error>>
477 where
478 for<'de> O: Deserialize<'de>,
479 T: std::io::Read + std::io::Seek,
480 {
481 file_mapping.get(&file_name).map(|i| {
482 self.read_objs(
483 archive.by_index(*i).map_err(|e| Error::NamedFileIO {
484 file_name: file_name.to_owned(),
485 source: Box::new(e),
486 })?,
487 file_name,
488 )
489 })
490 }
491}