1use serde::Deserialize;
2use sha2::{Digest, Sha256};
3
4use crate::{Error, Gtfs, RawGtfs};
5use std::collections::HashMap;
6use std::convert::TryFrom;
7use std::fs::File;
8use std::io::Read;
9use std::path::Path;
10use web_time::Instant;
11
/// Options controlling how a GTFS feed is read.
///
/// Start from [`GtfsReader::default`], adjust the options with the
/// builder-style methods, then call one of the `read*` methods, or
/// [`GtfsReader::raw`] to obtain a [`RawGtfsReader`].
pub struct GtfsReader {
    /// Whether `stop_times.txt` is read. When `false`, an empty list of stop
    /// times is used instead. Defaults to `true`.
    pub read_stop_times: bool,
    /// Whether `shapes.txt` is read. Defaults to `true`.
    pub read_shapes: bool,
    /// When `true`, `unknown_to_default()` is called on the [`RawGtfs`] once
    /// every file has been read. Defaults to `false`.
    ///
    /// Note: the `unkown` spelling is part of the public API.
    pub unkown_enum_as_default: bool,
    /// When `true`, the CSV reader is configured with `csv::Trim::Fields`;
    /// otherwise with `csv::Trim::None`. Defaults to `true`.
    pub trim_fields: bool,
}
47
48impl Default for GtfsReader {
49 fn default() -> Self {
50 GtfsReader {
51 read_stop_times: true,
52 read_shapes: true,
53 unkown_enum_as_default: false,
54 trim_fields: true,
55 }
56 }
57}
58
59impl GtfsReader {
60 pub fn read_stop_times(mut self, read_stop_times: bool) -> Self {
65 self.read_stop_times = read_stop_times;
66 self
67 }
68
69 pub fn read_shapes(mut self, read_shapes: bool) -> Self {
72 self.read_shapes = read_shapes;
73 self
74 }
75
76 pub fn unkown_enum_as_default(mut self, unkown_enum_as_default: bool) -> Self {
83 self.unkown_enum_as_default = unkown_enum_as_default;
84 self
85 }
86
87 pub fn trim_fields(mut self, trim_fields: bool) -> Self {
92 self.trim_fields = trim_fields;
93 self
94 }
95
96 #[cfg(not(target_arch = "wasm32"))]
101 pub fn read(self, gtfs: &str) -> Result<Gtfs, Error> {
102 self.raw().read(gtfs).and_then(Gtfs::try_from)
103 }
104
105 pub fn read_from_path<P>(self, path: P) -> Result<Gtfs, Error>
107 where
108 P: AsRef<Path>,
109 {
110 self.raw().read_from_path(path).and_then(Gtfs::try_from)
111 }
112
113 #[cfg(all(feature = "read-url", not(target_arch = "wasm32")))]
117 pub fn read_from_url<U: reqwest::IntoUrl>(self, url: U) -> Result<Gtfs, Error> {
118 self.raw().read_from_url(url).and_then(Gtfs::try_from)
119 }
120
121 #[cfg(feature = "read-url")]
125 pub async fn read_from_url_async<U: reqwest::IntoUrl>(self, url: U) -> Result<Gtfs, Error> {
126 self.raw()
127 .read_from_url_async(url)
128 .await
129 .and_then(Gtfs::try_from)
130 }
131
132 pub fn raw(self) -> RawGtfsReader {
144 RawGtfsReader { reader: self }
145 }
146}
147
/// Reader producing a [`RawGtfs`] from a path, a URL or a seekable reader,
/// using the options of the wrapped [`GtfsReader`].
///
/// Obtained through [`GtfsReader::raw`].
pub struct RawGtfsReader {
    /// Options applied while reading.
    reader: GtfsReader,
}
154
155impl RawGtfsReader {
156 fn read_from_directory(&self, p: &std::path::Path) -> Result<RawGtfs, Error> {
157 let start_of_read_instant = Instant::now();
158 let files = std::fs::read_dir(p)?
161 .filter_map(|d| {
162 d.ok().and_then(|e| {
163 e.path()
164 .strip_prefix(p)
165 .ok()
166 .and_then(|f| f.to_str().map(|s| s.to_owned()))
167 })
168 })
169 .collect();
170
171 let mut result = RawGtfs {
172 trips: self.read_objs_from_path(p.join("trips.txt")),
173 calendar: self.read_objs_from_optional_path(p, "calendar.txt"),
174 calendar_dates: self.read_objs_from_optional_path(p, "calendar_dates.txt"),
175 stops: self.read_objs_from_path(p.join("stops.txt")),
176 routes: self.read_objs_from_path(p.join("routes.txt")),
177 stop_times: if self.reader.read_stop_times {
178 self.read_objs_from_path(p.join("stop_times.txt"))
179 } else {
180 Ok(Vec::new())
181 },
182 agencies: self.read_objs_from_path(p.join("agency.txt")),
183 shapes: self.read_objs_from_optional_path(p, "shapes.txt"),
184 fare_attributes: self.read_objs_from_optional_path(p, "fare_attributes.txt"),
185 fare_rules: self.read_objs_from_optional_path(p, "fare_rules.txt"),
186 fare_products: self.read_objs_from_optional_path(p, "fare_products.txt"),
187 fare_media: self.read_objs_from_optional_path(p, "fare_media.txt"),
188 rider_categories: self.read_objs_from_optional_path(p, "rider_categories.txt"),
189 frequencies: self.read_objs_from_optional_path(p, "frequencies.txt"),
190 transfers: self.read_objs_from_optional_path(p, "transfers.txt"),
191 pathways: self.read_objs_from_optional_path(p, "pathways.txt"),
192 feed_info: self.read_objs_from_optional_path(p, "feed_info.txt"),
193 read_duration: start_of_read_instant.elapsed(),
194 translations: self.read_objs_from_optional_path(p, "translations.txt"),
195 ticketing_deep_links: self.read_objs_from_optional_path(p, "ticketing_deep_links.txt"),
196 ticketing_identifiers: self
197 .read_objs_from_optional_path(p, "ticketing_identifiers.txt"),
198 files,
199 source_format: crate::SourceFormat::Directory,
200 sha256: None,
201 };
202
203 if self.reader.unkown_enum_as_default {
204 result.unknown_to_default();
205 }
206 Ok(result)
207 }
208
209 #[cfg(not(target_arch = "wasm32"))]
212 pub fn read(self, gtfs: &str) -> Result<RawGtfs, Error> {
213 #[cfg(feature = "read-url")]
214 if gtfs.starts_with("http") {
215 return self.read_from_url(gtfs);
216 }
217 self.read_from_path(gtfs)
218 }
219
220 #[cfg(all(feature = "read-url", not(target_arch = "wasm32")))]
222 pub fn read_from_url<U: reqwest::IntoUrl>(self, url: U) -> Result<RawGtfs, Error> {
223 let mut res = reqwest::blocking::get(url)?;
224 let mut body = Vec::new();
225 res.read_to_end(&mut body)?;
226 let cursor = std::io::Cursor::new(body);
227 self.read_from_reader(cursor)
228 }
229
230 #[cfg(feature = "read-url")]
232 pub async fn read_from_url_async<U: reqwest::IntoUrl>(self, url: U) -> Result<RawGtfs, Error> {
233 let res = reqwest::get(url).await?.bytes().await?;
234 let reader = std::io::Cursor::new(res);
235 self.read_from_reader(reader)
236 }
237
238 pub fn read_from_path<P>(&self, path: P) -> Result<RawGtfs, Error>
240 where
241 P: AsRef<Path>,
242 {
243 let p = path.as_ref();
244 if p.is_file() {
245 let reader = File::open(p)?;
246 self.read_from_reader(reader)
247 } else if p.is_dir() {
248 self.read_from_directory(p)
249 } else {
250 Err(Error::NotFileNorDirectory(format!("{}", p.display())))
251 }
252 }
253
254 pub fn read_from_reader<T: std::io::Read + std::io::Seek>(
255 &self,
256 reader: T,
257 ) -> Result<RawGtfs, Error> {
258 let start_of_read_instant = Instant::now();
259 let hasher = Sha256::new();
260 let mut buf_reader = std::io::BufReader::new(reader);
261 let mut hash_io = digest_io::IoWrapper(hasher);
262 let _n = std::io::copy(&mut buf_reader, &mut hash_io)?;
263 let digest_io::IoWrapper(hasher) = hash_io;
264 let hash = hasher.finalize();
265 let mut archive = zip::ZipArchive::new(buf_reader)?;
266 let mut file_mapping = HashMap::new();
267 let mut files = Vec::new();
268
269 for i in 0..archive.len() {
270 let archive_file = archive.by_index(i)?;
271 files.push(archive_file.name().to_owned());
272
273 for gtfs_file in &[
274 "agency.txt",
275 "calendar.txt",
276 "calendar_dates.txt",
277 "routes.txt",
278 "stops.txt",
279 "stop_times.txt",
280 "trips.txt",
281 "fare_attributes.txt",
282 "fare_rules.txt",
283 "fare_products.txt",
284 "fare_media.txt",
285 "rider_categories.txt",
286 "frequencies.txt",
287 "transfers.txt",
288 "pathways.txt",
289 "feed_info.txt",
290 "shapes.txt",
291 "translations.txt",
292 "ticketing_deep_links.txt",
293 "ticketing_identifiers.txt",
294 ] {
295 let path = std::path::Path::new(archive_file.name());
296 if path.file_name() == Some(std::ffi::OsStr::new(gtfs_file)) {
297 file_mapping.insert(gtfs_file, i);
298 break;
299 }
300 }
301 }
302
303 let mut result = RawGtfs {
304 agencies: self.read_file(&file_mapping, &mut archive, "agency.txt"),
305 calendar: self.read_optional_file(&file_mapping, &mut archive, "calendar.txt"),
306 calendar_dates: self.read_optional_file(
307 &file_mapping,
308 &mut archive,
309 "calendar_dates.txt",
310 ),
311 routes: self.read_file(&file_mapping, &mut archive, "routes.txt"),
312 stops: self.read_file(&file_mapping, &mut archive, "stops.txt"),
313 stop_times: if self.reader.read_stop_times {
314 self.read_file(&file_mapping, &mut archive, "stop_times.txt")
315 } else {
316 Ok(Vec::new())
317 },
318 trips: self.read_file(&file_mapping, &mut archive, "trips.txt"),
319 fare_attributes: self.read_optional_file(
320 &file_mapping,
321 &mut archive,
322 "fare_attributes.txt",
323 ),
324 fare_rules: self.read_optional_file(&file_mapping, &mut archive, "fare_rules.txt"),
325 fare_products: self.read_optional_file(
326 &file_mapping,
327 &mut archive,
328 "fare_products.txt",
329 ),
330 fare_media: self.read_optional_file(&file_mapping, &mut archive, "fare_media.txt"),
331 rider_categories: self.read_optional_file(
332 &file_mapping,
333 &mut archive,
334 "rider_categories.txt",
335 ),
336 frequencies: self.read_optional_file(&file_mapping, &mut archive, "frequencies.txt"),
337 transfers: self.read_optional_file(&file_mapping, &mut archive, "transfers.txt"),
338 pathways: self.read_optional_file(&file_mapping, &mut archive, "pathways.txt"),
339 feed_info: self.read_optional_file(&file_mapping, &mut archive, "feed_info.txt"),
340 shapes: if self.reader.read_shapes {
341 self.read_optional_file(&file_mapping, &mut archive, "shapes.txt")
342 } else {
343 Some(Ok(Vec::new()))
344 },
345 translations: self.read_optional_file(&file_mapping, &mut archive, "translations.txt"),
346 ticketing_deep_links: self.read_optional_file(
347 &file_mapping,
348 &mut archive,
349 "ticketing_deep_links.txt",
350 ),
351 ticketing_identifiers: self.read_optional_file(
352 &file_mapping,
353 &mut archive,
354 "ticketing_identifiers.txt",
355 ),
356 read_duration: start_of_read_instant.elapsed(),
357 files,
358 source_format: crate::SourceFormat::Zip,
359 sha256: Some(base16ct::lower::encode_string(&hash)),
360 };
361
362 if self.reader.unkown_enum_as_default {
363 result.unknown_to_default();
364 }
365 Ok(result)
366 }
367
368 fn read_objs<T, O>(&self, mut reader: T, file_name: &str) -> Result<Vec<O>, Error>
369 where
370 for<'de> O: Deserialize<'de>,
371 T: std::io::Read,
372 {
373 let mut bom = [0; 3];
374 reader
375 .read_exact(&mut bom)
376 .map_err(|e| Error::NamedFileIO {
377 file_name: file_name.to_owned(),
378 source: Box::new(e),
379 })?;
380
381 let chained = if bom != [0xefu8, 0xbbu8, 0xbfu8] {
382 bom.chain(reader)
383 } else {
384 [].chain(reader)
385 };
386
387 let mut reader = csv::ReaderBuilder::new()
388 .flexible(true)
389 .trim(if self.reader.trim_fields {
390 csv::Trim::Fields
391 } else {
392 csv::Trim::None
393 })
394 .from_reader(chained);
395 let headers = reader
397 .headers()
398 .map_err(|e| Error::CSVError {
399 file_name: file_name.to_owned(),
400 source: e,
401 line_in_error: None,
402 })?
403 .clone()
404 .into_iter()
405 .map(|x| x.trim())
406 .collect::<csv::StringRecord>();
407
408 let mut rec = csv::StringRecord::new();
410 let mut objs = Vec::new();
411
412 while reader.read_record(&mut rec).map_err(|e| Error::CSVError {
414 file_name: file_name.to_owned(),
415 source: e,
416 line_in_error: None,
417 })? {
418 let obj = rec
419 .deserialize(Some(&headers))
420 .map_err(|e| Error::CSVError {
421 file_name: file_name.to_owned(),
422 source: e,
423 line_in_error: Some(crate::error::LineError {
424 headers: headers.into_iter().map(String::from).collect(),
425 values: rec.into_iter().map(String::from).collect(),
426 }),
427 })?;
428 objs.push(obj);
429 }
430 Ok(objs)
431 }
432
433 fn read_objs_from_path<O>(&self, path: std::path::PathBuf) -> Result<Vec<O>, Error>
434 where
435 for<'de> O: Deserialize<'de>,
436 {
437 let file_name = path
438 .file_name()
439 .and_then(|f| f.to_str())
440 .unwrap_or("invalid_file_name")
441 .to_string();
442 if path.exists() {
443 File::open(path)
444 .map_err(|e| Error::NamedFileIO {
445 file_name: file_name.to_owned(),
446 source: Box::new(e),
447 })
448 .and_then(|r| self.read_objs(r, &file_name))
449 } else {
450 Err(Error::MissingFile(file_name))
451 }
452 }
453
454 fn read_objs_from_optional_path<O>(
455 &self,
456 dir_path: &std::path::Path,
457 file_name: &str,
458 ) -> Option<Result<Vec<O>, Error>>
459 where
460 for<'de> O: Deserialize<'de>,
461 {
462 File::open(dir_path.join(file_name))
463 .ok()
464 .map(|r| self.read_objs(r, file_name))
465 }
466
467 fn read_file<O, T>(
468 &self,
469 file_mapping: &HashMap<&&str, usize>,
470 archive: &mut zip::ZipArchive<T>,
471 file_name: &str,
472 ) -> Result<Vec<O>, Error>
473 where
474 for<'de> O: Deserialize<'de>,
475 T: std::io::Read + std::io::Seek,
476 {
477 self.read_optional_file(file_mapping, archive, file_name)
478 .unwrap_or_else(|| Err(Error::MissingFile(file_name.to_owned())))
479 }
480
481 fn read_optional_file<O, T>(
482 &self,
483 file_mapping: &HashMap<&&str, usize>,
484 archive: &mut zip::ZipArchive<T>,
485 file_name: &str,
486 ) -> Option<Result<Vec<O>, Error>>
487 where
488 for<'de> O: Deserialize<'de>,
489 T: std::io::Read + std::io::Seek,
490 {
491 file_mapping.get(&file_name).map(|i| {
492 self.read_objs(
493 archive.by_index(*i).map_err(|e| Error::NamedFileIO {
494 file_name: file_name.to_owned(),
495 source: Box::new(e),
496 })?,
497 file_name,
498 )
499 })
500 }
501}