1use serde::Deserialize;
2use sha2::{Digest, Sha256};
3
4use crate::{Error, Gtfs, RawGtfs};
5use std::collections::HashMap;
6use std::convert::TryFrom;
7use std::fs::File;
8use std::io::Read;
9use std::path::Path;
10use web_time::Instant;
11
/// Options controlling how a GTFS feed is read.
///
/// Each field can also be set through the builder-style method of the same
/// name defined on this type. The type is `Clone` so that a configured set of
/// options can be reused, since the reading methods consume the reader.
#[derive(Debug, Clone)]
pub struct GtfsReader {
    /// Whether `stop_times.txt` is parsed. When `false`, an empty list is used
    /// for the stop times instead of reading the file.
    pub read_stop_times: bool,
    /// Whether `shapes.txt` is parsed. When `false`, readers that consult this
    /// flag use an empty list for the shapes instead of reading the file.
    pub read_shapes: bool,
    /// When `true`, `unknown_to_default` is called on the raw feed once it has
    /// been read.
    ///
    /// NOTE(review): presumably this replaces unknown enumeration values with
    /// their defaults; confirm against `RawGtfs::unknown_to_default`.
    ///
    /// The misspelling ("unkown") is part of the public API and is kept for
    /// backward compatibility.
    pub unkown_enum_as_default: bool,
    /// When `true`, CSV fields are trimmed while parsing (`csv::Trim::Fields`);
    /// when `false`, fields are left untouched (`csv::Trim::None`).
    pub trim_fields: bool,
}
47
48impl Default for GtfsReader {
49 fn default() -> Self {
50 GtfsReader {
51 read_stop_times: true,
52 read_shapes: true,
53 unkown_enum_as_default: false,
54 trim_fields: true,
55 }
56 }
57}
58
59impl GtfsReader {
60 pub fn read_stop_times(mut self, read_stop_times: bool) -> Self {
65 self.read_stop_times = read_stop_times;
66 self
67 }
68
69 pub fn read_shapes(mut self, read_shapes: bool) -> Self {
72 self.read_shapes = read_shapes;
73 self
74 }
75
76 pub fn unkown_enum_as_default(mut self, unkown_enum_as_default: bool) -> Self {
83 self.unkown_enum_as_default = unkown_enum_as_default;
84 self
85 }
86
87 pub fn trim_fields(mut self, trim_fields: bool) -> Self {
92 self.trim_fields = trim_fields;
93 self
94 }
95
96 #[cfg(not(target_arch = "wasm32"))]
101 pub fn read(self, gtfs: &str) -> Result<Gtfs, Error> {
102 self.raw().read(gtfs).and_then(Gtfs::try_from)
103 }
104
105 pub fn read_from_path<P>(self, path: P) -> Result<Gtfs, Error>
107 where
108 P: AsRef<Path>,
109 {
110 self.raw().read_from_path(path).and_then(Gtfs::try_from)
111 }
112
113 #[cfg(all(feature = "read-url", not(target_arch = "wasm32")))]
117 pub fn read_from_url<U: reqwest::IntoUrl>(self, url: U) -> Result<Gtfs, Error> {
118 self.raw().read_from_url(url).and_then(Gtfs::try_from)
119 }
120
121 #[cfg(feature = "read-url")]
125 pub async fn read_from_url_async<U: reqwest::IntoUrl>(self, url: U) -> Result<Gtfs, Error> {
126 self.raw()
127 .read_from_url_async(url)
128 .await
129 .and_then(Gtfs::try_from)
130 }
131
132 pub fn raw(self) -> RawGtfsReader {
144 RawGtfsReader { reader: self }
145 }
146}
147
/// Reader that produces a [`RawGtfs`] rather than a [`Gtfs`].
///
/// It is obtained from [`GtfsReader::raw`] and carries the options of the
/// [`GtfsReader`] it was built from.
pub struct RawGtfsReader {
    /// Options consulted while reading (which files to skip, field trimming,
    /// unknown enumeration handling).
    reader: GtfsReader,
}
154
155impl RawGtfsReader {
156 fn read_from_directory(&self, p: &std::path::Path) -> Result<RawGtfs, Error> {
157 let start_of_read_instant = Instant::now();
158 let files = std::fs::read_dir(p)?
161 .filter_map(|d| {
162 d.ok().and_then(|e| {
163 e.path()
164 .strip_prefix(p)
165 .ok()
166 .and_then(|f| f.to_str().map(|s| s.to_owned()))
167 })
168 })
169 .collect();
170
171 let mut result = RawGtfs {
172 trips: self.read_objs_from_path(p.join("trips.txt")),
173 calendar: self.read_objs_from_optional_path(p, "calendar.txt"),
174 calendar_dates: self.read_objs_from_optional_path(p, "calendar_dates.txt"),
175 stops: self.read_objs_from_path(p.join("stops.txt")),
176 routes: self.read_objs_from_path(p.join("routes.txt")),
177 stop_times: if self.reader.read_stop_times {
178 self.read_objs_from_path(p.join("stop_times.txt"))
179 } else {
180 Ok(Vec::new())
181 },
182 agencies: self.read_objs_from_path(p.join("agency.txt")),
183 shapes: self.read_objs_from_optional_path(p, "shapes.txt"),
184 fare_attributes: self.read_objs_from_optional_path(p, "fare_attributes.txt"),
185 fare_rules: self.read_objs_from_optional_path(p, "fare_rules.txt"),
186 fare_products: self.read_objs_from_optional_path(p, "fare_products.txt"),
187 fare_media: self.read_objs_from_optional_path(p, "fare_media.txt"),
188 rider_categories: self.read_objs_from_optional_path(p, "rider_categories.txt"),
189 frequencies: self.read_objs_from_optional_path(p, "frequencies.txt"),
190 transfers: self.read_objs_from_optional_path(p, "transfers.txt"),
191 pathways: self.read_objs_from_optional_path(p, "pathways.txt"),
192 feed_info: self.read_objs_from_optional_path(p, "feed_info.txt"),
193 read_duration: start_of_read_instant.elapsed(),
194 translations: self.read_objs_from_optional_path(p, "translations.txt"),
195 ticketing_deep_links: self.read_objs_from_optional_path(p, "ticketing_deep_links.txt"),
196 ticketing_identifiers: self
197 .read_objs_from_optional_path(p, "ticketing_identifiers.txt"),
198 files,
199 source_format: crate::SourceFormat::Directory,
200 sha256: None,
201 };
202
203 if self.reader.unkown_enum_as_default {
204 result.unknown_to_default();
205 }
206 Ok(result)
207 }
208
209 #[cfg(not(target_arch = "wasm32"))]
212 pub fn read(self, gtfs: &str) -> Result<RawGtfs, Error> {
213 #[cfg(feature = "read-url")]
214 if gtfs.starts_with("http") {
215 return self.read_from_url(gtfs);
216 }
217 self.read_from_path(gtfs)
218 }
219
/// Downloads a zipped feed from `url` with a blocking request and reads it.
///
/// The whole response body is buffered in memory before being parsed as a
/// zip archive.
///
/// # Errors
///
/// Fails if the request cannot be performed, if the server answers with a
/// 4xx/5xx status, if the body cannot be read, or if the body is not a
/// readable archive.
#[cfg(all(feature = "read-url", not(target_arch = "wasm32")))]
pub fn read_from_url<U: reqwest::IntoUrl>(self, url: U) -> Result<RawGtfs, Error> {
    // Reject HTTP error responses up front: otherwise the error page would be
    // fed to the zip parser and surface as a misleading archive error.
    let mut res = reqwest::blocking::get(url)?.error_for_status()?;
    let mut body = Vec::new();
    res.read_to_end(&mut body)?;
    let cursor = std::io::Cursor::new(body);
    self.read_from_reader(cursor)
}
229
/// Asynchronously downloads a zipped feed from `url` and reads it.
///
/// The whole response body is buffered in memory before being parsed as a
/// zip archive.
///
/// # Errors
///
/// Fails if the request cannot be performed, if the server answers with a
/// 4xx/5xx status, if the body cannot be fetched, or if the body is not a
/// readable archive.
#[cfg(feature = "read-url")]
pub async fn read_from_url_async<U: reqwest::IntoUrl>(self, url: U) -> Result<RawGtfs, Error> {
    // Reject HTTP error responses up front: otherwise the error page would be
    // fed to the zip parser and surface as a misleading archive error.
    let res = reqwest::get(url)
        .await?
        .error_for_status()?
        .bytes()
        .await?;
    let reader = std::io::Cursor::new(res);
    self.read_from_reader(reader)
}
237
238 pub fn read_from_path<P>(&self, path: P) -> Result<RawGtfs, Error>
240 where
241 P: AsRef<Path>,
242 {
243 let p = path.as_ref();
244 if p.is_file() {
245 let reader = File::open(p)?;
246 self.read_from_reader(reader)
247 } else if p.is_dir() {
248 self.read_from_directory(p)
249 } else {
250 Err(Error::NotFileNorDirectory(format!("{}", p.display())))
251 }
252 }
253
254 pub fn read_from_reader<T: std::io::Read + std::io::Seek>(
255 &self,
256 reader: T,
257 ) -> Result<RawGtfs, Error> {
258 let start_of_read_instant = Instant::now();
259 let hasher = Sha256::new();
260 let mut buf_reader = std::io::BufReader::new(reader);
261 let mut hash_io = digest_io::IoWrapper(hasher);
262 let _n = std::io::copy(&mut buf_reader, &mut hash_io)?;
263 let digest_io::IoWrapper(hasher) = hash_io;
264 let hash = hasher.finalize();
265 let mut archive = zip::ZipArchive::new(buf_reader)?;
266 let mut file_mapping = HashMap::new();
267 let mut files = Vec::new();
268
269 for i in 0..archive.len() {
270 let archive_file = archive.by_index(i)?;
271 files.push(archive_file.name().to_owned());
272
273 for gtfs_file in &[
274 "agency.txt",
275 "calendar.txt",
276 "calendar_dates.txt",
277 "routes.txt",
278 "stops.txt",
279 "stop_times.txt",
280 "trips.txt",
281 "fare_attributes.txt",
282 "fare_rules.txt",
283 "fare_products.txt",
284 "fare_media.txt",
285 "rider_categories.txt",
286 "frequencies.txt",
287 "transfers.txt",
288 "pathways.txt",
289 "feed_info.txt",
290 "shapes.txt",
291 ] {
292 let path = std::path::Path::new(archive_file.name());
293 if path.file_name() == Some(std::ffi::OsStr::new(gtfs_file)) {
294 file_mapping.insert(gtfs_file, i);
295 break;
296 }
297 }
298 }
299
300 let mut result = RawGtfs {
301 agencies: self.read_file(&file_mapping, &mut archive, "agency.txt"),
302 calendar: self.read_optional_file(&file_mapping, &mut archive, "calendar.txt"),
303 calendar_dates: self.read_optional_file(
304 &file_mapping,
305 &mut archive,
306 "calendar_dates.txt",
307 ),
308 routes: self.read_file(&file_mapping, &mut archive, "routes.txt"),
309 stops: self.read_file(&file_mapping, &mut archive, "stops.txt"),
310 stop_times: if self.reader.read_stop_times {
311 self.read_file(&file_mapping, &mut archive, "stop_times.txt")
312 } else {
313 Ok(Vec::new())
314 },
315 trips: self.read_file(&file_mapping, &mut archive, "trips.txt"),
316 fare_attributes: self.read_optional_file(
317 &file_mapping,
318 &mut archive,
319 "fare_attributes.txt",
320 ),
321 fare_rules: self.read_optional_file(&file_mapping, &mut archive, "fare_rules.txt"),
322 fare_products: self.read_optional_file(
323 &file_mapping,
324 &mut archive,
325 "fare_products.txt",
326 ),
327 fare_media: self.read_optional_file(&file_mapping, &mut archive, "fare_media.txt"),
328 rider_categories: self.read_optional_file(
329 &file_mapping,
330 &mut archive,
331 "rider_categories.txt",
332 ),
333 frequencies: self.read_optional_file(&file_mapping, &mut archive, "frequencies.txt"),
334 transfers: self.read_optional_file(&file_mapping, &mut archive, "transfers.txt"),
335 pathways: self.read_optional_file(&file_mapping, &mut archive, "pathways.txt"),
336 feed_info: self.read_optional_file(&file_mapping, &mut archive, "feed_info.txt"),
337 shapes: if self.reader.read_shapes {
338 self.read_optional_file(&file_mapping, &mut archive, "shapes.txt")
339 } else {
340 Some(Ok(Vec::new()))
341 },
342 translations: self.read_optional_file(&file_mapping, &mut archive, "translations.txt"),
343 ticketing_deep_links: self.read_optional_file(
344 &file_mapping,
345 &mut archive,
346 "ticketing_deep_links.txt",
347 ),
348 ticketing_identifiers: self.read_optional_file(
349 &file_mapping,
350 &mut archive,
351 "ticketing_identifiers.txt",
352 ),
353 read_duration: start_of_read_instant.elapsed(),
354 files,
355 source_format: crate::SourceFormat::Zip,
356 sha256: Some(base16ct::lower::encode_string(&hash)),
357 };
358
359 if self.reader.unkown_enum_as_default {
360 result.unknown_to_default();
361 }
362 Ok(result)
363 }
364
365 fn read_objs<T, O>(&self, mut reader: T, file_name: &str) -> Result<Vec<O>, Error>
366 where
367 for<'de> O: Deserialize<'de>,
368 T: std::io::Read,
369 {
370 let mut bom = [0; 3];
371 reader
372 .read_exact(&mut bom)
373 .map_err(|e| Error::NamedFileIO {
374 file_name: file_name.to_owned(),
375 source: Box::new(e),
376 })?;
377
378 let chained = if bom != [0xefu8, 0xbbu8, 0xbfu8] {
379 bom.chain(reader)
380 } else {
381 [].chain(reader)
382 };
383
384 let mut reader = csv::ReaderBuilder::new()
385 .flexible(true)
386 .trim(if self.reader.trim_fields {
387 csv::Trim::Fields
388 } else {
389 csv::Trim::None
390 })
391 .from_reader(chained);
392 let headers = reader
394 .headers()
395 .map_err(|e| Error::CSVError {
396 file_name: file_name.to_owned(),
397 source: e,
398 line_in_error: None,
399 })?
400 .clone()
401 .into_iter()
402 .map(|x| x.trim())
403 .collect::<csv::StringRecord>();
404
405 let mut rec = csv::StringRecord::new();
407 let mut objs = Vec::new();
408
409 while reader.read_record(&mut rec).map_err(|e| Error::CSVError {
411 file_name: file_name.to_owned(),
412 source: e,
413 line_in_error: None,
414 })? {
415 let obj = rec
416 .deserialize(Some(&headers))
417 .map_err(|e| Error::CSVError {
418 file_name: file_name.to_owned(),
419 source: e,
420 line_in_error: Some(crate::error::LineError {
421 headers: headers.into_iter().map(String::from).collect(),
422 values: rec.into_iter().map(String::from).collect(),
423 }),
424 })?;
425 objs.push(obj);
426 }
427 Ok(objs)
428 }
429
430 fn read_objs_from_path<O>(&self, path: std::path::PathBuf) -> Result<Vec<O>, Error>
431 where
432 for<'de> O: Deserialize<'de>,
433 {
434 let file_name = path
435 .file_name()
436 .and_then(|f| f.to_str())
437 .unwrap_or("invalid_file_name")
438 .to_string();
439 if path.exists() {
440 File::open(path)
441 .map_err(|e| Error::NamedFileIO {
442 file_name: file_name.to_owned(),
443 source: Box::new(e),
444 })
445 .and_then(|r| self.read_objs(r, &file_name))
446 } else {
447 Err(Error::MissingFile(file_name))
448 }
449 }
450
451 fn read_objs_from_optional_path<O>(
452 &self,
453 dir_path: &std::path::Path,
454 file_name: &str,
455 ) -> Option<Result<Vec<O>, Error>>
456 where
457 for<'de> O: Deserialize<'de>,
458 {
459 File::open(dir_path.join(file_name))
460 .ok()
461 .map(|r| self.read_objs(r, file_name))
462 }
463
464 fn read_file<O, T>(
465 &self,
466 file_mapping: &HashMap<&&str, usize>,
467 archive: &mut zip::ZipArchive<T>,
468 file_name: &str,
469 ) -> Result<Vec<O>, Error>
470 where
471 for<'de> O: Deserialize<'de>,
472 T: std::io::Read + std::io::Seek,
473 {
474 self.read_optional_file(file_mapping, archive, file_name)
475 .unwrap_or_else(|| Err(Error::MissingFile(file_name.to_owned())))
476 }
477
478 fn read_optional_file<O, T>(
479 &self,
480 file_mapping: &HashMap<&&str, usize>,
481 archive: &mut zip::ZipArchive<T>,
482 file_name: &str,
483 ) -> Option<Result<Vec<O>, Error>>
484 where
485 for<'de> O: Deserialize<'de>,
486 T: std::io::Read + std::io::Seek,
487 {
488 file_mapping.get(&file_name).map(|i| {
489 self.read_objs(
490 archive.by_index(*i).map_err(|e| Error::NamedFileIO {
491 file_name: file_name.to_owned(),
492 source: Box::new(e),
493 })?,
494 file_name,
495 )
496 })
497 }
498}