gtfs_structures/
gtfs.rs

1use crate::{objects::*, Error, RawGtfs};
2use chrono::prelude::NaiveDate;
3use std::collections::{HashMap, HashSet};
4use std::convert::TryFrom;
5use std::sync::Arc;
6use web_time::{Duration, Instant};
7
/// Data structure with all the GTFS objects
///
/// This structure is easier to use than the [RawGtfs] structure, as some relationships are already resolved
/// (objects are indexed by their identifier, and the [StopTime] are attached to their [Trip]).
///
/// If you want to configure the behaviour (e.g. skipping the [StopTime] or the [Shape]), see [crate::GtfsReader] for more personalisation
///
/// This is probably the entry point you want to use:
/// ```
/// let gtfs = gtfs_structures::Gtfs::new("fixtures/zips/gtfs.zip")?;
/// assert_eq!(gtfs.stops.len(), 5);
/// # Ok::<(), gtfs_structures::error::Error>(())
/// ```
///
/// The [StopTime] are accessible from the [Trip]
#[derive(Default)]
pub struct Gtfs {
    /// Time needed to read and parse the archive
    pub read_duration: Duration,
    /// All [Calendar] by `service_id`
    pub calendar: HashMap<String, Calendar>,
    /// All calendar dates grouped by `service_id`
    pub calendar_dates: HashMap<String, Vec<CalendarDate>>,
    /// All stops by `stop_id`. Stops are in an [Arc] because they are also referenced by each [StopTime]
    pub stops: HashMap<String, Arc<Stop>>,
    /// All routes by `route_id`
    pub routes: HashMap<String, Route>,
    /// All trips by `trip_id`
    pub trips: HashMap<String, Trip>,
    /// All agencies. They cannot be indexed by `agency_id`, as it is not a required field
    pub agencies: Vec<Agency>,
    /// All shape points grouped by `shape_id`
    pub shapes: HashMap<String, Vec<Shape>>,
    /// All fare attributes by `fare_id`
    pub fare_attributes: HashMap<String, FareAttribute>,
    /// All fare rules grouped by `fare_id`
    pub fare_rules: HashMap<String, Vec<FareRule>>,
    /// All fare products grouped by `fare_product_id`
    pub fare_products: HashMap<String, Vec<FareProduct>>,
    /// All fare media by `fare_media_id`
    pub fare_media: HashMap<String, FareMedia>,
    /// All rider categories by `rider_category_id`
    pub rider_categories: HashMap<String, RiderCategory>,
    /// All feed information. There is no identifier
    pub feed_info: Vec<FeedInfo>,
}
53
54impl TryFrom<RawGtfs> for Gtfs {
55    type Error = Error;
56    /// Tries to build a [Gtfs] from a [RawGtfs]
57    ///
58    /// It might fail if some mandatory files couldn’t be read or if there are references to other objects that are invalid.
59    fn try_from(raw: RawGtfs) -> Result<Gtfs, Error> {
60        let start = Instant::now();
61
62        let stops = to_stop_map(
63            raw.stops?,
64            raw.transfers.unwrap_or_else(|| Ok(Vec::new()))?,
65            raw.pathways.unwrap_or(Ok(Vec::new()))?,
66        )?;
67        let frequencies = raw.frequencies.unwrap_or_else(|| Ok(Vec::new()))?;
68        let trips = create_trips(raw.trips?, raw.stop_times?, frequencies, &stops)?;
69
70        let mut fare_rules = HashMap::<String, Vec<FareRule>>::new();
71        for f in raw.fare_rules.unwrap_or_else(|| Ok(Vec::new()))? {
72            fare_rules.entry(f.fare_id.clone()).or_default().push(f);
73        }
74
75        let mut fare_products = HashMap::<String, Vec<FareProduct>>::new();
76        for f in raw.fare_products.unwrap_or_else(|| Ok(Vec::new()))? {
77            fare_products.entry(f.id.clone()).or_default().push(f);
78        }
79
80        Ok(Gtfs {
81            stops,
82            routes: to_map(raw.routes?),
83            trips,
84            agencies: raw.agencies?,
85            shapes: to_shape_map(raw.shapes.unwrap_or_else(|| Ok(Vec::new()))?),
86            fare_attributes: to_map(raw.fare_attributes.unwrap_or_else(|| Ok(Vec::new()))?),
87            fare_rules,
88            fare_products,
89            fare_media: to_map(raw.fare_media.unwrap_or_else(|| Ok(Vec::new()))?),
90            rider_categories: to_map(raw.rider_categories.unwrap_or_else(|| Ok(Vec::new()))?),
91            feed_info: raw.feed_info.unwrap_or_else(|| Ok(Vec::new()))?,
92            calendar: to_map(raw.calendar.unwrap_or_else(|| Ok(Vec::new()))?),
93            calendar_dates: to_calendar_dates(
94                raw.calendar_dates.unwrap_or_else(|| Ok(Vec::new()))?,
95            ),
96            read_duration: raw.read_duration + start.elapsed(),
97        })
98    }
99}
100
101impl Gtfs {
102    /// Prints on stdout some basic statistics about the GTFS file (numbers of elements for each object). Mostly to be sure that everything was read
103    pub fn print_stats(&self) {
104        println!("GTFS data:");
105        println!("  Read in {:?}", self.read_duration);
106        println!("  Stops: {}", self.stops.len());
107        println!("  Routes: {}", self.routes.len());
108        println!("  Trips: {}", self.trips.len());
109        println!("  Agencies: {}", self.agencies.len());
110        println!("  Shapes: {}", self.shapes.len());
111        println!("  Fare attributes: {}", self.fare_attributes.len());
112        println!("  Feed info: {}", self.feed_info.len());
113    }
114
115    /// Reads from an url (if starts with `"http"`), or a local path (either a directory or zipped file)
116    ///
117    /// To read from an url, build with read-url feature
118    /// See also [Gtfs::from_url] and [Gtfs::from_path] if you don’t want the library to guess
119    #[cfg(not(target_arch = "wasm32"))]
120    pub fn new(gtfs: &str) -> Result<Gtfs, Error> {
121        RawGtfs::new(gtfs).and_then(Gtfs::try_from)
122    }
123
124    /// Reads the GTFS from a local zip archive or local directory
125    pub fn from_path<P>(path: P) -> Result<Gtfs, Error>
126    where
127        P: AsRef<std::path::Path>,
128    {
129        RawGtfs::from_path(path).and_then(Gtfs::try_from)
130    }
131
132    /// Reads the GTFS from a remote url
133    ///
134    /// The library must be built with the read-url feature. Not available on WASM targets.
135    #[cfg(all(feature = "read-url", not(target_arch = "wasm32")))]
136    pub fn from_url<U: reqwest::IntoUrl>(url: U) -> Result<Gtfs, Error> {
137        RawGtfs::from_url(url).and_then(Gtfs::try_from)
138    }
139
140    /// Asynchronously reads the GTFS from a remote url
141    ///
142    /// The library must be built with the read-url feature
143    #[cfg(feature = "read-url")]
144    pub async fn from_url_async<U: reqwest::IntoUrl>(url: U) -> Result<Gtfs, Error> {
145        RawGtfs::from_url_async(url).await.and_then(Gtfs::try_from)
146    }
147
148    /// Reads for any object implementing [std::io::Read] and [std::io::Seek]
149    ///
150    /// Mostly an internal function that abstracts reading from an url or local file
151    pub fn from_reader<T: std::io::Read + std::io::Seek>(reader: T) -> Result<Gtfs, Error> {
152        RawGtfs::from_reader(reader).and_then(Gtfs::try_from)
153    }
154
155    /// For a given a `service_id` and a starting date returns all the following day offset the vehicle runs
156    ///
157    /// For instance if the `start_date` is 2021-12-20, `[0, 4]` means that the vehicle will run the 20th and 24th
158    ///
159    /// It will consider use both [Calendar] and [CalendarDate] (both added and removed)
160    pub fn trip_days(&self, service_id: &str, start_date: NaiveDate) -> Vec<u16> {
161        let mut result = Vec::new();
162
163        // Handle services given by specific days and exceptions
164        let mut removed_days = HashSet::new();
165        for extra_day in self
166            .calendar_dates
167            .get(service_id)
168            .iter()
169            .flat_map(|e| e.iter())
170        {
171            let offset = extra_day.date.signed_duration_since(start_date).num_days();
172            if offset >= 0 {
173                if extra_day.exception_type == Exception::Added {
174                    result.push(offset as u16);
175                } else if extra_day.exception_type == Exception::Deleted {
176                    removed_days.insert(offset);
177                }
178            }
179        }
180
181        if let Some(calendar) = self.calendar.get(service_id) {
182            let total_days = calendar
183                .end_date
184                .signed_duration_since(start_date)
185                .num_days();
186            for days_offset in 0..=total_days {
187                if let Some(days_offset_timedelta) = chrono::TimeDelta::try_days(days_offset) {
188                    let current_date = start_date + days_offset_timedelta;
189
190                    if calendar.start_date <= current_date
191                        && calendar.end_date >= current_date
192                        && calendar.valid_weekday(current_date)
193                        && !removed_days.contains(&days_offset)
194                    {
195                        result.push(days_offset as u16);
196                    }
197                }
198            }
199        }
200
201        result
202    }
203
204    /// Gets a [Stop] by its `stop_id`
205    pub fn get_stop<'a>(&'a self, id: &str) -> Result<&'a Stop, Error> {
206        match self.stops.get(id) {
207            Some(stop) => Ok(stop),
208            None => Err(Error::ReferenceError(id.to_owned())),
209        }
210    }
211
212    /// Gets a [Trip] by its `trip_id`
213    pub fn get_trip<'a>(&'a self, id: &str) -> Result<&'a Trip, Error> {
214        self.trips
215            .get(id)
216            .ok_or_else(|| Error::ReferenceError(id.to_owned()))
217    }
218
219    /// Gets a [Route] by its `route_id`
220    pub fn get_route<'a>(&'a self, id: &str) -> Result<&'a Route, Error> {
221        self.routes
222            .get(id)
223            .ok_or_else(|| Error::ReferenceError(id.to_owned()))
224    }
225
226    /// Gets a [Calendar] by its `service_id`
227    pub fn get_calendar<'a>(&'a self, id: &str) -> Result<&'a Calendar, Error> {
228        self.calendar
229            .get(id)
230            .ok_or_else(|| Error::ReferenceError(id.to_owned()))
231    }
232
233    /// Gets all [CalendarDate] of a `service_id`
234    pub fn get_calendar_date<'a>(&'a self, id: &str) -> Result<&'a Vec<CalendarDate>, Error> {
235        self.calendar_dates
236            .get(id)
237            .ok_or_else(|| Error::ReferenceError(id.to_owned()))
238    }
239
240    /// Gets all [Shape] points of a `shape_id`
241    pub fn get_shape<'a>(&'a self, id: &str) -> Result<&'a Vec<Shape>, Error> {
242        self.shapes
243            .get(id)
244            .ok_or_else(|| Error::ReferenceError(id.to_owned()))
245    }
246
247    /// Gets a [FareAttribute] by its `fare_id`
248    pub fn get_fare_attributes<'a>(&'a self, id: &str) -> Result<&'a FareAttribute, Error> {
249        self.fare_attributes
250            .get(id)
251            .ok_or_else(|| Error::ReferenceError(id.to_owned()))
252    }
253}
254
255fn to_map<O: Id>(elements: impl IntoIterator<Item = O>) -> HashMap<String, O> {
256    elements
257        .into_iter()
258        .map(|e| (e.id().to_owned(), e))
259        .collect()
260}
261
262fn to_stop_map(
263    stops: Vec<Stop>,
264    raw_transfers: Vec<RawTransfer>,
265    raw_pathways: Vec<RawPathway>,
266) -> Result<HashMap<String, Arc<Stop>>, Error> {
267    let mut stop_map: HashMap<String, Stop> =
268        stops.into_iter().map(|s| (s.id.clone(), s)).collect();
269
270    for transfer in raw_transfers {
271        stop_map.get(&transfer.to_stop_id).ok_or_else(|| {
272            let stop_id = &transfer.to_stop_id;
273            Error::ReferenceError(format!("'{stop_id}' in transfers.txt"))
274        })?;
275        stop_map
276            .entry(transfer.from_stop_id.clone())
277            .and_modify(|stop| stop.transfers.push(StopTransfer::from(transfer)));
278    }
279
280    for pathway in raw_pathways {
281        stop_map.get(&pathway.to_stop_id).ok_or_else(|| {
282            let stop_id = &pathway.to_stop_id;
283            Error::ReferenceError(format!("'{stop_id}' in pathways.txt"))
284        })?;
285        stop_map
286            .entry(pathway.from_stop_id.clone())
287            .and_modify(|stop| stop.pathways.push(Pathway::from(pathway)));
288    }
289
290    let res = stop_map
291        .into_iter()
292        .map(|(i, s)| (i, Arc::new(s)))
293        .collect();
294    Ok(res)
295}
296
297fn to_shape_map(shapes: Vec<Shape>) -> HashMap<String, Vec<Shape>> {
298    let mut res = HashMap::default();
299    for s in shapes {
300        let shape = res.entry(s.id.to_owned()).or_insert_with(Vec::new);
301        shape.push(s);
302    }
303    // we sort the shape by it's pt_sequence
304    for shapes in res.values_mut() {
305        shapes.sort_by_key(|s| s.sequence);
306    }
307
308    res
309}
310
311fn to_calendar_dates(cd: Vec<CalendarDate>) -> HashMap<String, Vec<CalendarDate>> {
312    let mut res = HashMap::default();
313    for c in cd {
314        let cal = res.entry(c.service_id.to_owned()).or_insert_with(Vec::new);
315        cal.push(c);
316    }
317    res
318}
319
320// Number of stoptimes to `pop` from the list before using shrink_to_fit to reduce the memory footprint
321// Hardcoded to what seems a sensible value, but if needed we could make this a parameter, feel free to open an issue if this could help
322const NB_STOP_TIMES_BEFORE_SHRINK: usize = 1_000_000;
323
324fn create_trips(
325    raw_trips: Vec<RawTrip>,
326    mut raw_stop_times: Vec<RawStopTime>,
327    raw_frequencies: Vec<RawFrequency>,
328    stops: &HashMap<String, Arc<Stop>>,
329) -> Result<HashMap<String, Trip>, Error> {
330    let mut trips = to_map(raw_trips.into_iter().map(|rt| Trip {
331        id: rt.id,
332        service_id: rt.service_id,
333        route_id: rt.route_id,
334        stop_times: vec![],
335        shape_id: rt.shape_id,
336        trip_headsign: rt.trip_headsign,
337        trip_short_name: rt.trip_short_name,
338        direction_id: rt.direction_id,
339        block_id: rt.block_id,
340        wheelchair_accessible: rt.wheelchair_accessible,
341        bikes_allowed: rt.bikes_allowed,
342        frequencies: vec![],
343    }));
344
345    let mut st_idx = 0;
346    while let Some(s) = raw_stop_times.pop() {
347        st_idx += 1;
348        let trip = &mut trips
349            .get_mut(&s.trip_id)
350            .ok_or_else(|| Error::ReferenceError(s.trip_id.to_string()))?;
351        let stop = stops
352            .get(&s.stop_id)
353            .ok_or_else(|| Error::ReferenceError(s.stop_id.to_string()))?;
354        trip.stop_times.push(StopTime::from(s, Arc::clone(stop)));
355        if st_idx % NB_STOP_TIMES_BEFORE_SHRINK == 0 {
356            raw_stop_times.shrink_to_fit();
357        }
358    }
359
360    for trip in &mut trips.values_mut() {
361        trip.stop_times
362            .sort_by(|a, b| a.stop_sequence.cmp(&b.stop_sequence));
363    }
364
365    for f in raw_frequencies {
366        let trip = &mut trips
367            .get_mut(&f.trip_id)
368            .ok_or_else(|| Error::ReferenceError(f.trip_id.to_string()))?;
369        trip.frequencies.push(Frequency::from(&f));
370    }
371
372    Ok(trips)
373}