1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
/// Copyright: Kyler Chin <kyler@catenarymaps.org>
/// Catenary Transit Initiatives
/// Removal of the attribution is not allowed, as covered under the AGPL license

use dmfr::*;
use serde_json::Error as SerdeError;
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fs;
use std::sync::Arc;

/// Links an operator (by Onestop id) to a feed, optionally recording which
/// GTFS `agency_id` inside that feed the operator corresponds to.
/// Stored as the value side of [`ReturnDmfrAnalysis::feed_to_operator_pairs_hashmap`].
#[derive(Debug, Clone)]
pub struct OperatorPairInfo {
    /// Onestop id of the operator.
    pub operator_id: String,
    /// GTFS `agency_id` within the feed, when the mapping is known.
    pub gtfs_agency_id: Option<String>,
}

/// Links a feed (by Onestop id) to an operator, optionally recording which
/// GTFS `agency_id` inside the feed is meant.
/// Stored as the value side of [`ReturnDmfrAnalysis::operator_to_feed_hashmap`].
#[derive(Debug, Clone)]
pub struct FeedPairInfo {
    /// Onestop id of the feed.
    pub feed_onestop_id: String,
    /// GTFS `agency_id` within the feed, when the mapping is known.
    pub gtfs_agency_id: Option<String>,
}

/// Onestop id of a feed (e.g. `f-…`). Alias for readability of map keys.
pub type FeedId = String;
/// Onestop id of an operator (e.g. `o-…`). Alias for readability of map keys.
pub type OperatorId = String;

/// Aggregated result of scanning a DMFR directory tree: every feed and
/// operator found, plus bidirectional operator<->feed cross-reference maps.
#[derive(Debug)]
pub struct ReturnDmfrAnalysis {
    /// All feeds, keyed by feed Onestop id.
    pub feed_hashmap: HashMap<FeedId, dmfr::Feed>,
    /// All operators, keyed by operator Onestop id.
    pub operator_hashmap: HashMap<OperatorId, dmfr::Operator>,
    /// For each operator, the feeds associated with it (deduplicated by feed id).
    pub operator_to_feed_hashmap: HashMap<OperatorId, Vec<FeedPairInfo>>,
    /// For each feed, the operators associated with it (deduplicated by operator id).
    pub feed_to_operator_pairs_hashmap: HashMap<FeedId, Vec<OperatorPairInfo>>,
    /// File names under `feeds/` that failed to parse as DMFR JSON, if the
    /// scan collected any.
    pub list_of_bad_files: Option<Vec<String>>
}

/// Registers `feed` in `feed_hashmap` and records the operator<->feed links
/// for every operator embedded inside the feed (in both direction maps,
/// deduplicated). Each embedded operator is also run through
/// [`process_operator`] with this feed's id as the parent feed id.
pub fn process_feed(
    feed: &dmfr::Feed,
    feed_hashmap: &mut HashMap<FeedId, dmfr::Feed>,
    operator_hashmap: &mut HashMap<OperatorId, dmfr::Operator>,
    operator_to_feed_hashmap: &mut HashMap<OperatorId, Vec<FeedPairInfo>>,
    feed_to_operator_pairs_hashmap: &mut HashMap<FeedId, Vec<OperatorPairInfo>>,
) {
    // or_insert_with avoids cloning the feed when the key already exists.
    feed_hashmap
        .entry(feed.id.clone())
        .or_insert_with(|| feed.clone());

    for operator in feed.operators.iter() {
        process_operator(
            operator,
            feed_hashmap,
            operator_hashmap,
            operator_to_feed_hashmap,
            feed_to_operator_pairs_hashmap,
            Some(&feed.id),
        );

        // operator -> feeds: append this feed if it is not already listed.
        // A linear scan replaces the original per-call HashSet rebuild —
        // the lists are small and this avoids an allocation per insertion.
        let associated_feeds = operator_to_feed_hashmap
            .entry(operator.onestop_id.clone())
            .or_default();
        if !associated_feeds
            .iter()
            .any(|feed_item| feed_item.feed_onestop_id == feed.id)
        {
            associated_feeds.push(FeedPairInfo {
                feed_onestop_id: feed.id.clone(),
                gtfs_agency_id: None,
            });
        }

        // feed -> operators: append this operator if it is not already listed.
        let operator_pairs = feed_to_operator_pairs_hashmap
            .entry(feed.id.clone())
            .or_default();
        if !operator_pairs
            .iter()
            .any(|pair| pair.operator_id == operator.onestop_id)
        {
            operator_pairs.push(OperatorPairInfo {
                operator_id: operator.onestop_id.clone(),
                gtfs_agency_id: None,
            });
        }
    }
}

/// Registers `operator` in `operator_hashmap` and links it to each of its
/// associated feeds in both direction maps (deduplicated).
///
/// When an associated feed entry carries no `feed_onestop_id` of its own,
/// the enclosing feed's id (`parent_feed_id`) is used instead.
///
/// # Panics
/// Panics if an associated feed has no `feed_onestop_id` and
/// `parent_feed_id` is `None` (same as the original `unwrap`).
pub fn process_operator(
    operator: &dmfr::Operator,
    feed_hashmap: &mut HashMap<FeedId, dmfr::Feed>,
    operator_hashmap: &mut HashMap<OperatorId, dmfr::Operator>,
    operator_to_feed_hashmap: &mut HashMap<OperatorId, Vec<FeedPairInfo>>,
    feed_to_operator_pairs_hashmap: &mut HashMap<FeedId, Vec<OperatorPairInfo>>,
    parent_feed_id: Option<&str>,
) {
    // `feed_hashmap` is unused here but kept for signature compatibility
    // with existing callers.
    let _ = &feed_hashmap;

    // or_insert_with avoids cloning the operator when already present.
    operator_hashmap
        .entry(operator.onestop_id.clone())
        .or_insert_with(|| operator.clone());

    for associated_feed in operator.associated_feeds.iter() {
        // Resolve which feed this association points at: the explicit id
        // when present, otherwise the enclosing (parent) feed's id.
        let feed_onestop_id = match associated_feed.feed_onestop_id.as_ref() {
            Some(feed_onestop_id) => feed_onestop_id.clone(),
            None => parent_feed_id
                .expect("associated feed has no feed_onestop_id and no parent feed id")
                .to_string(),
        };

        // BUG FIX: the original copied `feed_onestop_id` into the
        // `gtfs_agency_id` field; the agency id must come from the
        // association's own `gtfs_agency_id`.
        let associated_feed_insertion = FeedPairInfo {
            feed_onestop_id,
            gtfs_agency_id: associated_feed.gtfs_agency_id.clone(),
        };

        // operator -> feeds: append if this feed id is not already listed.
        let associated_feeds = operator_to_feed_hashmap
            .entry(operator.onestop_id.clone())
            .or_default();
        if !associated_feeds.iter().any(|feed_item| {
            feed_item.feed_onestop_id == associated_feed_insertion.feed_onestop_id
        }) {
            associated_feeds.push(associated_feed_insertion.clone());
        }

        // feed -> operators: append if this operator is not already listed.
        let operator_pairs = feed_to_operator_pairs_hashmap
            .entry(associated_feed_insertion.feed_onestop_id.clone())
            .or_default();
        if !operator_pairs
            .iter()
            .any(|pair| pair.operator_id == operator.onestop_id)
        {
            operator_pairs.push(OperatorPairInfo {
                operator_id: operator.onestop_id.clone(),
                gtfs_agency_id: associated_feed_insertion.gtfs_agency_id.clone(),
            });
        }
    }
}

pub fn read_folders(path: &str) -> Result<ReturnDmfrAnalysis, Box<dyn Error + Send + Sync>> {
    let feed_entries = fs::read_dir(format!("{}/feeds/", path))?;

    let mut feed_hashmap: HashMap<FeedId, dmfr::Feed> = HashMap::new();
    let mut operator_hashmap: HashMap<OperatorId, dmfr::Operator> = HashMap::new();
    let mut operator_to_feed_hashmap: HashMap<OperatorId, Vec<FeedPairInfo>> = HashMap::new();
    let mut feed_to_operator_pairs_hashmap: HashMap<FeedId, Vec<OperatorPairInfo>> = HashMap::new();

    let mut list_of_bad_files:Vec<String> = vec![];

    for entry in feed_entries {
        if let Ok(entry) = entry {
            if let Some(file_name) = entry.file_name().to_str() {
                //println!("{}", file_name);
                let contents = fs::read_to_string(format!("{}/feeds/{}", path, file_name));
                if contents.is_err() {
                    eprintln!(
                        "Error Reading Feed File {}: {}",
                        file_name,
                        contents.unwrap_err()
                    );
                    continue;
                }
                let dmfrinfo: Result<dmfr::DistributedMobilityFeedRegistry, SerdeError> =
                    serde_json::from_str(&contents.unwrap());
                match dmfrinfo {
                    Ok(dmfrinfo) => {
                        for feed in dmfrinfo.feeds.into_iter() {
                            process_feed(
                                &feed,
                                &mut feed_hashmap,
                                &mut operator_hashmap,
                                &mut operator_to_feed_hashmap,
                                &mut feed_to_operator_pairs_hashmap,
                            );
                        }

                        for operator in dmfrinfo.operators.into_iter() {
                            process_operator(
                                &operator,
                                &mut feed_hashmap,
                                &mut operator_hashmap,
                                &mut operator_to_feed_hashmap,
                                &mut feed_to_operator_pairs_hashmap,
                                None,
                            );
                        }
                    }
                    Err(_) => {
                        list_of_bad_files.push(file_name.to_string());
                    }
                }
            }
        }
    }

    let operator_entries =
        fs::read_dir(format!("{}/operators/", path)).expect("Transitland atlas missing");

    for operator_file in operator_entries {
        if let Ok(operator_file) = operator_file {
            if let Some(file_name) = operator_file.file_name().to_str() {
                let contents = fs::read_to_string(format!("{}/operators/{}", path, file_name));
                if contents.is_err() {
                    eprintln!(
                        "Error Reading Operator File {}: {}",
                        file_name,
                        contents.unwrap_err()
                    );
                    continue;
                }

                let operator: Result<dmfr::Operator, SerdeError> =
                    serde_json::from_str(&contents.unwrap());

                if let Ok(operator) = operator {
                    process_operator(
                        &operator,
                        &mut feed_hashmap,
                        &mut operator_hashmap,
                        &mut operator_to_feed_hashmap,
                        &mut feed_to_operator_pairs_hashmap,
                        None,
                    );
                }
            }
        }
    }

    let operator_entries = fs::read_dir(format!("{}/operators/switzerland/", path));

    if let Some(operator_entries) = operator_entries.ok() {
        for operator_file in operator_entries {
            if let Ok(operator_file) = operator_file {
                if let Some(file_name) = operator_file.file_name().to_str() {
                    let contents =
                        fs::read_to_string(format!("{}/operators/switzerland/{}", path, file_name));
                    if contents.is_err() {
                        eprintln!(
                            "Error Reading Swiss Operator File {}: {}",
                            file_name,
                            contents.unwrap_err()
                        );
                        continue;
                    }

                    let operator: Result<dmfr::Operator, SerdeError> =
                        serde_json::from_str(&contents.unwrap());

                    if let Ok(operator) = operator {
                        process_operator(
                            &operator,
                            &mut feed_hashmap,
                            &mut operator_hashmap,
                            &mut operator_to_feed_hashmap,
                            &mut feed_to_operator_pairs_hashmap,
                            None,
                        );
                    }
                }
            }
        }
    }

    //cross check feed_to_operator_hashmap into feed_to_operator_pairs_hashmap

    Ok(ReturnDmfrAnalysis {
        feed_hashmap,
        operator_hashmap,
        operator_to_feed_hashmap,
        feed_to_operator_pairs_hashmap,
        list_of_bad_files: Some(list_of_bad_files)
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    // Integration test over a real transitland-atlas checkout expected at
    // ./transitland-atlas/ relative to the working directory —
    // TODO confirm the checkout is present wherever this runs.
    #[test]
    fn test() {
        println!("MAIN TEST");
        let dmfr_result = read_folders("transitland-atlas/").unwrap();

        // Sanity check: the atlas is expected to contain well over 1000 feeds.
        assert!(dmfr_result.feed_hashmap.len() > 1000);

        // NOTE(review): despite the .json extension, these files receive Rust
        // `{:#?}` Debug output, not JSON — confirm whether that is intended.
        fs::write(
            "operator_to_feed_hashmap.json",
            format!("{:#?}", dmfr_result.operator_to_feed_hashmap),
        )
        .expect("Unable to write file");

        fs::write(
            "feed_to_operator_pairs_hashmap.json",
            format!("{:#?}", dmfr_result.feed_to_operator_pairs_hashmap),
        )
        .expect("Unable to write file");

        println!(
            "{} feeds across {} operators",
            dmfr_result.feed_hashmap.len(),
            dmfr_result.operator_hashmap.len()
        );

        println!(
            "Operator to feed hashmap length {}",
            dmfr_result.operator_to_feed_hashmap.len()
        );
        println!(
            "feed_to_operator_pairs_hashmap length {}",
            dmfr_result.feed_to_operator_pairs_hashmap.len()
        );

        // Spot-check two realtime feeds that should gain operator pairs via
        // the associated_feeds resolution path in process_operator.
        assert!(dmfr_result
            .feed_to_operator_pairs_hashmap
            .get("f-ucla~bruinbus~rt")
            .is_some());
        assert!(dmfr_result
            .feed_to_operator_pairs_hashmap
            .get("f-spokanetransitauthority~rt")
            .is_some());
    }
}