1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
//! Rust library for reading the [eBird Basic Dataset (EBD)][ebd].
//!
//! # Examples
//!
//! ```rust
//! use std::io;
//!
//! fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let mut ebd_reader = ebd::Reader::from_reader(io::stdin());
//!
//!     while ebd_reader.advance()? {
//!         let record = ebd_reader.read_record()?;
//!
//!         println!(
//!             "(lng: {}, lat: {})",
//!             record.longitude,
//!             record.latitude
//!         );
//!     }
//!
//!     Ok(())
//! }
//! ```

use std::{fmt, io, marker};

/// Streaming reader for eBird Basic Dataset rows.
///
/// Owns the underlying `csv::Reader` plus a single `csv::ByteRecord` that is
/// reused as the buffer for the row most recently read from the input.
pub struct Reader<'a, R>
where
    R: io::Read,
{
    // CSV parser wrapping the caller-supplied input.
    csv_reader: csv::Reader<R>,
    // Buffer holding the bytes of the current row.
    csv_byte_record: csv::ByteRecord,
    // Carries the `'a` lifetime on the type without storing borrowed data.
    phantom_data: marker::PhantomData<&'a ()>,
}

impl<'a, R: io::Read> Reader<'a, R> {
    /// Creates a reader that parses `reader` as tab-delimited data.
    ///
    /// Every other `csv::ReaderBuilder` option is left at its default.
    pub fn from_reader(reader: R) -> Self {
        Self {
            csv_reader: csv::ReaderBuilder::new()
                .delimiter(b'\t')
                .from_reader(reader),
            csv_byte_record: csv::ByteRecord::new(),
            phantom_data: marker::PhantomData,
        }
    }

    /// Reads the next row from the input into the internal record buffer.
    ///
    /// The returned flag is whatever `csv::Reader::read_byte_record` reports;
    /// the crate-level example loops while it is `true`.
    ///
    /// # Errors
    ///
    /// Propagates any error produced by the underlying `csv::Reader`.
    pub fn advance(&mut self) -> csv::Result<bool> {
        self.csv_reader.read_byte_record(&mut self.csv_byte_record)
    }

    /// Deserializes the currently buffered row into a [`Record`].
    ///
    /// No header record is passed to the deserializer (`None`), and the
    /// returned [`Record`] borrows its string fields from the internal buffer.
    ///
    /// # Errors
    ///
    /// Returns the `csv` error if the buffered row cannot be deserialized.
    pub fn read_record(&'a self) -> csv::Result<Record<'a>> {
        self.csv_byte_record.deserialize(None)
    }
}

/// One row of the eBird Basic Dataset.
///
/// Each field carries a `serde(rename = ...)` attribute naming the dataset
/// column it corresponds to. String fields are `&'a str` slices borrowed from
/// the data being deserialized rather than owned copies.
///
/// NOTE(review): `Reader::read_record` deserializes with `None` for headers,
/// so fields are presumably matched by position rather than by the renamed
/// column names; keep the declaration order in sync with the dataset's column
/// order (confirm against the `csv` crate documentation).
#[derive(Debug, serde::Deserialize)]
pub struct Record<'a> {
    #[serde(rename = "GLOBAL UNIQUE IDENTIFIER")]
    pub global_unique_identifier: &'a str,

    #[serde(rename = "LAST EDITED DATE")]
    pub last_edited_date: &'a str,

    #[serde(rename = "TAXONOMIC ORDER")]
    pub taxonomic_order: &'a str,

    #[serde(rename = "CATEGORY")]
    pub category: &'a str,

    #[serde(rename = "COMMON NAME")]
    pub common_name: &'a str,

    #[serde(rename = "SCIENTIFIC NAME")]
    pub scientific_name: &'a str,

    #[serde(rename = "SUBSPECIES COMMON NAME")]
    pub subspecies_common_name: &'a str,

    #[serde(rename = "SUBSPECIES SCIENTIFIC NAME")]
    pub subspecies_scientific_name: &'a str,

    // Kept as raw text rather than parsed into a number.
    #[serde(rename = "OBSERVATION COUNT")]
    pub observation_count: &'a str,

    #[serde(rename = "BREEDING BIRD ATLAS CODE")]
    pub breeding_bird_atlas_code: &'a str,

    #[serde(rename = "BREEDING BIRD ATLAS CATEGORY")]
    pub breeding_bird_atlas_category: &'a str,

    #[serde(rename = "AGE/SEX")]
    pub agesex: &'a str,

    #[serde(rename = "COUNTRY")]
    pub country: &'a str,

    #[serde(rename = "COUNTRY CODE")]
    pub country_code: &'a str,

    #[serde(rename = "STATE")]
    pub state: &'a str,

    #[serde(rename = "STATE CODE")]
    pub state_code: &'a str,

    #[serde(rename = "COUNTY")]
    pub county: &'a str,

    #[serde(rename = "COUNTY CODE")]
    pub county_code: &'a str,

    #[serde(rename = "IBA CODE")]
    pub iba_code: &'a str,

    #[serde(rename = "BCR CODE")]
    pub bcr_code: &'a str,

    #[serde(rename = "USFWS CODE")]
    pub usfws_code: &'a str,

    #[serde(rename = "ATLAS BLOCK")]
    pub atlas_block: &'a str,

    #[serde(rename = "LOCALITY")]
    pub locality: &'a str,

    #[serde(rename = "LOCALITY ID")]
    pub locality_id: &'a str,

    #[serde(rename = "LOCALITY TYPE")]
    pub locality_type: &'a str,

    /// Latitude, parsed into an `f64`.
    #[serde(rename = "LATITUDE")]
    pub latitude: f64,

    /// Longitude, parsed into an `f64`.
    #[serde(rename = "LONGITUDE")]
    pub longitude: f64,

    // Dates and times are kept as raw text; no date parsing happens here.
    #[serde(rename = "OBSERVATION DATE")]
    pub observation_date: &'a str,

    #[serde(rename = "TIME OBSERVATIONS STARTED")]
    pub time_observations_started: &'a str,

    #[serde(rename = "OBSERVER ID")]
    pub observer_id: &'a str,

    #[serde(rename = "SAMPLING EVENT IDENTIFIER")]
    pub sampling_event_identifier: &'a str,

    #[serde(rename = "PROTOCOL TYPE")]
    pub protocol_type: &'a str,

    #[serde(rename = "PROTOCOL CODE")]
    pub protocol_code: &'a str,

    #[serde(rename = "PROJECT CODE")]
    pub project_code: &'a str,

    // Effort fields are kept as raw text rather than parsed into numbers.
    #[serde(rename = "DURATION MINUTES")]
    pub duration_minutes: &'a str,

    #[serde(rename = "EFFORT DISTANCE KM")]
    pub effort_distance_km: &'a str,

    #[serde(rename = "EFFORT AREA HA")]
    pub effort_area_ha: &'a str,

    #[serde(rename = "NUMBER OBSERVERS")]
    pub number_observers: &'a str,

    /// Flag decoded from an integer `0`/`1` by `deserialize_bool_from_u64`.
    #[serde(
        rename = "ALL SPECIES REPORTED",
        deserialize_with = "deserialize_bool_from_u64"
    )]
    pub all_species_reported: bool,

    #[serde(rename = "GROUP IDENTIFIER")]
    pub group_identifier: &'a str,

    /// Flag decoded from an integer `0`/`1` by `deserialize_bool_from_u64`.
    #[serde(rename = "HAS MEDIA", deserialize_with = "deserialize_bool_from_u64")]
    pub has_media: bool,

    /// Flag decoded from an integer `0`/`1` by `deserialize_bool_from_u64`.
    #[serde(rename = "APPROVED", deserialize_with = "deserialize_bool_from_u64")]
    pub approved: bool,

    /// Flag decoded from an integer `0`/`1` by `deserialize_bool_from_u64`.
    #[serde(rename = "REVIEWED", deserialize_with = "deserialize_bool_from_u64")]
    pub reviewed: bool,

    #[serde(rename = "REASON")]
    pub reason: &'a str,

    #[serde(rename = "TRIP COMMENTS")]
    pub trip_comments: &'a str,

    #[serde(rename = "SPECIES COMMENTS")]
    pub species_comments: &'a str,
}

/// Deserializes a `bool` from an unsigned integer that must be `0` or `1`.
///
/// Meant to be referenced from `#[serde(deserialize_with = "...")]`. The
/// deserializer is asked for whatever value it finds (`deserialize_any`), and
/// only the unsigned-integer case is handled here: `0` maps to `false` and `1`
/// maps to `true`.
///
/// # Errors
///
/// Returns an `invalid_value` error when the integer is anything other than
/// `0` or `1`, instead of panicking on malformed input. Values of any other
/// kind are left to the `Visitor` trait's default methods.
pub fn deserialize_bool_from_u64<'de, D>(deserializer: D) -> Result<bool, D::Error>
where
    D: serde::Deserializer<'de>,
{
    struct U64ToBoolVisitor;

    impl<'de> serde::de::Visitor<'de> for U64ToBoolVisitor {
        type Value = bool;

        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
            formatter.write_str("an integer `0` or `1`")
        }

        fn visit_u64<E>(self, n: u64) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            match n {
                0 => Ok(false),
                1 => Ok(true),
                // Report bad data to the caller; `&self` supplies the
                // `expecting` text for the error message.
                _ => Err(E::invalid_value(
                    serde::de::Unexpected::Unsigned(n),
                    &self,
                )),
            }
        }
    }

    deserializer.deserialize_any(U64ToBoolVisitor)
}