ip2location_bin_format/header/
parser.rs

1use core::ops::ControlFlow;
2use std::io::{BufRead, Error as IoError, Read as _};
3
4use super::schema::{Schema, SchemaSubType, SchemaType, VerifyError};
5
6//
7#[derive(Debug, Default)]
8pub struct Parser {
9    inner: Schema,
10    state: State,
11    buf: [u8; 4],
12}
13
/// Progress marker for [`Parser`]: each variant names the header field decoded most recently.
///
/// Variants are declared in the order the fields are read. `Parser::parse` compares states
/// with `<` (via the derived `PartialOrd`) to skip fields that are already done, so the
/// declaration order must not be changed.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
enum State {
    /// Nothing decoded yet (also the state restored after a complete header).
    Idle,
    /// `sub_type` has been stored.
    SubTypeParsed,
    /// `num_record_fields` has been stored.
    NumRecordFieldsParsed,
    /// `date` (year, month, day) has been stored.
    DateParsed,
    /// `v4_records_count` has been stored.
    V4RecordsCountParsed,
    /// `v4_records_position_start` has been stored.
    V4RecordsPositionStartParsed,
    /// `v6_records_count` has been stored.
    V6RecordsCountParsed,
    /// `v6_records_position_start` has been stored.
    V6RecordsPositionStartParsed,
    /// `v4_index_position_start` has been stored.
    V4IndexPositionStartParsed,
    /// `v6_index_position_start` has been stored.
    V6IndexPositionStartParsed,
    /// `type` has been stored.
    TypeParsed,
    /// `license_code` has been stored.
    LicenseCodeParsed,
    /// `total_size` has been stored; only verification remains.
    TotalSizeParsed,
}
30
31impl Default for State {
32    fn default() -> Self {
33        Self::Idle
34    }
35}
36
37impl Parser {
38    pub fn new() -> Self {
39        Self::default()
40    }
41
42    pub fn parse<R: BufRead>(
43        &mut self,
44        r: &mut R,
45    ) -> Result<ControlFlow<(usize, Schema), usize>, ParseError> {
46        let mut take = r.take(0);
47        let mut n_parsed = 0_usize;
48
49        if self.state < State::SubTypeParsed {
50            take.set_limit(1);
51
52            let n = take.read(&mut self.buf[..])?;
53            match n {
54                0 => return Ok(ControlFlow::Continue(n_parsed)),
55                1 => {
56                    let sub_type = SchemaSubType(self.buf[0]);
57
58                    self.state = State::SubTypeParsed;
59                    self.inner.sub_type = sub_type;
60                    n_parsed += n;
61                }
62                _ => unreachable!(),
63            }
64        }
65
66        if self.state < State::NumRecordFieldsParsed {
67            take.set_limit(1);
68
69            let n = take.read(&mut self.buf[..])?;
70            match n {
71                0 => return Ok(ControlFlow::Continue(n_parsed)),
72                1 => {
73                    let num_record_fields = self.buf[0];
74
75                    self.state = State::NumRecordFieldsParsed;
76                    self.inner.num_record_fields = num_record_fields;
77                    n_parsed += n;
78                }
79                _ => unreachable!(),
80            }
81        }
82
83        if self.state < State::DateParsed {
84            take.set_limit(3);
85
86            let n = take.read(&mut self.buf[..])?;
87            match n {
88                0..=2 => return Ok(ControlFlow::Continue(n_parsed)),
89                3 => {
90                    let year = self.buf[0];
91                    let month = self.buf[1];
92                    let day = self.buf[2];
93
94                    #[cfg(feature = "chrono")]
95                    {
96                        chrono::NaiveDate::from_ymd_opt(
97                            (2000 + year as u16) as i32,
98                            month as u32,
99                            day as u32,
100                        )
101                        .ok_or(ParseError::YearOrMonthOrDayValueInvalid(year, month, day))?;
102                    }
103
104                    let date = (year, month, day);
105
106                    self.state = State::DateParsed;
107                    self.inner.date = date;
108                    n_parsed += n;
109                }
110                _ => unreachable!(),
111            }
112        }
113
114        if self.state < State::V4RecordsCountParsed {
115            take.set_limit(4);
116
117            let n = take.read(&mut self.buf[..])?;
118            match n {
119                0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
120                4 => {
121                    let count = u32::from_ne_bytes(self.buf);
122
123                    self.state = State::V4RecordsCountParsed;
124                    self.inner.v4_records_count = count;
125                    n_parsed += n;
126                }
127                _ => unreachable!(),
128            }
129        }
130
131        if self.state < State::V4RecordsPositionStartParsed {
132            take.set_limit(4);
133
134            let n = take.read(&mut self.buf[..])?;
135            match n {
136                0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
137                4 => {
138                    let position_start = u32::from_ne_bytes(self.buf);
139
140                    self.state = State::V4RecordsPositionStartParsed;
141                    self.inner.v4_records_position_start = position_start;
142                    n_parsed += n;
143                }
144                _ => unreachable!(),
145            }
146        }
147
148        if self.state < State::V6RecordsCountParsed {
149            take.set_limit(4);
150
151            let n = take.read(&mut self.buf[..])?;
152            match n {
153                0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
154                4 => {
155                    let count = u32::from_ne_bytes(self.buf);
156
157                    self.state = State::V6RecordsCountParsed;
158                    self.inner.v6_records_count = count;
159                    n_parsed += n;
160                }
161                _ => unreachable!(),
162            }
163        }
164
165        if self.state < State::V6RecordsPositionStartParsed {
166            take.set_limit(4);
167
168            let n = take.read(&mut self.buf[..])?;
169            match n {
170                0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
171                4 => {
172                    let position_start = u32::from_ne_bytes(self.buf);
173
174                    self.state = State::V6RecordsPositionStartParsed;
175                    self.inner.v6_records_position_start = position_start;
176                    n_parsed += n;
177                }
178                _ => unreachable!(),
179            }
180        }
181
182        if self.state < State::V4IndexPositionStartParsed {
183            take.set_limit(4);
184
185            let n = take.read(&mut self.buf[..])?;
186            match n {
187                0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
188                4 => {
189                    let position_start = u32::from_ne_bytes(self.buf);
190
191                    self.state = State::V4IndexPositionStartParsed;
192                    self.inner.v4_index_position_start = position_start;
193                    n_parsed += n;
194                }
195                _ => unreachable!(),
196            }
197        }
198
199        if self.state < State::V6IndexPositionStartParsed {
200            take.set_limit(4);
201
202            let n = take.read(&mut self.buf[..])?;
203            match n {
204                0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
205                4 => {
206                    let position_start = u32::from_ne_bytes(self.buf);
207
208                    self.state = State::V6IndexPositionStartParsed;
209                    self.inner.v6_index_position_start = position_start;
210                    n_parsed += n;
211                }
212                _ => unreachable!(),
213            }
214        }
215
216        if self.state < State::TypeParsed {
217            take.set_limit(1);
218
219            let n = take.read(&mut self.buf[..])?;
220            match n {
221                0 => return Ok(ControlFlow::Continue(n_parsed)),
222                1 => {
223                    let r#type = self.buf[0];
224
225                    let r#type = SchemaType::try_from(r#type)
226                        .map_err(|_| ParseError::TypeValueInvalid(r#type))?;
227
228                    self.state = State::TypeParsed;
229                    self.inner.r#type = r#type;
230                    n_parsed += n;
231                }
232                _ => unreachable!(),
233            }
234        }
235
236        if self.state < State::LicenseCodeParsed {
237            take.set_limit(1);
238
239            let n = take.read(&mut self.buf[..])?;
240            match n {
241                0 => return Ok(ControlFlow::Continue(n_parsed)),
242                1 => {
243                    let license_code = self.buf[0];
244
245                    self.state = State::LicenseCodeParsed;
246                    self.inner.license_code = license_code;
247                    n_parsed += n;
248                }
249                _ => unreachable!(),
250            }
251        }
252
253        if self.state < State::TotalSizeParsed {
254            take.set_limit(4);
255
256            let n = take.read(&mut self.buf[..])?;
257            match n {
258                0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
259                4 => {
260                    let size = u32::from_ne_bytes(self.buf);
261
262                    self.state = State::TotalSizeParsed;
263                    self.inner.total_size = size;
264                    n_parsed += n;
265                }
266                _ => unreachable!(),
267            }
268        }
269
270        //
271        self.inner.verify().map_err(ParseError::VerifyFailed)?;
272
273        //
274        self.state = State::Idle;
275        self.buf.fill_with(Default::default);
276
277        Ok(ControlFlow::Break((n_parsed, self.inner)))
278    }
279}
280
281//
282#[derive(Debug)]
283pub enum ParseError {
284    ReadFailed(IoError),
285    YearOrMonthOrDayValueInvalid(u8, u8, u8),
286    TypeValueInvalid(u8),
287    VerifyFailed(VerifyError),
288}
289
290impl core::fmt::Display for ParseError {
291    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
292        write!(f, "{self:?}")
293    }
294}
295
296impl std::error::Error for ParseError {}
297
298impl From<IoError> for ParseError {
299    fn from(err: IoError) -> Self {
300        Self::ReadFailed(err)
301    }
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    use std::{fs::File, io::Cursor};

    use crate::{
        header::HEADER_LEN,
        test_helper::{ip2location_bin_files, ip2proxy_bin_files},
    };

    /// Parses the first `HEADER_LEN` bytes of every file returned by the test helpers and
    /// checks that a complete header with the expected type comes back.
    #[test]
    fn test_parse() -> Result<(), Box<dyn std::error::Error>> {
        // Files listed by `ip2location_bin_files` must report the IP2Location type.
        for path in ip2location_bin_files().iter() {
            let mut buf = vec![0_u8; HEADER_LEN as usize];
            File::open(path)?.read_exact(&mut buf)?;

            let mut parser = Parser::new();
            let mut cursor = Cursor::new(buf);
            match parser.parse(&mut cursor)? {
                ControlFlow::Break((_, schema)) => {
                    assert_eq!(schema.r#type, SchemaType::IP2Location);
                    println!("path:{path:?}, schema:{schema:?}");
                }
                // A full header was supplied, so anything but `Break` is a failure.
                x => {
                    panic!("path:{path:?}, ret:{x:?}, parser:{parser:?}")
                }
            }
        }

        // Files listed by `ip2proxy_bin_files` must report the IP2Proxy type.
        for path in ip2proxy_bin_files().iter() {
            let mut buf = vec![0_u8; HEADER_LEN as usize];
            File::open(path)?.read_exact(&mut buf)?;

            let mut parser = Parser::new();
            let mut cursor = Cursor::new(buf);
            match parser.parse(&mut cursor)? {
                ControlFlow::Break((_, schema)) => {
                    assert_eq!(schema.r#type, SchemaType::IP2Proxy);
                    println!("path:{path:?}, schema:{schema:?}");
                }
                // A full header was supplied, so anything but `Break` is a failure.
                x => {
                    panic!("path:{path:?}, ret:{x:?}, parser:{parser:?}")
                }
            }
        }

        Ok(())
    }
}