ip2location_bin_format/header/
parser.rs1use core::ops::ControlFlow;
2use std::io::{BufRead, Error as IoError, Read as _};
3
4use super::schema::{Schema, SchemaSubType, SchemaType, VerifyError};
5
6#[derive(Debug, Default)]
8pub struct Parser {
9 inner: Schema,
10 state: State,
11 buf: [u8; 4],
12}
13
/// Progress marker recording the last header field that was fully parsed.
///
/// The variants are declared in the order in which the fields are read, and
/// the derived `Ord` follows that declaration order. `Parser::parse` relies
/// on this: it compares the current state with `<` to skip fields that were
/// already completed by an earlier call. Do not reorder the variants.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
enum State {
    /// Nothing has been parsed yet.
    Idle,
    SubTypeParsed,
    NumRecordFieldsParsed,
    DateParsed,
    V4RecordsCountParsed,
    V4RecordsPositionStartParsed,
    V6RecordsCountParsed,
    V6RecordsPositionStartParsed,
    V4IndexPositionStartParsed,
    V6IndexPositionStartParsed,
    TypeParsed,
    LicenseCodeParsed,
    /// Every header field has been read.
    TotalSizeParsed,
}

impl Default for State {
    /// A fresh parser starts with no field parsed.
    fn default() -> Self {
        State::Idle
    }
}
36
37impl Parser {
38 pub fn new() -> Self {
39 Self::default()
40 }
41
42 pub fn parse<R: BufRead>(
43 &mut self,
44 r: &mut R,
45 ) -> Result<ControlFlow<(usize, Schema), usize>, ParseError> {
46 let mut take = r.take(0);
47 let mut n_parsed = 0_usize;
48
49 if self.state < State::SubTypeParsed {
50 take.set_limit(1);
51
52 let n = take.read(&mut self.buf[..])?;
53 match n {
54 0 => return Ok(ControlFlow::Continue(n_parsed)),
55 1 => {
56 let sub_type = SchemaSubType(self.buf[0]);
57
58 self.state = State::SubTypeParsed;
59 self.inner.sub_type = sub_type;
60 n_parsed += n;
61 }
62 _ => unreachable!(),
63 }
64 }
65
66 if self.state < State::NumRecordFieldsParsed {
67 take.set_limit(1);
68
69 let n = take.read(&mut self.buf[..])?;
70 match n {
71 0 => return Ok(ControlFlow::Continue(n_parsed)),
72 1 => {
73 let num_record_fields = self.buf[0];
74
75 self.state = State::NumRecordFieldsParsed;
76 self.inner.num_record_fields = num_record_fields;
77 n_parsed += n;
78 }
79 _ => unreachable!(),
80 }
81 }
82
83 if self.state < State::DateParsed {
84 take.set_limit(3);
85
86 let n = take.read(&mut self.buf[..])?;
87 match n {
88 0..=2 => return Ok(ControlFlow::Continue(n_parsed)),
89 3 => {
90 let year = self.buf[0];
91 let month = self.buf[1];
92 let day = self.buf[2];
93
94 #[cfg(feature = "chrono")]
95 {
96 chrono::NaiveDate::from_ymd_opt(
97 (2000 + year as u16) as i32,
98 month as u32,
99 day as u32,
100 )
101 .ok_or(ParseError::YearOrMonthOrDayValueInvalid(year, month, day))?;
102 }
103
104 let date = (year, month, day);
105
106 self.state = State::DateParsed;
107 self.inner.date = date;
108 n_parsed += n;
109 }
110 _ => unreachable!(),
111 }
112 }
113
114 if self.state < State::V4RecordsCountParsed {
115 take.set_limit(4);
116
117 let n = take.read(&mut self.buf[..])?;
118 match n {
119 0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
120 4 => {
121 let count = u32::from_ne_bytes(self.buf);
122
123 self.state = State::V4RecordsCountParsed;
124 self.inner.v4_records_count = count;
125 n_parsed += n;
126 }
127 _ => unreachable!(),
128 }
129 }
130
131 if self.state < State::V4RecordsPositionStartParsed {
132 take.set_limit(4);
133
134 let n = take.read(&mut self.buf[..])?;
135 match n {
136 0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
137 4 => {
138 let position_start = u32::from_ne_bytes(self.buf);
139
140 self.state = State::V4RecordsPositionStartParsed;
141 self.inner.v4_records_position_start = position_start;
142 n_parsed += n;
143 }
144 _ => unreachable!(),
145 }
146 }
147
148 if self.state < State::V6RecordsCountParsed {
149 take.set_limit(4);
150
151 let n = take.read(&mut self.buf[..])?;
152 match n {
153 0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
154 4 => {
155 let count = u32::from_ne_bytes(self.buf);
156
157 self.state = State::V6RecordsCountParsed;
158 self.inner.v6_records_count = count;
159 n_parsed += n;
160 }
161 _ => unreachable!(),
162 }
163 }
164
165 if self.state < State::V6RecordsPositionStartParsed {
166 take.set_limit(4);
167
168 let n = take.read(&mut self.buf[..])?;
169 match n {
170 0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
171 4 => {
172 let position_start = u32::from_ne_bytes(self.buf);
173
174 self.state = State::V6RecordsPositionStartParsed;
175 self.inner.v6_records_position_start = position_start;
176 n_parsed += n;
177 }
178 _ => unreachable!(),
179 }
180 }
181
182 if self.state < State::V4IndexPositionStartParsed {
183 take.set_limit(4);
184
185 let n = take.read(&mut self.buf[..])?;
186 match n {
187 0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
188 4 => {
189 let position_start = u32::from_ne_bytes(self.buf);
190
191 self.state = State::V4IndexPositionStartParsed;
192 self.inner.v4_index_position_start = position_start;
193 n_parsed += n;
194 }
195 _ => unreachable!(),
196 }
197 }
198
199 if self.state < State::V6IndexPositionStartParsed {
200 take.set_limit(4);
201
202 let n = take.read(&mut self.buf[..])?;
203 match n {
204 0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
205 4 => {
206 let position_start = u32::from_ne_bytes(self.buf);
207
208 self.state = State::V6IndexPositionStartParsed;
209 self.inner.v6_index_position_start = position_start;
210 n_parsed += n;
211 }
212 _ => unreachable!(),
213 }
214 }
215
216 if self.state < State::TypeParsed {
217 take.set_limit(1);
218
219 let n = take.read(&mut self.buf[..])?;
220 match n {
221 0 => return Ok(ControlFlow::Continue(n_parsed)),
222 1 => {
223 let r#type = self.buf[0];
224
225 let r#type = SchemaType::try_from(r#type)
226 .map_err(|_| ParseError::TypeValueInvalid(r#type))?;
227
228 self.state = State::TypeParsed;
229 self.inner.r#type = r#type;
230 n_parsed += n;
231 }
232 _ => unreachable!(),
233 }
234 }
235
236 if self.state < State::LicenseCodeParsed {
237 take.set_limit(1);
238
239 let n = take.read(&mut self.buf[..])?;
240 match n {
241 0 => return Ok(ControlFlow::Continue(n_parsed)),
242 1 => {
243 let license_code = self.buf[0];
244
245 self.state = State::LicenseCodeParsed;
246 self.inner.license_code = license_code;
247 n_parsed += n;
248 }
249 _ => unreachable!(),
250 }
251 }
252
253 if self.state < State::TotalSizeParsed {
254 take.set_limit(4);
255
256 let n = take.read(&mut self.buf[..])?;
257 match n {
258 0..=3 => return Ok(ControlFlow::Continue(n_parsed)),
259 4 => {
260 let size = u32::from_ne_bytes(self.buf);
261
262 self.state = State::TotalSizeParsed;
263 self.inner.total_size = size;
264 n_parsed += n;
265 }
266 _ => unreachable!(),
267 }
268 }
269
270 self.inner.verify().map_err(ParseError::VerifyFailed)?;
272
273 self.state = State::Idle;
275 self.buf.fill_with(Default::default);
276
277 Ok(ControlFlow::Break((n_parsed, self.inner)))
278 }
279}
280
281#[derive(Debug)]
283pub enum ParseError {
284 ReadFailed(IoError),
285 YearOrMonthOrDayValueInvalid(u8, u8, u8),
286 TypeValueInvalid(u8),
287 VerifyFailed(VerifyError),
288}
289
290impl core::fmt::Display for ParseError {
291 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
292 write!(f, "{self:?}")
293 }
294}
295
296impl std::error::Error for ParseError {}
297
298impl From<IoError> for ParseError {
299 fn from(err: IoError) -> Self {
300 Self::ReadFailed(err)
301 }
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    use std::{fs::File, io::Cursor};

    use crate::{
        header::HEADER_LEN,
        test_helper::{ip2location_bin_files, ip2proxy_bin_files},
    };

    /// Parses the first `HEADER_LEN` bytes of every file in `paths` with a
    /// fresh parser and checks that parsing completes with the `expected`
    /// schema type.
    fn assert_header_type<I>(
        paths: I,
        expected: SchemaType,
    ) -> Result<(), Box<dyn std::error::Error>>
    where
        I: IntoIterator,
        I::Item: AsRef<std::path::Path> + core::fmt::Debug,
    {
        for path in paths {
            let mut buf = vec![0; HEADER_LEN as usize];
            File::open(&path)?.read_exact(&mut buf)?;

            let mut parser = Parser::new();
            match parser.parse(&mut Cursor::new(buf))? {
                ControlFlow::Break((_, schema)) => {
                    assert_eq!(schema.r#type, expected);
                    println!("path:{path:?}, schema:{schema:?}");
                }
                // A complete header must never leave the parser waiting for
                // more input.
                x => {
                    panic!("path:{path:?}, ret:{x:?}, parser:{parser:?}")
                }
            }
        }

        Ok(())
    }

    #[test]
    fn test_parse() -> Result<(), Box<dyn std::error::Error>> {
        assert_header_type(ip2location_bin_files().iter(), SchemaType::IP2Location)?;
        assert_header_type(ip2proxy_bin_files().iter(), SchemaType::IP2Proxy)?;

        Ok(())
    }
}