// ip2location-bin-format 0.4.0
//
// IP2Location BIN Format
// Documentation
use core::ops::ControlFlow;
use std::io::{BufRead, Error as IoError, Read as _};

use super::schema::{Schema, SchemaSubType, SchemaType, VerifyError};

//
/// Incremental parser for the fixed-layout header described by [`Schema`].
///
/// Progress is remembered between calls to [`Parser::parse`], so a header can
/// be supplied in several pieces.
#[derive(Debug, Default)]
pub struct Parser {
    /// Schema being filled in, one header field at a time.
    inner: Schema,
    /// The most recent header field that was parsed successfully.
    state: State,
    /// Scratch space that receives the raw bytes of the field being read.
    buf: [u8; 4],
}

/// Parsing progress: each variant names the last header field that has been
/// parsed.
///
/// The declaration order is significant. `Parser::parse` relies on the derived
/// `PartialOrd`/`Ord` (comparisons such as `self.state < State::DateParsed`)
/// to decide which fields still have to be read, so variants must stay in the
/// order in which the fields are parsed.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
enum State {
    /// Nothing parsed yet (also the state after a header has been completed).
    Idle,
    SubTypeParsed,
    NumRecordFieldsParsed,
    DateParsed,
    V4RecordsCountParsed,
    V4RecordsPositionStartParsed,
    V6RecordsCountParsed,
    V6RecordsPositionStartParsed,
    V4IndexPositionStartParsed,
    V6IndexPositionStartParsed,
    TypeParsed,
    LicenseCodeParsed,
    /// Last field of the header; only verification remains after this.
    TotalSizeParsed,
}

impl Default for State {
    fn default() -> Self {
        Self::Idle
    }
}

impl Parser {
    /// Width in bytes of a single-byte header field.
    const BYTE_LEN: u8 = 1;
    /// Width in bytes of the date field (year, month, day).
    const DATE_LEN: u8 = 3;
    /// Width in bytes of a 32-bit header field.
    const U32_LEN: u8 = 4;

    /// Creates a parser that has not consumed any header bytes yet.
    pub fn new() -> Self {
        Self::default()
    }

    /// Parses header fields from `r`, resuming after the last field that a
    /// previous call completed.
    ///
    /// Fields are read in this order: sub type (1 byte), number of record
    /// fields (1), date (3), six 32-bit values (IPv4 records count / position,
    /// IPv6 records count / position, IPv4 index position, IPv6 index
    /// position), type (1), license code (1) and total size (4). 32-bit values
    /// are decoded as little-endian, independent of the host byte order.
    ///
    /// # Returns
    ///
    /// * `ControlFlow::Continue(n)` - `r` did not yield a complete next field.
    ///   `n` is the number of bytes turned into fields during this call.
    ///   A field is only accepted when one read returns all of its bytes;
    ///   bytes returned by a short read are *not* included in `n` and are not
    ///   retained by the parser.
    ///   NOTE(review): callers presumably re-supply input starting `n` bytes
    ///   after the start of what they passed in - confirm against call sites.
    /// * `ControlFlow::Break((n, schema))` - the header is complete and passed
    ///   `Schema::verify`. `n` is again the byte count for this call only.
    ///   The parser is back in its initial state and can be reused.
    ///
    /// # Errors
    ///
    /// * [`ParseError::ReadFailed`] - reading from `r` failed. Parsing progress
    ///   is kept.
    /// * [`ParseError::YearOrMonthOrDayValueInvalid`] - only with the `chrono`
    ///   feature: the date bytes do not form a valid calendar date.
    /// * [`ParseError::TypeValueInvalid`] - the type byte is not a known
    ///   `SchemaType`.
    /// * [`ParseError::VerifyFailed`] - `Schema::verify` rejected the header.
    ///
    /// After any of the three validation errors the parser is reset to its
    /// initial state, so it can be reused for another header instead of
    /// failing the same way on every subsequent call.
    pub fn parse<R: BufRead>(
        &mut self,
        r: &mut R,
    ) -> Result<ControlFlow<(usize, Schema), usize>, ParseError> {
        // The limit is adjusted per field so a read never runs into the next field.
        let mut take = r.take(0);
        let mut n_parsed = 0_usize;

        if self.state < State::SubTypeParsed {
            if !self.read_field(&mut take, Self::BYTE_LEN)? {
                return Ok(ControlFlow::Continue(n_parsed));
            }
            self.inner.sub_type = SchemaSubType(self.buf[0]);
            self.state = State::SubTypeParsed;
            n_parsed += usize::from(Self::BYTE_LEN);
        }

        if self.state < State::NumRecordFieldsParsed {
            if !self.read_field(&mut take, Self::BYTE_LEN)? {
                return Ok(ControlFlow::Continue(n_parsed));
            }
            self.inner.num_record_fields = self.buf[0];
            self.state = State::NumRecordFieldsParsed;
            n_parsed += usize::from(Self::BYTE_LEN);
        }

        if self.state < State::DateParsed {
            if !self.read_field(&mut take, Self::DATE_LEN)? {
                return Ok(ControlFlow::Continue(n_parsed));
            }
            let year = self.buf[0];
            let month = self.buf[1];
            let day = self.buf[2];

            #[cfg(feature = "chrono")]
            {
                // The year byte is an offset from 2000.
                let is_valid = chrono::NaiveDate::from_ymd_opt(
                    2000 + i32::from(year),
                    u32::from(month),
                    u32::from(day),
                )
                .is_some();
                if !is_valid {
                    self.reset();
                    return Err(ParseError::YearOrMonthOrDayValueInvalid(year, month, day));
                }
            }

            self.inner.date = (year, month, day);
            self.state = State::DateParsed;
            n_parsed += usize::from(Self::DATE_LEN);
        }

        if self.state < State::V4RecordsCountParsed {
            match self.read_u32(&mut take)? {
                Some(count) => {
                    self.inner.v4_records_count = count;
                    self.state = State::V4RecordsCountParsed;
                    n_parsed += usize::from(Self::U32_LEN);
                }
                None => return Ok(ControlFlow::Continue(n_parsed)),
            }
        }

        if self.state < State::V4RecordsPositionStartParsed {
            match self.read_u32(&mut take)? {
                Some(position_start) => {
                    self.inner.v4_records_position_start = position_start;
                    self.state = State::V4RecordsPositionStartParsed;
                    n_parsed += usize::from(Self::U32_LEN);
                }
                None => return Ok(ControlFlow::Continue(n_parsed)),
            }
        }

        if self.state < State::V6RecordsCountParsed {
            match self.read_u32(&mut take)? {
                Some(count) => {
                    self.inner.v6_records_count = count;
                    self.state = State::V6RecordsCountParsed;
                    n_parsed += usize::from(Self::U32_LEN);
                }
                None => return Ok(ControlFlow::Continue(n_parsed)),
            }
        }

        if self.state < State::V6RecordsPositionStartParsed {
            match self.read_u32(&mut take)? {
                Some(position_start) => {
                    self.inner.v6_records_position_start = position_start;
                    self.state = State::V6RecordsPositionStartParsed;
                    n_parsed += usize::from(Self::U32_LEN);
                }
                None => return Ok(ControlFlow::Continue(n_parsed)),
            }
        }

        if self.state < State::V4IndexPositionStartParsed {
            match self.read_u32(&mut take)? {
                Some(position_start) => {
                    self.inner.v4_index_position_start = position_start;
                    self.state = State::V4IndexPositionStartParsed;
                    n_parsed += usize::from(Self::U32_LEN);
                }
                None => return Ok(ControlFlow::Continue(n_parsed)),
            }
        }

        if self.state < State::V6IndexPositionStartParsed {
            match self.read_u32(&mut take)? {
                Some(position_start) => {
                    self.inner.v6_index_position_start = position_start;
                    self.state = State::V6IndexPositionStartParsed;
                    n_parsed += usize::from(Self::U32_LEN);
                }
                None => return Ok(ControlFlow::Continue(n_parsed)),
            }
        }

        if self.state < State::TypeParsed {
            if !self.read_field(&mut take, Self::BYTE_LEN)? {
                return Ok(ControlFlow::Continue(n_parsed));
            }
            let raw_type = self.buf[0];
            match SchemaType::try_from(raw_type) {
                Ok(r#type) => self.inner.r#type = r#type,
                Err(_) => {
                    self.reset();
                    return Err(ParseError::TypeValueInvalid(raw_type));
                }
            }
            self.state = State::TypeParsed;
            n_parsed += usize::from(Self::BYTE_LEN);
        }

        if self.state < State::LicenseCodeParsed {
            if !self.read_field(&mut take, Self::BYTE_LEN)? {
                return Ok(ControlFlow::Continue(n_parsed));
            }
            self.inner.license_code = self.buf[0];
            self.state = State::LicenseCodeParsed;
            n_parsed += usize::from(Self::BYTE_LEN);
        }

        if self.state < State::TotalSizeParsed {
            match self.read_u32(&mut take)? {
                Some(size) => {
                    self.inner.total_size = size;
                    self.state = State::TotalSizeParsed;
                    n_parsed += usize::from(Self::U32_LEN);
                }
                None => return Ok(ControlFlow::Continue(n_parsed)),
            }
        }

        // Every header byte has been consumed. Reset *before* verifying so a
        // rejected header does not leave the parser stuck in its final state.
        self.reset();
        self.inner.verify().map_err(ParseError::VerifyFailed)?;

        Ok(ControlFlow::Break((n_parsed, self.inner)))
    }

    /// Reads one header field of `len` bytes into the start of `self.buf`.
    ///
    /// Issues a single `read` limited to `len` bytes and returns `true` only
    /// if that read delivered the whole field; a shorter read yields `false`.
    fn read_field<T: std::io::Read>(
        &mut self,
        take: &mut std::io::Take<T>,
        len: u8,
    ) -> Result<bool, ParseError> {
        take.set_limit(u64::from(len));
        let n = take.read(&mut self.buf[..])?;
        Ok(n == usize::from(len))
    }

    /// Reads one little-endian `u32` header field, or `None` on a short read.
    fn read_u32<T: std::io::Read>(
        &mut self,
        take: &mut std::io::Take<T>,
    ) -> Result<Option<u32>, ParseError> {
        if self.read_field(take, Self::U32_LEN)? {
            Ok(Some(u32::from_le_bytes(self.buf)))
        } else {
            Ok(None)
        }
    }

    /// Returns the parser to its initial state and clears the scratch buffer.
    fn reset(&mut self) {
        self.state = State::Idle;
        self.buf = [0; 4];
    }
}

//
/// Errors returned by `Parser::parse`.
#[derive(Debug)]
pub enum ParseError {
    /// Reading from the underlying reader failed.
    ReadFailed(IoError),
    /// The raw `(year, month, day)` header bytes do not form a valid date.
    /// The year byte is interpreted as an offset from 2000. This check is only
    /// compiled in when the `chrono` feature is enabled.
    YearOrMonthOrDayValueInvalid(u8, u8, u8),
    /// The type byte could not be converted into a `SchemaType`.
    TypeValueInvalid(u8),
    /// All fields were read but `Schema::verify` rejected the result.
    VerifyFailed(VerifyError),
}

impl core::fmt::Display for ParseError {
    /// Renders the error using its `Debug` representation.
    ///
    /// The text is produced with a fresh default format spec, so flags given
    /// to the outer formatter (such as `#` or a width) do not affect it.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_fmt(format_args!("{:?}", self))
    }
}

// Uses only the trait's default methods: nothing is overridden here, so a
// wrapped `IoError` or `VerifyError` is not exposed through `Error::source`.
impl std::error::Error for ParseError {}

impl From<IoError> for ParseError {
    fn from(err: IoError) -> Self {
        Self::ReadFailed(err)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    use std::{fs::File, io::Cursor};

    use crate::{
        header::HEADER_LEN,
        test_helper::{ip2location_bin_files, ip2proxy_bin_files},
    };

    /// Reads the first `HEADER_LEN` bytes of the file at `path`, parses them
    /// with a fresh parser and asserts that the header's type is `expected`.
    ///
    /// Panics if the parser asks for more input instead of finishing.
    fn assert_header_type<P>(
        path: P,
        expected: SchemaType,
    ) -> Result<(), Box<dyn std::error::Error>>
    where
        P: AsRef<std::path::Path> + core::fmt::Debug,
    {
        let mut f = File::open(&path)?;
        let mut buf = vec![0; HEADER_LEN as usize];
        f.read_exact(&mut buf)?;

        let mut parser = Parser::new();
        match parser.parse(&mut Cursor::new(buf))? {
            ControlFlow::Break((_, schema)) => {
                assert_eq!(schema.r#type, expected);
                println!("path:{path:?}, schema:{schema:?}");
            }
            x => {
                panic!("path:{path:?}, ret:{x:?}, parser:{parser:?}")
            }
        }

        Ok(())
    }

    /// Every bundled IP2Location / IP2Proxy fixture must parse in one pass and
    /// report the matching schema type.
    #[test]
    fn test_parse() -> Result<(), Box<dyn std::error::Error>> {
        for path in ip2location_bin_files().iter() {
            assert_header_type(path, SchemaType::IP2Location)?;
        }

        for path in ip2proxy_bin_files().iter() {
            assert_header_type(path, SchemaType::IP2Proxy)?;
        }

        Ok(())
    }
}