ip_cidr/
parser.rs

1use core::{fmt, net, slice, ptr};
2
///Address family detected so far while scanning the input
enum FamilyType {
    ///No separator has decided the family yet
    Unknown,
    ///Family was fixed to IPv4 by `.` separator following digits
    V4,
    ///Family was fixed to IPv6 by `:` separator
    V6,
}
8
///Describes what the parser consumed most recently
#[derive(Debug)]
enum ParserState {
    ///Nothing has been consumed yet
    Initial,
    ///Parser is inside of a run of digit characters
    Digit,
    ///Previously handled character was `.`
    V4Sep,
    ///Previously handled character was `:`
    V6Sep,
}
16
///Bit flags stored in `Parser::flags`
mod flag {
    ///Set once `::` zero abbreviation has been encountered
    pub const IS_IPV6_ZERO_SKIP: u8 = 1 << 1;
    ///Set when the very first character of input is `:`, cleared when second `:` follows
    pub const IS_IPV6_SEP_INITIAL: u8 = 1 << 2;
}
21
22struct Parser<'a> {
23    state: ParserState,
24    family: FamilyType,
25    flags: u8,
26    //Number of address components
27    //For IPv4 it is always 4
28    //For normal IPv6 it is always 8
29    components_size: u8,
30    components: [u16; 8],
31    zero_component_start: u8,
32    start_digit_position: usize,
33    text: &'a [u8],
34}
35
36impl<'a> Parser<'a> {
37    const IPV4_LEN: u8 = 4;
38    const IPV6_LEN: u8 = 8;
39
40    #[inline(always)]
41    const fn get_current_component(&self, component_sep_pos: usize) -> &'a str {
42        unsafe {
43            core::str::from_utf8_unchecked(
44                slice::from_raw_parts(self.text.as_ptr().add(self.start_digit_position), component_sep_pos.saturating_sub(self.start_digit_position))
45            )
46        }
47    }
48
49    const fn extract_v4_component(&mut self, component_sep_pos: usize) -> Option<ParseError<'a>> {
50        let text = self.get_current_component(component_sep_pos);
51        if self.components_size >= Self::IPV4_LEN {
52            return Some(ParseError::Ipv4InvalidComponentSize(self.components_size.saturating_add(1)));
53        }
54
55        match u8::from_str_radix(text, 10) {
56            Ok(component) => {
57                self.components[self.components_size as usize] = component as _;
58                self.components_size = self.components_size.saturating_add(1);
59                self.start_digit_position = 0;
60                None
61            },
62            Err(_) => Some(ParseError::InvalidComponent(text)),
63        }
64    }
65
66    const fn extract_v6_component(&mut self, component_sep_pos: usize) -> Option<ParseError<'a>> {
67        let text = self.get_current_component(component_sep_pos);
68        if self.components_size >= Self::IPV6_LEN {
69            return Some(ParseError::Ipv6InvalidComponentSize(self.components_size.saturating_add(1)));
70        }
71
72        match u16::from_str_radix(text, 16) {
73            Ok(component) => {
74                self.components[self.components_size as usize] = component;
75                self.components_size = self.components_size.saturating_add(1);
76                self.start_digit_position = 0;
77                None
78            },
79            Err(_) => Some(ParseError::InvalidComponent(text)),
80        }
81    }
82
83    const fn read_ip_at_last(&mut self, component_sep_pos: usize) -> Result<net::IpAddr, ParseError<'a>> {
84        match self.family {
85            FamilyType::V4 => {
86                if let Some(error) = self.extract_v4_component(component_sep_pos) {
87                    return Err(error)
88                }
89
90                if self.components_size == Self::IPV4_LEN {
91                    Ok(
92                        net::IpAddr::V4(
93                            net::Ipv4Addr::new(
94                                self.components[0] as _,
95                                self.components[1] as _,
96                                self.components[2] as _,
97                                self.components[3] as _
98                            )
99                        )
100                    )
101                } else {
102                    return Err(ParseError::Ipv4InvalidComponentSize(self.components_size));
103                }
104            }
105            FamilyType::V6 => {
106                if let Some(error) = self.extract_v6_component(component_sep_pos) {
107                    return Err(error)
108                }
109
110                self.read_ipv6()
111            }
112            FamilyType::Unknown => match self.state {
113                ParserState::Initial => Err(ParseError::MissingIp),
114                _ => Err(ParseError::InvalidIp),
115            }
116        }
117    }
118
119    const fn read_ipv6(&mut self) -> Result<net::IpAddr, ParseError<'a>> {
120        if self.components_size > Self::IPV6_LEN {
121            Err(ParseError::InvalidIpv6)
122        } else {
123            if self.components_size < Self::IPV6_LEN {
124                if self.flags & flag::IS_IPV6_ZERO_SKIP == flag::IS_IPV6_ZERO_SKIP {
125                    let zero_len = Self::IPV6_LEN.saturating_sub(self.components_size);
126
127                    unsafe {
128                        //always use the *same* pointer otherwise miri will complain about retag
129                        let components_ptr = self.components.as_mut_ptr();
130                        ptr::copy(
131                            components_ptr.add(self.zero_component_start as _),
132                            components_ptr.add(self.zero_component_start.saturating_add(zero_len) as _),
133                            self.components_size.saturating_sub(self.zero_component_start) as _);
134                        ptr::write_bytes(components_ptr.add(self.zero_component_start as _), 0, zero_len as _);
135                    }
136
137                } else {
138                    return Err(ParseError::Ipv6InvalidComponentSize(self.components_size));
139                }
140            }
141
142            let ip = net::Ipv6Addr::new(
143                self.components[0], self.components[1],
144                self.components[2], self.components[3],
145                self.components[4], self.components[5],
146                self.components[6], self.components[7]
147            );
148            Ok(net::IpAddr::V6(ip))
149        }
150    }
151
152    #[inline(always)]
153    const fn on_digit(&mut self, pos: usize) -> Option<ParseError<'a>> {
154        match self.state {
155            ParserState::Digit => None,
156            ParserState::V6Sep if self.flags & flag::IS_IPV6_SEP_INITIAL == flag::IS_IPV6_SEP_INITIAL => Some(ParseError::InvalidIpv6),
157            _ => {
158                self.state = ParserState::Digit;
159                self.start_digit_position = pos;
160                None
161            }
162        }
163    }
164
165    #[inline(always)]
166    const fn on_v4_sep(&mut self, pos: usize) -> Option<ParseError<'a>> {
167        let result = match self.state {
168            ParserState::Digit => match self.family {
169                FamilyType::V6 => return Some(ParseError::InvalidIpv6),
170                FamilyType::Unknown => {
171                    self.family = FamilyType::V4;
172                    self.extract_v4_component(pos)
173                },
174                FamilyType::V4 => self.extract_v4_component(pos),
175            },
176            ParserState::V4Sep | ParserState::V6Sep | ParserState::Initial => Some(ParseError::InvalidIpv4),
177        };
178        self.state = ParserState::V4Sep;
179        result
180    }
181
182    #[inline(always)]
183    const fn on_v6_sep(&mut self, pos: usize) -> Option<ParseError<'a>> {
184        let result = match self.state {
185            ParserState::Digit => match self.family {
186                FamilyType::V4 => return Some(ParseError::InvalidIpv4),
187                FamilyType::Unknown => {
188                    self.family = FamilyType::V6;
189                    self.extract_v6_component(pos)
190                },
191                FamilyType::V6 => self.extract_v6_component(pos),
192            },
193            ParserState::V6Sep => {
194                //Only 1 zero skip is allowed
195                if (self.flags & flag::IS_IPV6_ZERO_SKIP) == flag::IS_IPV6_ZERO_SKIP {
196                    return Some(ParseError::Ipv6MultipleZeroAbbrv);
197                } else {
198                    self.flags = (self.flags & !flag::IS_IPV6_SEP_INITIAL) | flag::IS_IPV6_ZERO_SKIP;
199                    self.zero_component_start = self.components_size;
200                    self.family = FamilyType::V6;
201                    return None
202                }
203            },
204            //You can start with double ::
205            ParserState::Initial => {
206                self.flags |= flag::IS_IPV6_SEP_INITIAL;
207                None
208            }
209            ParserState::V4Sep => Some(ParseError::InvalidIpv4),
210        };
211
212        self.state = ParserState::V6Sep;
213        result
214    }
215
216    //Handles last address component if any
217    const fn on_ip_end(&mut self, last_pos: usize) -> Result<net::IpAddr, ParseError<'a>> {
218        match self.state {
219            ParserState::Digit => self.read_ip_at_last(last_pos),
220            ParserState::V4Sep => Err(ParseError::InvalidIpv4),
221            ParserState::V6Sep if self.flags & flag::IS_IPV6_ZERO_SKIP == flag::IS_IPV6_ZERO_SKIP => {
222                if self.components_size == 0 {
223                    Ok(net::IpAddr::V6(net::Ipv6Addr::UNSPECIFIED))
224                } else {
225                    self.read_ipv6()
226                }
227            },
228            ParserState::V6Sep => Err(ParseError::InvalidIpv6),
229            ParserState::Initial => Err(ParseError::MissingIp),
230        }
231    }
232
233    //Extracts prefix after `pos`
234    const fn on_cidr_sep(&mut self, pos: usize) -> Result<u8, ParseError<'a>> {
235        let digit_pos = pos.saturating_add(1);
236        if digit_pos >= self.text.len() {
237            return Err(ParseError::MissingCidr);
238        }
239
240        let text = unsafe {
241            core::str::from_utf8_unchecked(
242                slice::from_raw_parts(self.text.as_ptr().add(digit_pos), self.text.len().saturating_sub(digit_pos))
243            )
244        };
245
246        match u8::from_str_radix(text, 10) {
247            Ok(result) => match self.family {
248                FamilyType::V4 => {
249                    if result > crate::v4::BITS_LEN {
250                        Err(ParseError::Ipv4CidrPrefixOverflow(result))
251                    } else {
252                        Ok(result)
253                    }
254                },
255                FamilyType::V6 => {
256                    if result > crate::v6::BITS_LEN {
257                        Err(ParseError::Ipv6CidrPrefixOverflow(result))
258                    } else {
259                        Ok(result)
260                    }
261                },
262                FamilyType::Unknown => Err(ParseError::InvalidCidr(text))
263            }
264            Err(_) => Err(ParseError::InvalidCidr(text)),
265        }
266    }
267
268    const fn parse(&mut self) -> Result<(net::IpAddr, Option<u8>), ParseError<'a>> {
269        let mut idx = 0;
270
271        while idx < self.text.len() {
272            let ch = self.text[idx];
273            if ch.is_ascii_hexdigit() {
274                if let Some(error) = self.on_digit(idx) {
275                    return Err(error);
276                }
277            } else if ch == b'.' {
278                if let Some(error) = self.on_v4_sep(idx) {
279                    return Err(error)
280                }
281            } else if ch == b':' {
282                if let Some(error) = self.on_v6_sep(idx) {
283                    return Err(error)
284                }
285            } else if ch == b'/' {
286                let ip = match self.on_ip_end(idx) {
287                    Ok(extracted_ip) => extracted_ip,
288                    Err(error) => return Err(error),
289                };
290                match self.on_cidr_sep(idx) {
291                    Ok(cidr) => return Ok((ip, Some(cidr))),
292                    Err(error) => return Err(error),
293                }
294            } else if ch.is_ascii() {
295                return Err(ParseError::UnexpectedCharacter(ch as _, idx));
296            } else {
297                return Err(ParseError::UnexpectedCharacter(ch as _, idx));
298            }
299
300            idx = idx + 1;
301        }
302
303        match self.on_ip_end(idx) {
304            Ok(ip) => Ok((ip, None)),
305            Err(error) => Err(error)
306        }
307    }
308}
309
///Possible errors when parsing IP address with optional CIDR prefix
#[derive(Debug, PartialEq, Eq)]
pub enum ParseError<'a> {
    ///Address component cannot be parsed as number, holds text of the component
    InvalidComponent(&'a str),
    ///CIDR prefix is not valid, holds text of the prefix
    InvalidCidr(&'a str),
    ///Unexpected character together with position where it is encountered at
    UnexpectedCharacter(char, usize),
    ///Input is not valid IP
    InvalidIp,
    ///Address is not valid IPv4
    InvalidIpv4,
    ///IPv4 Address must have 4 components, holds number of components found
    Ipv4InvalidComponentSize(u8),
    ///Address is not valid IPv6
    InvalidIpv6,
    ///IPv6 Address must have 8 components, holds number of components found
    Ipv6InvalidComponentSize(u8),
    ///IPv6 contains more than 1 zero abbreviation
    Ipv6MultipleZeroAbbrv,
    ///Unexpected Non-ASCII character encountered at specified position
    NonAsciiCharacter(usize),
    ///IP address is not specified
    MissingIp,
    ///Prefix is not specified
    MissingCidr,
    ///Prefix is greater than 32, holds the prefix
    Ipv4CidrPrefixOverflow(u8),
    ///Prefix is greater than 128, holds the prefix
    Ipv6CidrPrefixOverflow(u8),
}
342
343impl fmt::Display for ParseError<'_> {
344    #[inline]
345    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
346        match self {
347            Self::InvalidIp => fmt.write_str("Input is not valid IP"),
348            Self::InvalidIpv4 => fmt.write_str("Address is not valid IPv4"),
349            Self::InvalidIpv6 => fmt.write_str("Address is not valid IPv6"),
350            Self::Ipv4InvalidComponentSize(size) => fmt.write_fmt(format_args!("IPv4 Address has '{size}' components but expected 4")),
351            Self::Ipv6InvalidComponentSize(size) => fmt.write_fmt(format_args!("IPv6 Address has '{size}' components but expected 8")),
352            Self::Ipv6MultipleZeroAbbrv => fmt.write_str("IPv6 contains more than 1 zero abbreviation"),
353            Self::UnexpectedCharacter(ch, pos) => fmt.write_fmt(format_args!("Encountered unexpected character '{ch}' at idx={pos}")),
354            Self::InvalidCidr(cidr) => {
355                fmt.write_str("Invalid Cidr prefix: ")?;
356                fmt.write_str(cidr)
357            },
358            Self::InvalidComponent(addr) => {
359                fmt.write_str("Invalid address component: ")?;
360                fmt.write_str(addr)
361            },
362            Self::NonAsciiCharacter(pos) => fmt.write_fmt(format_args!("Encountered non-ASCII character at idx={pos}")),
363            Self::MissingIp => fmt.write_str("Address is not specified"),
364            Self::MissingCidr => fmt.write_str("Prefix is not specified"),
365            Self::Ipv4CidrPrefixOverflow(prefix) => fmt.write_fmt(format_args!("Prefix '{prefix}' is greater than 32")),
366            Self::Ipv6CidrPrefixOverflow(prefix) => fmt.write_fmt(format_args!("Prefix '{prefix}' is greater than 128")),
367        }
368    }
369}
370
371impl core::error::Error for ParseError<'_> {
372}
373
374///Performs parsing of the string into IP addr with optional CIDR prefix
375pub const fn parse_ip(text: &str) -> Result<(net::IpAddr, Option<u8>), ParseError<'_>> {
376    let text = text.as_bytes();
377
378    let mut parser = Parser {
379        state: ParserState::Initial,
380        flags: 0,
381        family: FamilyType::Unknown,
382        components_size: 0,
383        components: [0; 8],
384        zero_component_start: 0,
385        start_digit_position: 0,
386        text,
387    };
388    parser.parse()
389}