forensic_rs/field/
utils.rs

1use std::net::Ipv6Addr;
2
/// Parse a dotted-decimal IPv4 address (`a.b.c.d`) into its big-endian `u32` value.
///
/// The text must consist of exactly four octets separated by dots, and every octet must be
/// made only of ASCII digits whose value fits in a `u8`. Leading zeros inside an octet are
/// tolerated (`"010.0.0.1"` is read as `10.0.0.1`).
///
/// # Errors
/// Returns a static description when the text has more or fewer than four octets, or when an
/// octet is empty, contains a non-digit character (including a `+` sign) or is above 255.
pub fn ipv4_from_str(ipv4: &str) -> Result<u32, &'static str> {
    let mut number: u32 = 0;
    // Bit position of the octet being read; octets are visited from least to most significant.
    let mut shift = 0;
    for part in ipv4.split('.').rev() {
        if shift >= 32 {
            return Err("More than 4 dots");
        }
        // `u8::from_str` accepts a leading '+', which is not valid inside an IPv4 address,
        // so the characters are checked before delegating to it.
        if !part.bytes().all(|b| b.is_ascii_digit()) {
            return Err("Cannot parse as u8");
        }
        let octet = part.parse::<u8>().map_err(|_| "Cannot parse as u8")?;
        number |= u32::from(octet) << shift;
        shift += 8;
    }
    // Four octets move the shift to exactly 32; anything less means octets are missing.
    if shift != 32 {
        return Err("Less than 4 octets");
    }
    Ok(number)
}
19
/// Tell whether `byte` is the ASCII code of a hexadecimal digit (`0-9`, `a-f` or `A-F`).
fn is_hex_digit(byte: u8) -> bool {
    byte.is_ascii_hexdigit()
}
/// Return the numeric value (0 to 15) of the hexadecimal digit whose ASCII code is `byte`.
///
/// # Panics
/// Panics when `byte` is not an ASCII hexadecimal digit; callers are expected to have checked
/// the byte beforehand.
fn hex_to_digit(byte: u8) -> u8 {
    // ASCII code that maps to the value zero within the range `byte` belongs to.
    let zero_point = match byte {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => unreachable!(),
    };
    byte - zero_point
}
/// Combine the four raw octets of an IPv4 address into its big-endian `u32` value.
///
/// `ipv4` holds the numeric octets (for instance `[192, 168, 1, 1]`), not ASCII text.
///
/// # Errors
/// Returns `"Invalid IPV4 length"` when the slice does not contain exactly four bytes.
pub fn ipv4_to_u32_bytes(ipv4: &[u8]) -> Result<u32, &'static str> {
    match *ipv4 {
        [a, b, c, d] => Ok(u32::from_be_bytes([a, b, c, d])),
        _ => Err("Invalid IPV4 length"),
    }
}
42
43/// Read up to four ASCII characters that represent hexadecimal digits, and return their value, as
44/// well as the number of characters that were read. If not character is read, `(0, 0)` is
45/// returned.
46fn read_hextet(bytes: &[u8]) -> (usize, u16) {
47    let mut count = 0;
48    let mut digits: [u8; 4] = [0; 4];
49
50    for b in bytes {
51        if is_hex_digit(*b) {
52            digits[count] = hex_to_digit(*b);
53            count += 1;
54            if count == 4 {
55                break;
56            }
57        } else {
58            break;
59        }
60    }
61
62    if count == 0 {
63        return (0, 0);
64    }
65
66    let mut shift = (count - 1) * 4;
67    let mut res = 0;
68    for digit in &digits[0..count] {
69        res += (*digit as u16) << shift;
70        if shift >= 4 {
71            shift -= 4;
72        } else {
73            break;
74        }
75    }
76
77    (count, res)
78}
79
80pub fn ipv6_from_str(s: &str) -> Result<u128, &'static str> {
81    // We'll manipulate bytes instead of UTF-8 characters, because the characters that
82    // represent an IPv6 address are supposed to be ASCII characters.
83    let bytes = s.as_bytes();
84
85    // The maximimum length of a string representing an IPv6 is the length of:
86    //
87    //      1111:2222:3333:4444:5555:6666:7777:8888
88    //
89    // The minimum length of a string representing an IPv6 is the length of:
90    //
91    //      ::
92    //
93    if bytes.len() > 38 || bytes.len() < 2 {
94        return Err("Invalid ipv6 size");
95    }
96
97    let mut offset = 0;
98    let mut ellipsis: Option<usize> = None;
99
100    // Handle the special case where the IP start with "::"
101    if bytes[0] == b':' {
102        if bytes[1] == b':' {
103            if bytes.len() == 2 {
104                return Ok(0);
105            }
106            ellipsis = Some(0);
107            offset += 2;
108        } else {
109            // An IPv6 cannot start with a single column. It must be a double column.
110            // So this is an invalid address
111            return Err("An IPv6 cannot start with a single column.");
112        }
113    }
114
115    // When dealing with IPv6, it's easier to reason in terms of "hextets" instead of octets.
116    // An IPv6 is 8 hextets. At the end, we'll convert that array into an u128.
117    let mut address: [u16; 8] = [0; 8];
118
119    // Keep track of the number of hextets we process
120    let mut hextet_index = 0;
121
122    loop {
123        if offset == bytes.len() {
124            break;
125        }
126
127        // Try to read an hextet
128        let (bytes_read, hextet) = read_hextet(&bytes[offset..]);
129
130        // Handle the case where we could not read an hextet
131        if bytes_read == 0 {
132            match bytes[offset] {
133                // We could not read an hextet because the first character in the slace was ":"
134                // This may be because we have two consecutive columns.
135                b':' => {
136                    // Check if already saw an ellipsis. If so, fail parsing, because an IPv6
137                    // can only have one ellipsis.
138                    if ellipsis.is_some() {
139                        return Err("IPv6 can only have one ellipsis");
140                    }
141                    // Otherwise, remember the position of the ellipsis. We'll need that later
142                    // to count the number of zeros the ellipsis represents.
143                    ellipsis = Some(hextet_index);
144                    offset += 1;
145                    // Continue and try to read the next hextet
146                    continue;
147                }
148                // We now the first character does not represent an hexadecimal digit
149                // (otherwise read_hextet() would have read at least one character), and that
150                // it's not ":", so the string does not represent an IPv6 address
151                _ => return Err("IPv6 can only have one ellipsis"),
152            }
153        }
154
155        // At this point, we know we read an hextet.
156
157        address[hextet_index] = hextet;
158        offset += bytes_read;
159        hextet_index += 1;
160
161        // If this was the last hextet of if we reached the end of the buffer, we should be
162        // done
163        if hextet_index == 8 || offset == bytes.len() {
164            break;
165        }
166
167        // Read the next charachter. After a hextet, we usually expect a column, but there's a special
168        // case for IPv6 that ends with an IPv4.
169        match bytes[offset] {
170            // We saw the column, we can continue
171            b':' => offset += 1,
172            // Handle the special IPv4 case, ie address like. Note that the hextet we just read
173            // is part of that IPv4 address:
174            //
175            // aaaa:bbbb:cccc:dddd:eeee:ffff:a.b.c.d.
176            //                               ^^
177            //                               ||
178            // hextet we just read, that  ---+|
179            // is actually the first byte of  +--- dot we're handling
180            // the ipv4.
181            b'.' => {
182                // The hextet was actually part of the IPv4, so not that we start reading the
183                // IPv4 at `offset - bytes_read`.
184                let ipv4: u32 = ipv4_to_u32_bytes(&bytes[offset - bytes_read..])?;
185                // Replace the hextet we just read by the 16 most significant bits of the
186                // IPv4 address (a.b in the comment above)
187                address[hextet_index - 1] = ((ipv4 & 0xffff_0000) >> 16) as u16;
188                // Set the last hextet to the 16 least significant bits of the IPv4 address
189                // (c.d in the comment above)
190                address[hextet_index] = (ipv4 & 0x0000_ffff) as u16;
191                hextet_index += 1;
192                // After successfully parsing an IPv4, we should be done.
193                // If there are bytes left in the buffer, or if we didn't read enough hextet,
194                // we'll fail later.
195                break;
196            }
197            _ => return Err("Unexpected error"),
198        }
199    } // end of loop
200
201    // If we exited the loop, we should have reached the end of the buffer.
202    // If there are trailing characters, parsing should fail.
203    if offset < bytes.len() {
204        return Err("There are trailing characters");
205    }
206
207    if hextet_index == 8 && ellipsis.is_some() {
208        // We parsed an address that looks like 1111:2222::3333:4444:5555:6666:7777,
209        // ie with an empty ellipsis.
210        return Err("Empty elipsis");
211    }
212
213    // We didn't parse enough hextets, but this may be due to an ellipsis
214    if hextet_index < 8 {
215        if let Some(ellipsis_index) = ellipsis {
216            // Count how many zeros the ellipsis accounts for
217            let nb_zeros = 8 - hextet_index;
218            // Shift the hextet that we read after the ellipsis by the number of zeros
219            for index in (ellipsis_index..hextet_index).rev() {
220                address[index + nb_zeros] = address[index];
221                address[index] = 0;
222            }
223        } else {
224            return Err("Error");
225        }
226    }
227
228    // Build the IPv6 address from the array of hextets
229    Ok(((address[0] as u128) << 112)
230        + ((address[1] as u128) << 96)
231        + ((address[2] as u128) << 80)
232        + ((address[3] as u128) << 64)
233        + ((address[4] as u128) << 48)
234        + ((address[5] as u128) << 32)
235        + ((address[6] as u128) << 16)
236        + address[7] as u128)
237}
238
/// Format a big-endian `u32` IPv4 value as dotted-decimal text (`a.b.c.d`).
pub fn ipv4_to_str(ipv4: u32) -> String {
    // Big-endian byte order yields the octets in the order they are written.
    let [a, b, c, d] = ipv4.to_be_bytes();
    format!("{}.{}.{}.{}", a, b, c, d)
}
248
/// Format a `u128` IPv6 value as text, using the standard library's `Ipv6Addr` display form.
pub fn ipv6_to_str(ipv6: u128) -> String {
    let address: Ipv6Addr = ipv6.into();
    address.to_string()
}
/// Tell whether `ip` lies in `fc00::/7`, i.e. whether its seven most significant bits are
/// `1111110`.
pub fn is_local_ipv6(ip: u128) -> bool {
    // Selects the seven leading bits of the address.
    const PREFIX_MASK: u128 = 0xfe << 120;
    // Value those seven bits must have (`fc00::`).
    const LOCAL_PREFIX: u128 = 0xfc << 120;
    (ip & PREFIX_MASK) == LOCAL_PREFIX
}
/// Tell whether a big-endian `u32` IPv4 value belongs to one of these ranges:
///
/// - `10.0.0.0/8` (10.0.0.0 to 10.255.255.255)
/// - `172.16.0.0/12` (172.16.0.0 to 172.31.255.255)
/// - `192.168.0.0/16` (192.168.0.0 to 192.168.255.255)
/// - `100.64.0.0/10` (100.64.0.0 to 100.127.255.255), the pool recommended by RFC 6598 for
///   Carrier-Grade NAT
pub fn is_local_ipv4(ip: u32) -> bool {
    // Only the two leading octets are needed to recognise every range above.
    let [first, second, _, _] = ip.to_be_bytes();
    matches!(
        (first, second),
        (10, _) | (172, 16..=31) | (192, 168) | (100, 64..=127)
    )
}
279
/// Parse the decimal text of a port number into a `u16`.
///
/// # Errors
/// Returns `"Cannot parse port as u16"` when the text is not a number that fits in a `u16`.
pub fn port_to_u16(port: &str) -> Result<u16, &'static str> {
    port.parse::<u16>().map_err(|_| "Cannot parse port as u16")
}
286
287pub fn parse_ipv4_port(text: &str) -> Option<(u32, u16)> {
288    match text.rfind(':') {
289        Some(pos) => match (ipv4_from_str(&text[..pos]), port_to_u16(&text[(pos + 1)..])) {
290            (Ok(v1), Ok(v2)) => Some((v1, v2)),
291            _ => None,
292        },
293        None => None,
294    }
295}
296
297pub fn is_ipv4_port(text: &str) -> bool {
298    match text.rfind(':') {
299        Some(pos) => matches!((ipv4_from_str(&text[..pos]), port_to_u16(&text[(pos + 1)..])), (Ok(_), Ok(_))),
300        None => false,
301    }
302}
303pub fn is_ipv4(text: &str) -> bool {
304    ipv4_from_str(text).is_ok()
305}
306pub fn is_ipv6(text: &str) -> bool {
307    ipv6_from_str(text).is_ok()
308}
309
#[cfg(test)]
mod tests {
    use super::*;

    /// Dotted-decimal text is converted to the matching big-endian integer.
    #[test]
    fn should_parse_ips() {
        // 192.168.1.1
        assert_eq!(ipv4_from_str("192.168.1.1"), Ok(3232235777));
        // 8.8.8.8
        assert_eq!(ipv4_from_str("8.8.8.8"), Ok(134744072));
        // 10.127.222.21
        assert_eq!(ipv4_from_str("10.127.222.21"), Ok(176152085));
        // 100.64.0.0
        assert_eq!(ipv4_from_str("100.64.0.0"), Ok(1681915904));
        // 10.255.255.255
        assert_eq!(ipv4_from_str("10.255.255.255"), Ok(184549375));
    }

    /// Raw octets are combined into the matching big-endian integer.
    #[test]
    fn should_parse_ip_from_u8_array() {
        // 192.168.1.1
        assert_eq!(ipv4_to_u32_bytes(&[192, 168, 1, 1]), Ok(3232235777));
        // 8.8.8.8
        assert_eq!(ipv4_to_u32_bytes(&[8, 8, 8, 8]), Ok(134744072));
        // 10.127.222.21
        assert_eq!(ipv4_to_u32_bytes(&[10, 127, 222, 21]), Ok(176152085));
        // 100.64.0.0
        assert_eq!(ipv4_to_u32_bytes(&[100, 64, 0, 0]), Ok(1681915904));
        // 10.255.255.255
        assert_eq!(ipv4_to_u32_bytes(&[10, 255, 255, 255]), Ok(184549375));
    }

    /// Private and CGN addresses are reported as local, public ones are not.
    #[test]
    fn check_ip_is_local() {
        // 192.168.1.1
        assert!(is_local_ipv4(3232235777));
        // 8.8.8.8
        assert!(!is_local_ipv4(134744072));
        // 10.127.222.21
        assert!(is_local_ipv4(176152085));
        // 100.64.0.0
        assert!(is_local_ipv4(1681915904));
        // 10.255.255.255
        assert!(is_local_ipv4(184549375));
    }

    /// `ipv4:port` detection rejects out-of-range ports, octets and extra octets.
    #[test]
    fn should_parse_socket() {
        assert!(is_ipv4_port("192.168.0.1:1000"));
        assert!(!is_ipv4_port("192.168.0.1:100000"));
        assert!(!is_ipv4("256.168.0.1"));
        assert!(!is_ipv4_port("800.168.0.1:10000"));
        assert!(!is_ipv4_port("80.0.168.0.1:10000"));
    }
}
364}