Skip to main content

ip_extract/
lib.rs

1//! High-performance IP address extraction and tagging engine.
2//!
3//! `ip-extract` provides a blazingly fast, configurable extractor for finding IPv4 and IPv6
4//! addresses in unstructured text. It achieves maximum throughput through:
5//!
//! - **Compile-time DFA**: IP patterns are converted to dense forward DFAs during the build,
//!   eliminating runtime regex compilation; the only heap allocation is a one-time aligned
//!   copy of the serialized DFA on first use.
8//! - **Zero-overhead scanning**: The DFA scans at O(n) with no backtracking; validation
9//!   is performed only on candidates.
10//! - **Strict validation**: Deep checks eliminate false positives (e.g., `1.2.3.4.5` is rejected).
11//!
12//! ## Quick Start
13//!
14//! By default, **all IP addresses are extracted**:
15//!
16//! ```no_run
17//! use ip_extract::ExtractorBuilder;
18//!
19//! # fn main() -> anyhow::Result<()> {
20//! // Extract all IPs (default: includes private, loopback, broadcast)
21//! let extractor = ExtractorBuilder::new().build()?;
22//!
23//! let input = b"Connect from 192.168.1.1 to 2001:db8::1";
24//! for range in extractor.find_iter(input) {
25//!     let ip = std::str::from_utf8(&input[range])?;
26//!     println!("Found: {}", ip);
27//! }
28//! # Ok(())
29//! # }
30//! ```
31//!
32//! ## Tagging and Output
33//!
34//! For more structured output (e.g., JSON), use the `Tagged` and `Tag` types:
35//!
36//! ```no_run
37//! use ip_extract::{ExtractorBuilder, Tagged, Tag};
38//!
39//! # fn main() -> anyhow::Result<()> {
40//! let extractor = ExtractorBuilder::new().build()?;
41//! let data = b"Server at 8.8.8.8";
42//! let mut tagged = Tagged::new(data);
43//!
44//! for range in extractor.find_iter(data) {
45//!     let ip = std::str::from_utf8(&data[range.clone()])?;
46//!     let tag = Tag::new(ip).with_range(range);
47//!     tagged = tagged.tag(tag);
48//! }
49//! # Ok(())
50//! # }
51//! ```
52//!
53//! ## Configuration
54//!
55//! Use `ExtractorBuilder` to filter specific IP categories:
56//!
57//! ```no_run
58//! use ip_extract::ExtractorBuilder;
59//!
60//! # fn main() -> anyhow::Result<()> {
61//! // Extract only publicly routable IPs
62//! let extractor = ExtractorBuilder::new()
63//!     .only_public()
64//!     .build()?;
65//!
66//! // Or use granular control
67//! let extractor = ExtractorBuilder::new()
68//!     .ipv4(true)            // Extract IPv4 (default: true)
69//!     .ipv6(false)           // Skip IPv6
70//!     .ignore_private()      // Skip RFC 1918 ranges
71//!     .ignore_loopback()     // Skip loopback (127.0.0.1, ::1)
72//!     .build()?;
73//! # Ok(())
74//! # }
75//! ```
76//!
77//! ## Performance
78//!
79//! Typical throughput on modern hardware:
80//! - Dense IPs (mostly IP addresses): **160+ MiB/s**
81//! - Sparse logs (IPs mixed with text): **360+ MiB/s**
82//! - No IPs (pure scanning): **620+ MiB/s**
83//!
84//! See `benches/ip_benchmark.rs` for details.
85
86use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
87use std::ops::Range;
88use std::sync::OnceLock;
89
90use regex_automata::dfa::dense::DFA;
91use regex_automata::dfa::Automaton;
92use regex_automata::Input;
93
94mod tag;
95pub use tag::{Tag, Tagged, TextData};
96
// Pre-compiled DFA bytes.
//
// Each blob is read from a file in `OUT_DIR` (Cargo's build-script output directory) and
// embedded in the binary at compile time. One blob exists per enabled-protocol
// combination: IPv4 only, IPv6 only, or both.
static IPV4_DFA_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/ipv4_only.dfa"));
static IPV6_DFA_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/ipv6_only.dfa"));
static BOTH_DFA_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/both.dfa"));

// Lazily initialized, process-wide handles to the deserialized DFAs. Each cell is filled
// at most once, on the first call to the matching `get_*_dfa` accessor.
static DFA_IPV4: OnceLock<&'static DFA<&'static [u32]>> = OnceLock::new();
static DFA_IPV6: OnceLock<&'static DFA<&'static [u32]>> = OnceLock::new();
static DFA_BOTH: OnceLock<&'static DFA<&'static [u32]>> = OnceLock::new();
105
/// Deserialize a pre-compiled DFA from its embedded binary form.
///
/// The DFA is built at compile time and embedded in the binary as raw bytes. At runtime
/// this function performs three steps:
///
/// 1. **Align the bytes**: the serialized data is copied once into a freshly allocated
///    `u32` buffer, so the bytes handed to the deserializer start on a 4-byte boundary.
///    (`include_bytes!()` data carries no such alignment guarantee.)
///
/// 2. **Leak for `'static` lifetime**: `Box::leak()` turns the heap buffer into a
///    `&'static` slice. This is intentional: the buffer is never freed, so the DFA can
///    be stored in a `static` `OnceLock` and shared for the rest of the program.
///
/// 3. **Deserialize**: `DFA::from_bytes()` reconstructs the DFA over the aligned buffer;
///    the resulting DFA borrows from the leaked buffer. The DFA value itself is then
///    boxed and leaked as well.
///
/// # Why This Approach?
///
/// - **No repeated work**: callers cache the result in a `OnceLock`, so the copy and the
///   two leaked allocations happen once per DFA
/// - **No runtime regex compilation**: the DFA is already built at compile time
/// - **Minimal binary overhead**: only the serialized form of each DFA is embedded
///
/// # Panics
///
/// Panics if the embedded bytes are not a valid serialized DFA.
///
/// # Safety
///
/// The function is safe to call; the invariants of its internal `unsafe` blocks are
/// documented inline.
fn load_dfa(bytes: &'static [u8]) -> &'static DFA<&'static [u32]> {
    // Allocate u32 buffer sized to hold all bytes (rounded up)
    let len = bytes.len();
    let cap = len.div_ceil(4);
    let mut storage = vec![0u32; cap];

    // Copy byte data into the u32-aligned buffer
    // SAFETY: `bytes` is valid for `len` reads; `storage` is a fresh allocation of
    // `cap * 4 >= len` bytes, so it is valid for `len` writes and cannot overlap `bytes`.
    unsafe {
        std::ptr::copy_nonoverlapping(bytes.as_ptr(), storage.as_mut_ptr() as *mut u8, len);
    }

    // Leak the buffer to get a 'static mutable reference
    let storage_ref: &'static mut [u32] = Box::leak(storage.into_boxed_slice());

    // Reinterpret the u32 buffer as the original byte sequence (no further copy)
    // SAFETY: the pointer comes from a live, leaked allocation of `cap * 4 >= len`
    // initialized bytes (zero-filled, then overwritten above); `u8` has no alignment
    // requirement; `storage_ref` is not used for mutation after this point.
    let aligned_slice =
        unsafe { std::slice::from_raw_parts(storage_ref.as_ptr() as *const u8, len) };

    // Deserialize the DFA from the aligned bytes
    let (dfa, _) = DFA::from_bytes(aligned_slice).expect("valid dfa from build.rs");

    // Leak the DFA itself for a 'static lifetime
    Box::leak(Box::new(dfa))
}
159
160fn get_ipv4_dfa() -> &'static DFA<&'static [u32]> {
161    DFA_IPV4.get_or_init(|| load_dfa(IPV4_DFA_BYTES))
162}
163fn get_ipv6_dfa() -> &'static DFA<&'static [u32]> {
164    DFA_IPV6.get_or_init(|| load_dfa(IPV6_DFA_BYTES))
165}
166fn get_both_dfa() -> &'static DFA<&'static [u32]> {
167    DFA_BOTH.get_or_init(|| load_dfa(BOTH_DFA_BYTES))
168}
169
/// Per-pattern validation settings applied to each DFA candidate.
///
/// Every field is a plain `bool`, so the type is `Copy` and cheap to compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ValidatorType {
    /// Validate the candidate as a strict dotted-quad IPv4 address.
    IPv4 {
        /// Accept RFC 1918 private addresses.
        include_private: bool,
        /// Accept `127.0.0.0/8` loopback addresses.
        include_loopback: bool,
        /// Accept the broadcast address and IPv4 link-local addresses.
        include_broadcast: bool,
    },
    /// Validate the candidate as an IPv6 address.
    IPv6 {
        /// Accept unique-local and unicast link-local addresses.
        include_private: bool,
        /// Accept the loopback address (`::1`).
        include_loopback: bool,
    },
}
182
183impl ValidatorType {
184    #[inline(always)]
185    fn validate(&self, bytes: &[u8]) -> bool {
186        match *self {
187            ValidatorType::IPv4 {
188                include_private,
189                include_loopback,
190                include_broadcast,
191            } => validate_ipv4(bytes, include_private, include_loopback, include_broadcast),
192            ValidatorType::IPv6 {
193                include_private,
194                include_loopback,
195            } => validate_ipv6(bytes, include_private, include_loopback),
196        }
197    }
198}
199
/// The main IP address extractor.
///
/// An `Extractor` scans byte slices for IPv4 and/or IPv6 addresses, applying configurable
/// filters to include or exclude certain address classes (private, loopback, broadcast).
///
/// Extractors are best created via [`ExtractorBuilder`] and are designed to be reused
/// across many calls to `find_iter` for maximum efficiency.
///
/// # Bytes vs. Strings
///
/// This extractor works directly on byte slices rather than strings, so callers do not
/// need to validate their input as UTF-8 before scanning.
///
/// # Performance
///
/// The extractor uses a compile-time DFA (Deterministic Finite Automaton) for O(n)
/// scanning with minimal overhead. See the crate-level documentation for throughput benchmarks.
pub struct Extractor {
    /// Shared, pre-compiled DFA selected by the builder (IPv4-only, IPv6-only, or both).
    dfa: &'static DFA<&'static [u32]>,
    /// Validators indexed by the DFA's pattern id; the order must match the pattern
    /// order used when the DFA was built.
    validators: Vec<ValidatorType>,
}
221
222impl Extractor {
223    /// Find all IP addresses in a byte slice.
224    ///
225    /// Returns an iterator of byte ranges `[start, end)` pointing to each IP address found.
226    /// Ranges are guaranteed to be valid indices into `haystack`.
227    ///
228    /// # Example
229    ///
230    /// ```no_run
231    /// use ip_extract::ExtractorBuilder;
232    ///
233    /// # fn main() -> anyhow::Result<()> {
234    /// let extractor = ExtractorBuilder::new().build()?;
235    /// let data = b"Log: 192.168.1.1 sent request to 8.8.8.8";
236    ///
237    /// for range in extractor.find_iter(data) {
238    ///     let ip = std::str::from_utf8(&data[range]).unwrap();
239    ///     println!("IP: {}", ip);
240    /// }
241    /// # Ok(())
242    /// # }
243    /// ```
244    ///
245    /// # Arguments
246    ///
247    /// * `haystack` - A byte slice to search for IP addresses.
248    ///
249    /// # Returns
250    ///
251    /// An iterator yielding byte ranges for each valid IP address found.
252    #[inline]
253    pub fn find_iter<'a>(&'a self, haystack: &'a [u8]) -> impl Iterator<Item = Range<usize>> + 'a {
254        let mut input = Input::new(haystack);
255
256        std::iter::from_fn(move || {
257            loop {
258                // We use the specialized try_search_fwd method.
259                // For a dense DFA, this is the core scanning loop.
260                let Ok(Some(m)) = self.dfa.try_search_fwd(&input) else {
261                    return None;
262                };
263
264                let end = m.offset();
265                let pid = m.pattern().as_usize();
266                let validator = &self.validators[pid];
267
268                // Backtrack to find the start. Max IPv6 is 39 bytes, use 40 for safety margin.
269                let mut start_scan = end.saturating_sub(40);
270                while start_scan < end && !is_ip_char(haystack[start_scan]) {
271                    start_scan += 1;
272                }
273
274                let mut actual_start = None;
275                for s in start_scan..end {
276                    if s > 0 && is_ip_char(haystack[s - 1]) {
277                        continue;
278                    }
279
280                    if validator.validate(&haystack[s..end]) {
281                        // Right boundary check: ensure the IP isn't part of a longer sequence
282                        // For IPv4: allow trailing dots (sentence endings) but reject digits
283                        // For IPv6: reject digits, hex letters, dots, or colons
284                        let valid_boundary = if end < haystack.len() {
285                            let next_char = haystack[end];
286                            match validator {
287                                ValidatorType::IPv4 { .. } => {
288                                    // Reject digits immediately after (e.g., "1.2.3.4" followed by "5")
289                                    // Reject dot+digit combination (e.g., "1.2.3.4.5")
290                                    if next_char.is_ascii_digit() {
291                                        false
292                                    } else if next_char == b'.' && end + 1 < haystack.len() {
293                                        // If next is a dot, check if it's followed by a digit
294                                        !haystack[end + 1].is_ascii_digit()
295                                    } else {
296                                        true
297                                    }
298                                }
299                                ValidatorType::IPv6 { .. } => {
300                                    // Reject all IP characters for IPv6
301                                    !is_ip_char(next_char)
302                                }
303                            }
304                        } else {
305                            true
306                        };
307
308                        if !valid_boundary {
309                            break;
310                        }
311                        actual_start = Some(s..end);
312                        break;
313                    }
314                }
315
316                // Advance input.
317                input.set_start(end);
318
319                if let Some(range) = actual_start {
320                    return Some(range);
321                }
322
323                if end >= haystack.len() {
324                    return None;
325                }
326            }
327        })
328    }
329}
330
/// Return `true` for bytes that can appear in a textual IP address: ASCII hex digits
/// (either case), `.` and `:`.
#[inline(always)]
fn is_ip_char(b: u8) -> bool {
    b.is_ascii_hexdigit() || b == b'.' || b == b':'
}
335
/// A builder for configuring IP extraction behavior.
///
/// Use `ExtractorBuilder` to specify which types of IP addresses should be extracted.
/// By default, it extracts both IPv4 and IPv6 and **includes** private, loopback, and
/// broadcast addresses; use the `ignore_*` / `only_public` methods to filter them out.
///
/// # Example
///
/// ```no_run
/// use ip_extract::ExtractorBuilder;
///
/// # fn main() -> anyhow::Result<()> {
/// let extractor = ExtractorBuilder::new()
///     .ipv4(true)
///     .ipv6(false)  // Only IPv4
///     .private_ips(true)  // Include private ranges
///     .build()?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct ExtractorBuilder {
    /// Extract IPv4 addresses.
    include_ipv4: bool,
    /// Extract IPv6 addresses.
    include_ipv6: bool,
    /// Keep private addresses (IPv4 RFC 1918; IPv6 unique-local and link-local).
    include_private: bool,
    /// Keep loopback addresses.
    include_loopback: bool,
    /// Keep IPv4 broadcast and IPv4 link-local addresses.
    include_broadcast: bool,
}
363
364impl Default for ExtractorBuilder {
365    fn default() -> Self {
366        Self::new()
367    }
368}
369
370impl ExtractorBuilder {
371    /// Create a new builder with default settings.
372    ///
373    /// By default, **all IP addresses are extracted** (principle of least surprise).
374    /// Use `.only_public()` or `.ignore_*()` methods to filter specific categories.
375    ///
376    /// Defaults:
377    /// - IPv4: enabled
378    /// - IPv6: enabled
379    /// - Private IPs: **enabled** (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, fc00::/7)
380    /// - Loopback IPs: **enabled** (127.0.0.0/8, ::1)
381    /// - Broadcast IPs: **enabled** (255.255.255.255, link-local)
382    ///
383    /// # Examples
384    ///
385    /// ```no_run
386    /// use ip_extract::ExtractorBuilder;
387    ///
388    /// # fn main() -> anyhow::Result<()> {
389    /// // Extract all IPs (default)
390    /// let extractor = ExtractorBuilder::new().build()?;
391    ///
392    /// // Extract only public IPs
393    /// let extractor = ExtractorBuilder::new().only_public().build()?;
394    ///
395    /// // Granular control
396    /// let extractor = ExtractorBuilder::new()
397    ///     .ignore_private()
398    ///     .ignore_loopback()
399    ///     .build()?;
400    /// # Ok(())
401    /// # }
402    /// ```
403    #[must_use]
404    pub fn new() -> Self {
405        Self {
406            include_ipv4: true,
407            include_ipv6: true,
408            include_private: true,
409            include_loopback: true,
410            include_broadcast: true,
411        }
412    }
413    /// Enable or disable IPv4 address extraction.
414    ///
415    /// Default: `true`
416    pub fn ipv4(&mut self, include: bool) -> &mut Self {
417        self.include_ipv4 = include;
418        self
419    }
420
421    /// Enable or disable IPv6 address extraction.
422    ///
423    /// Default: `true`
424    pub fn ipv6(&mut self, include: bool) -> &mut Self {
425        self.include_ipv6 = include;
426        self
427    }
428
429    /// Include private IP addresses (RFC 1918 for IPv4, ULA for IPv6).
430    ///
431    /// Private ranges include:
432    /// - IPv4: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
433    /// - IPv6: fc00::/7 (ULA), fe80::/10 (link-local)
434    ///
435    /// Default: `true`
436    pub fn private_ips(&mut self, include: bool) -> &mut Self {
437        self.include_private = include;
438        self
439    }
440
441    /// Include loopback addresses.
442    ///
443    /// Loopback ranges:
444    /// - IPv4: 127.0.0.0/8
445    /// - IPv6: ::1
446    ///
447    /// Default: `true`
448    pub fn loopback_ips(&mut self, include: bool) -> &mut Self {
449        self.include_loopback = include;
450        self
451    }
452
453    /// Include broadcast addresses.
454    ///
455    /// Covers:
456    /// - IPv4: 255.255.255.255 and link-local (169.254.0.0/16)
457    /// - IPv6: link-local and other special ranges
458    ///
459    /// Default: `true`
460    pub fn broadcast_ips(&mut self, include: bool) -> &mut Self {
461        self.include_broadcast = include;
462        self
463    }
464
465    /// Ignore private IP addresses (convenience for `.private_ips(false)`).
466    ///
467    /// Excludes:
468    /// - IPv4: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
469    /// - IPv6: fc00::/7 (ULA), fe80::/10 (link-local)
470    pub fn ignore_private(&mut self) -> &mut Self {
471        self.include_private = false;
472        self
473    }
474
475    /// Ignore loopback addresses (convenience for `.loopback_ips(false)`).
476    ///
477    /// Excludes:
478    /// - IPv4: 127.0.0.0/8
479    /// - IPv6: ::1
480    pub fn ignore_loopback(&mut self) -> &mut Self {
481        self.include_loopback = false;
482        self
483    }
484
485    /// Ignore broadcast addresses (convenience for `.broadcast_ips(false)`).
486    ///
487    /// Excludes:
488    /// - IPv4: 255.255.255.255 and link-local (169.254.0.0/16)
489    /// - IPv6: link-local and other special ranges
490    pub fn ignore_broadcast(&mut self) -> &mut Self {
491        self.include_broadcast = false;
492        self
493    }
494
495    /// Extract only publicly routable IP addresses.
496    ///
497    /// This is a convenience method equivalent to:
498    /// ```
499    /// # use ip_extract::ExtractorBuilder;
500    /// # let mut builder = ExtractorBuilder::new();
501    /// builder
502    ///     .ignore_private()
503    ///     .ignore_loopback()
504    ///     .ignore_broadcast();
505    /// ```
506    ///
507    /// Excludes:
508    /// - Private: RFC 1918 (IPv4), ULA (IPv6)
509    /// - Loopback: 127.0.0.0/8, ::1
510    /// - Broadcast: 255.255.255.255, link-local ranges
511    ///
512    /// # Example
513    ///
514    /// ```no_run
515    /// use ip_extract::ExtractorBuilder;
516    ///
517    /// # fn main() -> anyhow::Result<()> {
518    /// let extractor = ExtractorBuilder::new()
519    ///     .only_public()
520    ///     .build()?;
521    /// # Ok(())
522    /// # }
523    /// ```
524    pub fn only_public(&mut self) -> &mut Self {
525        self.include_private = false;
526        self.include_loopback = false;
527        self.include_broadcast = false;
528        self
529    }
530
531    /// Build and return an `Extractor` with the configured settings.
532    ///
533    /// # Errors
534    ///
535    /// Returns an error if no IP version (IPv4 or IPv6) is enabled. At least one
536    /// must be selected.
537    ///
538    /// # Example
539    ///
540    /// ```no_run
541    /// use ip_extract::ExtractorBuilder;
542    ///
543    /// # fn main() -> anyhow::Result<()> {
544    /// let extractor = ExtractorBuilder::new()
545    ///     .ipv4(true)
546    ///     .ipv6(true)
547    ///     .build()?;
548    /// # Ok(())
549    /// # }
550    /// ```
551    pub fn build(&self) -> anyhow::Result<Extractor> {
552        let (dfa, validators) = match (self.include_ipv4, self.include_ipv6) {
553            (true, true) => (
554                get_both_dfa(),
555                vec![
556                    ValidatorType::IPv4 {
557                        include_private: self.include_private,
558                        include_loopback: self.include_loopback,
559                        include_broadcast: self.include_broadcast,
560                    },
561                    ValidatorType::IPv6 {
562                        include_private: self.include_private,
563                        include_loopback: self.include_loopback,
564                    },
565                ],
566            ),
567            (true, false) => (
568                get_ipv4_dfa(),
569                vec![ValidatorType::IPv4 {
570                    include_private: self.include_private,
571                    include_loopback: self.include_loopback,
572                    include_broadcast: self.include_broadcast,
573                }],
574            ),
575            (false, true) => (
576                get_ipv6_dfa(),
577                vec![ValidatorType::IPv6 {
578                    include_private: self.include_private,
579                    include_loopback: self.include_loopback,
580                }],
581            ),
582            _ => anyhow::bail!("No IP address patterns selected"),
583        };
584        Ok(Extractor { dfa, validators })
585    }
586}
587
588/// Validate an IPv4 address from a byte slice, applying filters.
589///
590/// This function uses `parse_ipv4_bytes` for strict validation and then checks
591/// against the provided inclusion filters.
592///
593/// # Arguments
594///
595/// * `bytes` - Candidate byte slice to validate.
596/// * `include_private` - Whether to include RFC 1918 addresses.
597/// * `include_loopback` - Whether to include 127.0.0.0/8 addresses.
598/// * `include_broadcast` - Whether to include broadcast and link-local addresses.
599#[inline]
600fn validate_ipv4(
601    bytes: &[u8],
602    include_private: bool,
603    include_loopback: bool,
604    include_broadcast: bool,
605) -> bool {
606    let Some(ipv4) = parse_ipv4_bytes(bytes) else {
607        return false;
608    };
609
610    if !include_private && ipv4.is_private() {
611        return false;
612    }
613    if !include_loopback && ipv4.is_loopback() {
614        return false;
615    }
616    if !include_broadcast && (ipv4.is_broadcast() || ipv4.is_link_local()) {
617        return false;
618    }
619    true
620}
621
/// Parse an IPv4 address from a byte slice.
///
/// Performs strict validation of dotted-quad notation (e.g., `192.168.1.1`).
/// Rejects:
/// - Octet values > 255
/// - Leading zeros (e.g., `192.168.001.1`)
/// - Invalid formats (empty octets, too few or too many octets, non-digit bytes)
///
/// # Arguments
///
/// * `bytes` - A byte slice containing a potential IPv4 address (7-15 bytes)
///
/// # Returns
///
/// `Some(Ipv4Addr)` if the bytes represent a valid IPv4 address, `None` otherwise.
///
/// # Example
///
/// ```
/// use ip_extract::parse_ipv4_bytes;
///
/// assert_eq!(parse_ipv4_bytes(b"192.168.1.1"), Some("192.168.1.1".parse().unwrap()));
/// assert_eq!(parse_ipv4_bytes(b"256.1.1.1"), None);  // Out of range
/// assert_eq!(parse_ipv4_bytes(b"192.168.01.1"), None);  // Leading zero
/// ```
#[must_use]
#[inline]
pub fn parse_ipv4_bytes(bytes: &[u8]) -> Option<Ipv4Addr> {
    /// Parse one decimal octet: non-empty, digits only, no leading zero, value <= 255.
    #[inline]
    fn parse_octet(digits: &[u8]) -> Option<u8> {
        match digits {
            [] => None,
            [b'0'] => Some(0),
            // Any longer run that starts with '0' has a leading zero.
            [b'0', ..] => None,
            _ => digits.iter().try_fold(0u8, |acc, &b| {
                if !b.is_ascii_digit() {
                    return None;
                }
                // Checked `u8` arithmetic rejects anything above 255 without needing
                // a wider accumulator or a truncating cast.
                acc.checked_mul(10)?.checked_add(b - b'0')
            }),
        }
    }

    // "0.0.0.0" is the shortest and "255.255.255.255" the longest valid form.
    if !(7..=15).contains(&bytes.len()) {
        return None;
    }

    let mut parts = bytes.split(|&b| b == b'.');
    let mut octets = [0u8; 4];
    for slot in &mut octets {
        *slot = parse_octet(parts.next()?)?;
    }

    // A fifth part means there were more than three dots (including a trailing dot).
    if parts.next().is_some() {
        return None;
    }
    Some(Ipv4Addr::from(octets))
}
694
/// Check if an IPv6 address is a Unique Local Address (ULA) per RFC 4193.
///
/// ULA addresses are in the `fc00::/7` range, i.e. the first octet is `0xfc` or `0xfd`.
#[inline]
fn is_unique_local(ip: &Ipv6Addr) -> bool {
    matches!(ip.octets()[0], 0xfc | 0xfd)
}

/// Validate an IPv6 address from a byte slice, applying filters.
///
/// The slice is parsed with the standard library's address parser and then checked
/// against the category filters. Input that is not valid UTF-8 is rejected: candidate
/// slices come from a look-back window over the haystack and may contain arbitrary
/// bytes, so the UTF-8 check must not be skipped.
///
/// # Arguments
///
/// * `bytes` - Candidate byte slice to validate.
/// * `include_private` - Whether to include ULA and link-local addresses.
/// * `include_loopback` - Whether to include the loopback address (`::1`).
#[inline]
fn validate_ipv6(bytes: &[u8], include_private: bool, include_loopback: bool) -> bool {
    // The shortest IPv6 text form is "::".
    if bytes.len() < 2 {
        return false;
    }
    // Checked conversion: candidates are at most a few dozen bytes, so the cost is
    // negligible, and it keeps this function sound for any input.
    let Ok(s) = std::str::from_utf8(bytes) else {
        return false;
    };
    let Ok(ip) = s.parse::<IpAddr>() else {
        return false;
    };

    match ip {
        IpAddr::V6(ipv6) => {
            if !include_private && (ipv6.is_unicast_link_local() || is_unique_local(&ipv6)) {
                return false;
            }
            if !include_loopback && ipv6.is_loopback() {
                return false;
            }
            true
        }
        // A dotted quad is not an IPv6 address, even though it parses as an `IpAddr`.
        IpAddr::V4(_) => false,
    }
}
736
737impl std::fmt::Debug for Extractor {
738    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
739        f.debug_struct("Extractor")
740            .field("validators", &self.validators)
741            .finish()
742    }
743}