ip_extract/lib.rs
1//! High-performance IP address extraction and tagging engine.
2//!
3//! `ip-extract` provides a blazingly fast, configurable extractor for finding IPv4 and IPv6
4//! addresses in unstructured text. It achieves maximum throughput through:
5//!
//! - **Compile-time DFA**: IP patterns are converted to dense forward DFAs during the build,
//!   eliminating runtime regex compilation; the only heap allocation is a one-time aligned
//!   copy of each serialized DFA the first time it is used.
8//! - **Zero-overhead scanning**: The DFA scans at O(n) with no backtracking; validation
9//! is performed only on candidates.
10//! - **Strict validation**: Deep checks eliminate false positives (e.g., `1.2.3.4.5` is rejected).
11//!
12//! ## Quick Start
13//!
14//! By default, **all IP addresses are extracted**:
15//!
16//! ```no_run
17//! use ip_extract::ExtractorBuilder;
18//!
19//! # fn main() -> anyhow::Result<()> {
20//! // Extract all IPs (default: includes private, loopback, broadcast)
21//! let extractor = ExtractorBuilder::new().build()?;
22//!
23//! let input = b"Connect from 192.168.1.1 to 2001:db8::1";
24//! for range in extractor.find_iter(input) {
25//! let ip = std::str::from_utf8(&input[range])?;
26//! println!("Found: {}", ip);
27//! }
28//! # Ok(())
29//! # }
30//! ```
31//!
32//! ## Tagging and Output
33//!
34//! For more structured output (e.g., JSON), use the `Tagged` and `Tag` types:
35//!
36//! ```no_run
37//! use ip_extract::{ExtractorBuilder, Tagged, Tag};
38//!
39//! # fn main() -> anyhow::Result<()> {
40//! let extractor = ExtractorBuilder::new().build()?;
41//! let data = b"Server at 8.8.8.8";
42//! let mut tagged = Tagged::new(data);
43//!
44//! for range in extractor.find_iter(data) {
45//! let ip = std::str::from_utf8(&data[range.clone()])?;
46//! let tag = Tag::new(ip).with_range(range);
47//! tagged = tagged.tag(tag);
48//! }
49//! # Ok(())
50//! # }
51//! ```
52//!
53//! ## Configuration
54//!
55//! Use `ExtractorBuilder` to filter specific IP categories:
56//!
57//! ```no_run
58//! use ip_extract::ExtractorBuilder;
59//!
60//! # fn main() -> anyhow::Result<()> {
61//! // Extract only publicly routable IPs
62//! let extractor = ExtractorBuilder::new()
63//! .only_public()
64//! .build()?;
65//!
66//! // Or use granular control
67//! let extractor = ExtractorBuilder::new()
68//! .ipv4(true) // Extract IPv4 (default: true)
69//! .ipv6(false) // Skip IPv6
70//! .ignore_private() // Skip RFC 1918 ranges
71//! .ignore_loopback() // Skip loopback (127.0.0.1, ::1)
72//! .build()?;
73//! # Ok(())
74//! # }
75//! ```
76//!
77//! ## Performance
78//!
79//! Typical throughput on modern hardware:
80//! - Dense IPs (mostly IP addresses): **160+ MiB/s**
81//! - Sparse logs (IPs mixed with text): **360+ MiB/s**
82//! - No IPs (pure scanning): **620+ MiB/s**
83//!
84//! See `benches/ip_benchmark.rs` for details.
85
86use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
87use std::ops::Range;
88use std::sync::OnceLock;
89
90use regex_automata::dfa::dense::DFA;
91use regex_automata::dfa::Automaton;
92use regex_automata::Input;
93
94mod tag;
95pub use tag::{Tag, Tagged, TextData};
96
// Pre-compiled, serialized DFAs embedded into the binary from the build output directory.
// `load_dfa` copies these into a `u32`-aligned buffer before deserializing them.
static IPV4_DFA_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/ipv4_only.dfa"));
static IPV6_DFA_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/ipv6_only.dfa"));
static BOTH_DFA_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/both.dfa"));

// Lazily-initialized, process-wide handles to the deserialized DFAs. Each cell is filled
// on first use by the matching `get_*_dfa` accessor.
static DFA_IPV4: OnceLock<&'static DFA<&'static [u32]>> = OnceLock::new();
static DFA_IPV6: OnceLock<&'static DFA<&'static [u32]>> = OnceLock::new();
static DFA_BOTH: OnceLock<&'static DFA<&'static [u32]>> = OnceLock::new();
105
/// Deserialize a pre-compiled DFA from its embedded serialized bytes.
///
/// The DFA is built ahead of time and embedded in the binary as raw bytes. At runtime
/// this function performs three steps:
///
/// 1. **Align the bytes**: the serialized bytes are copied once into a freshly allocated
///    `u32` buffer (rounded up to a whole number of words) so the data handed to the
///    deserializer is `u32`-aligned.
///
/// 2. **Leak for `'static` lifetime**: the buffer is turned into a `&'static` slice with
///    `Box::leak()`. This is intentional: the memory is never freed, which lets the
///    returned DFA borrow from it for the rest of the program.
///
/// 3. **Deserialize**: `DFA::from_bytes()` reconstructs the DFA from the aligned bytes;
///    the resulting DFA (typed as `DFA<&'static [u32]>`) is itself boxed and leaked.
///
/// Every call allocates and leaks a new buffer and a new DFA. The `get_*_dfa` accessors
/// wrap this function in a `OnceLock` so that each embedded DFA is loaded once.
///
/// # Arguments
///
/// * `bytes` - The serialized DFA, as embedded with `include_bytes!`.
///
/// # Returns
///
/// A `'static` reference to the deserialized DFA.
///
/// # Panics
///
/// Panics with `"valid dfa from build.rs"` if `DFA::from_bytes` rejects the bytes.
///
/// # Safety
///
/// The function is safe to call; its two internal `unsafe` blocks are justified inline.
fn load_dfa(bytes: &'static [u8]) -> &'static DFA<&'static [u32]> {
    // Allocate u32 buffer sized to hold all bytes (rounded up)
    let len = bytes.len();
    let cap = len.div_ceil(4);
    let mut storage = vec![0u32; cap];

    // Copy byte data into the u32-aligned buffer
    // SAFETY: `bytes` is valid for `len` reads; `storage` owns `cap * 4 >= len` writable
    // bytes; the two regions are distinct allocations, so they cannot overlap.
    unsafe {
        std::ptr::copy_nonoverlapping(bytes.as_ptr(), storage.as_mut_ptr() as *mut u8, len);
    }

    // Leak the buffer to get a 'static mutable reference
    let storage_ref: &'static mut [u32] = Box::leak(storage.into_boxed_slice());

    // View the leaked u32 buffer as bytes again, trimmed to the original length (no copy)
    // SAFETY: `storage_ref` is leaked and therefore valid for `'static`; it holds at least
    // `len` initialized bytes (zero-filled, then overwritten above); `u8` has alignment 1;
    // `storage_ref` is not used again, so no mutable access aliases this shared view.
    let aligned_slice =
        unsafe { std::slice::from_raw_parts(storage_ref.as_ptr() as *const u8, len) };

    // Deserialize the DFA from the aligned bytes; the second tuple element is unused here
    let (dfa, _) = DFA::from_bytes(aligned_slice).expect("valid dfa from build.rs");

    // Leak the DFA itself for a 'static lifetime
    Box::leak(Box::new(dfa))
}
159
160fn get_ipv4_dfa() -> &'static DFA<&'static [u32]> {
161 DFA_IPV4.get_or_init(|| load_dfa(IPV4_DFA_BYTES))
162}
163fn get_ipv6_dfa() -> &'static DFA<&'static [u32]> {
164 DFA_IPV6.get_or_init(|| load_dfa(IPV6_DFA_BYTES))
165}
166fn get_both_dfa() -> &'static DFA<&'static [u32]> {
167 DFA_BOTH.get_or_init(|| load_dfa(BOTH_DFA_BYTES))
168}
169
/// Validation settings for one DFA pattern.
///
/// Each variant carries the inclusion flags that `validate` forwards to
/// `validate_ipv4` or `validate_ipv6` respectively.
#[derive(Clone, Debug)]
enum ValidatorType {
    /// Settings for IPv4 candidates.
    IPv4 {
        /// Accept addresses for which `Ipv4Addr::is_private` is true.
        include_private: bool,
        /// Accept addresses for which `Ipv4Addr::is_loopback` is true.
        include_loopback: bool,
        /// Accept broadcast and IPv4 link-local addresses.
        include_broadcast: bool,
    },
    /// Settings for IPv6 candidates (there is no broadcast flag for IPv6).
    IPv6 {
        /// Accept unique-local and unicast link-local addresses.
        include_private: bool,
        /// Accept the loopback address.
        include_loopback: bool,
    },
}
182
183impl ValidatorType {
184 #[inline(always)]
185 fn validate(&self, bytes: &[u8]) -> bool {
186 match *self {
187 ValidatorType::IPv4 {
188 include_private,
189 include_loopback,
190 include_broadcast,
191 } => validate_ipv4(bytes, include_private, include_loopback, include_broadcast),
192 ValidatorType::IPv6 {
193 include_private,
194 include_loopback,
195 } => validate_ipv6(bytes, include_private, include_loopback),
196 }
197 }
198}
199
/// The main IP address extractor.
///
/// An `Extractor` scans byte slices for IPv4 and/or IPv6 addresses, applying configurable
/// filters to include or exclude certain address classes (private, loopback, broadcast).
///
/// Extractors are best created via [`ExtractorBuilder`] and are designed to be reused
/// across many calls to `find_iter` for maximum efficiency.
///
/// # Bytes vs. Strings
///
/// This extractor works directly on byte slices rather than strings, so callers do not
/// need to validate the whole input as UTF-8 before scanning it.
///
/// # Performance
///
/// The extractor uses a compile-time DFA (Deterministic Finite Automaton) for O(n)
/// scanning with minimal overhead. See the crate-level documentation for throughput benchmarks.
pub struct Extractor {
    /// Shared pre-compiled DFA used by `find_iter` to locate the end of each candidate.
    dfa: &'static DFA<&'static [u32]>,
    /// Validators indexed by the pattern ID of the DFA match (see `find_iter`).
    validators: Vec<ValidatorType>,
}
221
222impl Extractor {
223 /// Find all IP addresses in a byte slice.
224 ///
225 /// Returns an iterator of byte ranges `[start, end)` pointing to each IP address found.
226 /// Ranges are guaranteed to be valid indices into `haystack`.
227 ///
228 /// # Example
229 ///
230 /// ```no_run
231 /// use ip_extract::ExtractorBuilder;
232 ///
233 /// # fn main() -> anyhow::Result<()> {
234 /// let extractor = ExtractorBuilder::new().build()?;
235 /// let data = b"Log: 192.168.1.1 sent request to 8.8.8.8";
236 ///
237 /// for range in extractor.find_iter(data) {
238 /// let ip = std::str::from_utf8(&data[range]).unwrap();
239 /// println!("IP: {}", ip);
240 /// }
241 /// # Ok(())
242 /// # }
243 /// ```
244 ///
245 /// # Arguments
246 ///
247 /// * `haystack` - A byte slice to search for IP addresses.
248 ///
249 /// # Returns
250 ///
251 /// An iterator yielding byte ranges for each valid IP address found.
252 #[inline]
253 pub fn find_iter<'a>(&'a self, haystack: &'a [u8]) -> impl Iterator<Item = Range<usize>> + 'a {
254 let mut input = Input::new(haystack);
255
256 std::iter::from_fn(move || {
257 loop {
258 // We use the specialized try_search_fwd method.
259 // For a dense DFA, this is the core scanning loop.
260 let Ok(Some(m)) = self.dfa.try_search_fwd(&input) else {
261 return None;
262 };
263
264 let end = m.offset();
265 let pid = m.pattern().as_usize();
266 let validator = &self.validators[pid];
267
268 // Backtrack to find the start. Max IPv6 is 39 bytes, use 40 for safety margin.
269 let mut start_scan = end.saturating_sub(40);
270 while start_scan < end && !is_ip_char(haystack[start_scan]) {
271 start_scan += 1;
272 }
273
274 let mut actual_start = None;
275 for s in start_scan..end {
276 if s > 0 && is_ip_char(haystack[s - 1]) {
277 continue;
278 }
279
280 if validator.validate(&haystack[s..end]) {
281 // Right boundary check: ensure the IP isn't part of a longer sequence
282 // For IPv4: allow trailing dots (sentence endings) but reject digits
283 // For IPv6: reject digits, hex letters, dots, or colons
284 let valid_boundary = if end < haystack.len() {
285 let next_char = haystack[end];
286 match validator {
287 ValidatorType::IPv4 { .. } => {
288 // Reject digits immediately after (e.g., "1.2.3.4" followed by "5")
289 // Reject dot+digit combination (e.g., "1.2.3.4.5")
290 if next_char.is_ascii_digit() {
291 false
292 } else if next_char == b'.' && end + 1 < haystack.len() {
293 // If next is a dot, check if it's followed by a digit
294 !haystack[end + 1].is_ascii_digit()
295 } else {
296 true
297 }
298 }
299 ValidatorType::IPv6 { .. } => {
300 // Reject all IP characters for IPv6
301 !is_ip_char(next_char)
302 }
303 }
304 } else {
305 true
306 };
307
308 if !valid_boundary {
309 break;
310 }
311 actual_start = Some(s..end);
312 break;
313 }
314 }
315
316 // Advance input.
317 input.set_start(end);
318
319 if let Some(range) = actual_start {
320 return Some(range);
321 }
322
323 if end >= haystack.len() {
324 return None;
325 }
326 }
327 })
328 }
329}
330
/// Reports whether `b` may appear inside a textual IPv4 or IPv6 address:
/// an ASCII hex digit (either case), `.` or `:`.
#[inline(always)]
fn is_ip_char(b: u8) -> bool {
    b == b'.' || b == b':' || b.is_ascii_hexdigit()
}
335
/// A builder for configuring IP extraction behavior.
///
/// Use `ExtractorBuilder` to specify which types of IP addresses should be extracted.
/// By default it extracts both IPv4 and IPv6 and **includes** private, loopback, and
/// broadcast addresses; use `only_public()` or the `ignore_*()` methods to narrow the
/// selection.
///
/// # Example
///
/// ```no_run
/// use ip_extract::ExtractorBuilder;
///
/// # fn main() -> anyhow::Result<()> {
/// let extractor = ExtractorBuilder::new()
///     .ipv4(true)
///     .ipv6(false) // Only IPv4
///     .private_ips(true) // Include private ranges
///     .build()?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct ExtractorBuilder {
    /// Whether IPv4 addresses are extracted.
    include_ipv4: bool,
    /// Whether IPv6 addresses are extracted.
    include_ipv6: bool,
    /// Whether private addresses are kept.
    include_private: bool,
    /// Whether loopback addresses are kept.
    include_loopback: bool,
    /// Whether broadcast addresses are kept.
    include_broadcast: bool,
}
363
impl Default for ExtractorBuilder {
    /// Equivalent to [`ExtractorBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
369
370impl ExtractorBuilder {
371 /// Create a new builder with default settings.
372 ///
373 /// By default, **all IP addresses are extracted** (principle of least surprise).
374 /// Use `.only_public()` or `.ignore_*()` methods to filter specific categories.
375 ///
376 /// Defaults:
377 /// - IPv4: enabled
378 /// - IPv6: enabled
379 /// - Private IPs: **enabled** (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, fc00::/7)
380 /// - Loopback IPs: **enabled** (127.0.0.0/8, ::1)
381 /// - Broadcast IPs: **enabled** (255.255.255.255, link-local)
382 ///
383 /// # Examples
384 ///
385 /// ```no_run
386 /// use ip_extract::ExtractorBuilder;
387 ///
388 /// # fn main() -> anyhow::Result<()> {
389 /// // Extract all IPs (default)
390 /// let extractor = ExtractorBuilder::new().build()?;
391 ///
392 /// // Extract only public IPs
393 /// let extractor = ExtractorBuilder::new().only_public().build()?;
394 ///
395 /// // Granular control
396 /// let extractor = ExtractorBuilder::new()
397 /// .ignore_private()
398 /// .ignore_loopback()
399 /// .build()?;
400 /// # Ok(())
401 /// # }
402 /// ```
403 #[must_use]
404 pub fn new() -> Self {
405 Self {
406 include_ipv4: true,
407 include_ipv6: true,
408 include_private: true,
409 include_loopback: true,
410 include_broadcast: true,
411 }
412 }
413 /// Enable or disable IPv4 address extraction.
414 ///
415 /// Default: `true`
416 pub fn ipv4(&mut self, include: bool) -> &mut Self {
417 self.include_ipv4 = include;
418 self
419 }
420
421 /// Enable or disable IPv6 address extraction.
422 ///
423 /// Default: `true`
424 pub fn ipv6(&mut self, include: bool) -> &mut Self {
425 self.include_ipv6 = include;
426 self
427 }
428
429 /// Include private IP addresses (RFC 1918 for IPv4, ULA for IPv6).
430 ///
431 /// Private ranges include:
432 /// - IPv4: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
433 /// - IPv6: fc00::/7 (ULA), fe80::/10 (link-local)
434 ///
435 /// Default: `true`
436 pub fn private_ips(&mut self, include: bool) -> &mut Self {
437 self.include_private = include;
438 self
439 }
440
441 /// Include loopback addresses.
442 ///
443 /// Loopback ranges:
444 /// - IPv4: 127.0.0.0/8
445 /// - IPv6: ::1
446 ///
447 /// Default: `true`
448 pub fn loopback_ips(&mut self, include: bool) -> &mut Self {
449 self.include_loopback = include;
450 self
451 }
452
453 /// Include broadcast addresses.
454 ///
455 /// Covers:
456 /// - IPv4: 255.255.255.255 and link-local (169.254.0.0/16)
457 /// - IPv6: link-local and other special ranges
458 ///
459 /// Default: `true`
460 pub fn broadcast_ips(&mut self, include: bool) -> &mut Self {
461 self.include_broadcast = include;
462 self
463 }
464
465 /// Ignore private IP addresses (convenience for `.private_ips(false)`).
466 ///
467 /// Excludes:
468 /// - IPv4: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
469 /// - IPv6: fc00::/7 (ULA), fe80::/10 (link-local)
470 pub fn ignore_private(&mut self) -> &mut Self {
471 self.include_private = false;
472 self
473 }
474
475 /// Ignore loopback addresses (convenience for `.loopback_ips(false)`).
476 ///
477 /// Excludes:
478 /// - IPv4: 127.0.0.0/8
479 /// - IPv6: ::1
480 pub fn ignore_loopback(&mut self) -> &mut Self {
481 self.include_loopback = false;
482 self
483 }
484
485 /// Ignore broadcast addresses (convenience for `.broadcast_ips(false)`).
486 ///
487 /// Excludes:
488 /// - IPv4: 255.255.255.255 and link-local (169.254.0.0/16)
489 /// - IPv6: link-local and other special ranges
490 pub fn ignore_broadcast(&mut self) -> &mut Self {
491 self.include_broadcast = false;
492 self
493 }
494
495 /// Extract only publicly routable IP addresses.
496 ///
497 /// This is a convenience method equivalent to:
498 /// ```
499 /// # use ip_extract::ExtractorBuilder;
500 /// # let mut builder = ExtractorBuilder::new();
501 /// builder
502 /// .ignore_private()
503 /// .ignore_loopback()
504 /// .ignore_broadcast();
505 /// ```
506 ///
507 /// Excludes:
508 /// - Private: RFC 1918 (IPv4), ULA (IPv6)
509 /// - Loopback: 127.0.0.0/8, ::1
510 /// - Broadcast: 255.255.255.255, link-local ranges
511 ///
512 /// # Example
513 ///
514 /// ```no_run
515 /// use ip_extract::ExtractorBuilder;
516 ///
517 /// # fn main() -> anyhow::Result<()> {
518 /// let extractor = ExtractorBuilder::new()
519 /// .only_public()
520 /// .build()?;
521 /// # Ok(())
522 /// # }
523 /// ```
524 pub fn only_public(&mut self) -> &mut Self {
525 self.include_private = false;
526 self.include_loopback = false;
527 self.include_broadcast = false;
528 self
529 }
530
531 /// Build and return an `Extractor` with the configured settings.
532 ///
533 /// # Errors
534 ///
535 /// Returns an error if no IP version (IPv4 or IPv6) is enabled. At least one
536 /// must be selected.
537 ///
538 /// # Example
539 ///
540 /// ```no_run
541 /// use ip_extract::ExtractorBuilder;
542 ///
543 /// # fn main() -> anyhow::Result<()> {
544 /// let extractor = ExtractorBuilder::new()
545 /// .ipv4(true)
546 /// .ipv6(true)
547 /// .build()?;
548 /// # Ok(())
549 /// # }
550 /// ```
551 pub fn build(&self) -> anyhow::Result<Extractor> {
552 let (dfa, validators) = match (self.include_ipv4, self.include_ipv6) {
553 (true, true) => (
554 get_both_dfa(),
555 vec![
556 ValidatorType::IPv4 {
557 include_private: self.include_private,
558 include_loopback: self.include_loopback,
559 include_broadcast: self.include_broadcast,
560 },
561 ValidatorType::IPv6 {
562 include_private: self.include_private,
563 include_loopback: self.include_loopback,
564 },
565 ],
566 ),
567 (true, false) => (
568 get_ipv4_dfa(),
569 vec![ValidatorType::IPv4 {
570 include_private: self.include_private,
571 include_loopback: self.include_loopback,
572 include_broadcast: self.include_broadcast,
573 }],
574 ),
575 (false, true) => (
576 get_ipv6_dfa(),
577 vec![ValidatorType::IPv6 {
578 include_private: self.include_private,
579 include_loopback: self.include_loopback,
580 }],
581 ),
582 _ => anyhow::bail!("No IP address patterns selected"),
583 };
584 Ok(Extractor { dfa, validators })
585 }
586}
587
588/// Validate an IPv4 address from a byte slice, applying filters.
589///
590/// This function uses `parse_ipv4_bytes` for strict validation and then checks
591/// against the provided inclusion filters.
592///
593/// # Arguments
594///
595/// * `bytes` - Candidate byte slice to validate.
596/// * `include_private` - Whether to include RFC 1918 addresses.
597/// * `include_loopback` - Whether to include 127.0.0.0/8 addresses.
598/// * `include_broadcast` - Whether to include broadcast and link-local addresses.
599#[inline]
600fn validate_ipv4(
601 bytes: &[u8],
602 include_private: bool,
603 include_loopback: bool,
604 include_broadcast: bool,
605) -> bool {
606 let Some(ipv4) = parse_ipv4_bytes(bytes) else {
607 return false;
608 };
609
610 if !include_private && ipv4.is_private() {
611 return false;
612 }
613 if !include_loopback && ipv4.is_loopback() {
614 return false;
615 }
616 if !include_broadcast && (ipv4.is_broadcast() || ipv4.is_link_local()) {
617 return false;
618 }
619 true
620}
621
/// Parse an IPv4 address from a byte slice.
///
/// Accepts only strict dotted-quad notation (e.g., `192.168.1.1`): exactly four
/// dot-separated decimal fields and nothing else. Rejects:
/// - Octet values > 255
/// - Leading zeros (e.g., `192.168.001.1`)
/// - Empty fields, extra fields, and any byte other than an ASCII digit or `.`
///
/// # Arguments
///
/// * `bytes` - A byte slice containing a potential IPv4 address (7-15 bytes)
///
/// # Returns
///
/// `Some(Ipv4Addr)` if the bytes represent a valid IPv4 address, `None` otherwise.
///
/// # Example
///
/// ```
/// use ip_extract::parse_ipv4_bytes;
///
/// assert_eq!(parse_ipv4_bytes(b"192.168.1.1"), Some("192.168.1.1".parse().unwrap()));
/// assert_eq!(parse_ipv4_bytes(b"256.1.1.1"), None); // Out of range
/// assert_eq!(parse_ipv4_bytes(b"192.168.01.1"), None); // Leading zero
/// ```
#[must_use]
#[inline]
pub fn parse_ipv4_bytes(bytes: &[u8]) -> Option<Ipv4Addr> {
    /// Decode one decimal field: 1-3 ASCII digits, no leading zero, value at most 255.
    fn parse_octet(field: &[u8]) -> Option<u8> {
        match field {
            // Empty field, or a zero followed by anything (leading zero).
            [] | [b'0', _, ..] => None,
            // More than three digits can never fit in an octet.
            _ if field.len() > 3 => None,
            _ => {
                let mut value = 0u16;
                for &b in field {
                    if !b.is_ascii_digit() {
                        return None;
                    }
                    value = value * 10 + u16::from(b - b'0');
                }
                // At most 999 here; the conversion rejects anything above 255.
                u8::try_from(value).ok()
            }
        }
    }

    // Shortest form is `0.0.0.0` (7 bytes), longest `255.255.255.255` (15 bytes).
    if !(7..=15).contains(&bytes.len()) {
        return None;
    }

    let mut fields = bytes.split(|&b| b == b'.');
    let mut octets = [0u8; 4];
    for slot in &mut octets {
        *slot = parse_octet(fields.next()?)?;
    }
    // A fifth field means there were too many dots.
    if fields.next().is_some() {
        return None;
    }
    Some(Ipv4Addr::from(octets))
}
694
/// Check if an IPv6 address is a Unique Local Address (ULA) per RFC 4193.
/// ULA addresses are in the fc00::/7 range, i.e. the first octet is `0xfc` or `0xfd`.
#[inline]
fn is_unique_local(ip: &Ipv6Addr) -> bool {
    matches!(ip.octets()[0], 0xfc | 0xfd)
}

/// Validate an IPv6 address from a byte slice, applying filters.
///
/// The slice is parsed with the standard library's IP address parser and then checked
/// against the category filters. Candidates handed to this function are not guaranteed
/// to be valid UTF-8 (the caller slices arbitrary haystack bytes while searching for a
/// start position), so the bytes are converted with the checked `std::str::from_utf8`;
/// anything that is not UTF-8 is rejected.
///
/// # Arguments
///
/// * `bytes` - Candidate byte slice to validate.
/// * `include_private` - Whether to include ULA and link-local addresses.
/// * `include_loopback` - Whether to include the loopback address (`::1`).
///
/// # Returns
///
/// `true` only if `bytes` is exactly one textual IPv6 address that passes the filters.
/// IPv4 text is always rejected.
#[inline]
fn validate_ipv6(bytes: &[u8], include_private: bool, include_loopback: bool) -> bool {
    // The shortest IPv6 literal is `::`.
    if bytes.len() < 2 {
        return false;
    }
    let Ok(text) = std::str::from_utf8(bytes) else {
        return false;
    };
    let Ok(IpAddr::V6(ipv6)) = text.parse::<IpAddr>() else {
        return false;
    };

    if !include_private && (ipv6.is_unicast_link_local() || is_unique_local(&ipv6)) {
        return false;
    }
    if !include_loopback && ipv6.is_loopback() {
        return false;
    }
    true
}
736
737impl std::fmt::Debug for Extractor {
738 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
739 f.debug_struct("Extractor")
740 .field("validators", &self.validators)
741 .finish()
742 }
743}