cloakrs_patterns/
ip_address.rs1use crate::common::{compile_regex, confidence, context_boost};
2use cloakrs_core::{Confidence, EntityType, Locale, PiiEntity, Recognizer, Span};
3use once_cell::sync::Lazy;
4use regex::Regex;
5use std::collections::HashSet;
6use std::net::IpAddr;
7
8static IPV4_REGEX: Lazy<Regex> = Lazy::new(|| compile_regex(r"(?:\d{1,3}\.){3}\d{1,3}"));
9static IPV6_REGEX: Lazy<Regex> =
10 Lazy::new(|| compile_regex(r"[0-9A-Fa-f]{0,4}(?::[0-9A-Fa-f]{0,4}){2,7}"));
11
/// Keywords handed to `context_boost` when scoring a finding.
///
/// The tests in this file show that a match preceded by one of these (for example
/// `client_ip=` or `destination ip`) scores higher than the same address after a
/// neutral word. How `context_boost` searches for them is defined in `crate::common`.
const CONTEXT_WORDS: &[&str] = &[
    // Generic labels.
    "ip",
    "ip address",
    // Field names commonly seen in logs and key/value dumps.
    "remote_addr",
    "client_ip",
    // Direction-qualified labels.
    "source ip",
    "destination ip",
    // Machine-role labels.
    "host",
    "server",
];
22
/// Stateless recognizer that reports IPv4 and IPv6 addresses found in free text.
///
/// The type carries no data; all behaviour lives in its `Recognizer` implementation.
#[derive(Clone, Copy, Debug, Default)]
pub struct IpAddressRecognizer;
37
38impl Recognizer for IpAddressRecognizer {
39 fn id(&self) -> &str {
40 "ip_address_std_v1"
41 }
42
43 fn entity_type(&self) -> EntityType {
44 EntityType::IpAddress
45 }
46
47 fn supported_locales(&self) -> &[Locale] {
48 &[]
49 }
50
51 fn scan(&self, text: &str) -> Vec<PiiEntity> {
52 let mut seen = HashSet::new();
53 let mut findings = Vec::new();
54
55 for regex in [&*IPV4_REGEX, &*IPV6_REGEX] {
56 for matched in regex.find_iter(text) {
57 if seen.insert((matched.start(), matched.end()))
58 && self.is_valid_match(text, matched.start(), matched.end())
59 {
60 findings.push(PiiEntity {
61 entity_type: self.entity_type(),
62 span: Span::new(matched.start(), matched.end()),
63 text: matched.as_str().to_string(),
64 confidence: self.compute_confidence(
65 text,
66 matched.start(),
67 matched.as_str(),
68 ),
69 recognizer_id: self.id().to_string(),
70 });
71 }
72 }
73 }
74
75 findings.sort_by_key(|finding| finding.span.start);
76 findings
77 }
78
79 fn validate(&self, candidate: &str) -> bool {
80 parse_ip(candidate).is_some()
81 }
82}
83
84impl IpAddressRecognizer {
85 fn is_valid_match(&self, text: &str, start: usize, end: usize) -> bool {
86 self.validate(&text[start..end]) && is_ip_boundary(text, start, end)
87 }
88
89 fn compute_confidence(&self, text: &str, start: usize, candidate: &str) -> Confidence {
90 let base = match parse_ip(candidate) {
91 Some(IpAddr::V4(_)) => 0.85,
92 Some(IpAddr::V6(_)) => 0.90,
93 None => 0.0,
94 };
95 confidence(base + context_boost(text, start, CONTEXT_WORDS))
96 }
97}
98
/// Parses `candidate` as an IPv4 or IPv6 address using the standard library parser.
///
/// Returns `None` when the text is not a valid address. The bare string `"::"` is
/// also mapped to `None`, even though the standard parser accepts it, so that a
/// run of colons alone is never reported.
fn parse_ip(candidate: &str) -> Option<IpAddr> {
    match candidate {
        "::" => None,
        other => other.parse::<IpAddr>().ok(),
    }
}
105
/// Reports whether the candidate at `text[start..end]` stands on its own rather
/// than being embedded in a longer token.
///
/// The character immediately before the candidate must not be an address-like
/// character (see `is_ip_continuation`). The character immediately after is held
/// to the same rule, with one exception: a `.` or `:` only counts as part of a
/// longer token when an ASCII letter or digit follows it. This keeps
/// `192.168.1.1.5`, `10.0.0.1:8080` and `1.2.3.4.example` rejected, while an
/// address that merely ends a sentence or clause (`"ping 192.168.1.1."`,
/// `"peer 10.0.0.1: timeout"`) is no longer dropped.
///
/// `start` and `end` must be byte offsets within `text` that fall on UTF-8
/// character boundaries; slicing panics otherwise.
fn is_ip_boundary(text: &str, start: usize, end: usize) -> bool {
    let glued_on_left = text[..start]
        .chars()
        .next_back()
        .is_some_and(is_ip_continuation);
    if glued_on_left {
        return false;
    }

    let mut following = text[end..].chars();
    match following.next() {
        // Candidate runs to the end of the input.
        None => true,
        // A separator is only a continuation if more address-like text follows it;
        // otherwise it is ordinary punctuation after the address.
        Some('.' | ':') => !following.next().is_some_and(|c| c.is_ascii_alphanumeric()),
        Some(other) => !is_ip_continuation(other),
    }
}

/// Returns `true` for characters that may belong to the same token as an address:
/// ASCII letters and digits, plus `_`, `-`, `.`, `:` and `%`.
fn is_ip_continuation(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.' | ':' | '%')
}
115
#[cfg(test)]
mod tests {
    use super::*;
    use cloakrs_core::RecognizerRegistry;

    /// Scans `input` and returns only the matched substrings, in scan order.
    fn texts(input: &str) -> Vec<String> {
        let mut matched = Vec::new();
        for finding in IpAddressRecognizer.scan(input) {
            matched.push(finding.text);
        }
        matched
    }

    /// Asserts that scanning `input` yields exactly `expected`.
    #[track_caller]
    fn expect_findings(input: &str, expected: &[&str]) {
        assert_eq!(texts(input), expected);
    }

    /// Asserts that scanning `input` yields nothing.
    #[track_caller]
    fn expect_no_findings(input: &str) {
        assert!(texts(input).is_empty());
    }

    // --- IPv4 detection -------------------------------------------------------

    #[test]
    fn test_ip_address_ipv4_public_detected() {
        expect_findings("client_ip=203.0.113.42", &["203.0.113.42"]);
    }

    #[test]
    fn test_ip_address_ipv4_private_detected() {
        expect_findings("host 192.168.1.105", &["192.168.1.105"]);
    }

    #[test]
    fn test_ip_address_ipv4_loopback_detected() {
        expect_findings("server 127.0.0.1", &["127.0.0.1"]);
    }

    #[test]
    fn test_ip_address_ipv4_zero_octets_detected() {
        expect_findings("remote 0.0.0.0", &["0.0.0.0"]);
    }

    // --- IPv6 detection -------------------------------------------------------

    #[test]
    fn test_ip_address_ipv6_full_detected() {
        expect_findings(
            "addr 2001:0db8:85a3:0000:0000:8a2e:0370:7334",
            &["2001:0db8:85a3:0000:0000:8a2e:0370:7334"],
        );
    }

    #[test]
    fn test_ip_address_ipv6_compressed_detected() {
        expect_findings("source ip 2001:db8::1", &["2001:db8::1"]);
    }

    #[test]
    fn test_ip_address_ipv6_loopback_detected() {
        expect_findings("host ::1", &["::1"]);
    }

    // --- Invalid addresses ----------------------------------------------------

    #[test]
    fn test_ip_address_ipv4_octet_above_range_rejected() {
        expect_no_findings("client_ip=256.168.1.1");
    }

    #[test]
    fn test_ip_address_ipv4_partial_octet_range_rejected() {
        expect_no_findings("client_ip=999.10.10.10");
    }

    #[test]
    fn test_ip_address_ipv6_invalid_shape_rejected() {
        expect_no_findings("addr 2001:::1");
    }

    #[test]
    fn test_ip_address_ipv6_punctuation_only_rejected() {
        expect_no_findings("addr ::");
    }

    // --- Candidates embedded in longer tokens ---------------------------------

    #[test]
    fn test_ip_address_ipv4_embedded_in_larger_dot_sequence_rejected() {
        expect_no_findings("version 192.168.1.1.5");
    }

    #[test]
    fn test_ip_address_ipv4_embedded_in_word_rejected() {
        expect_no_findings("id192.168.1.1");
    }

    #[test]
    fn test_ip_address_ipv6_embedded_in_larger_colon_sequence_rejected() {
        expect_no_findings("addr x2001:db8::1");
    }

    // --- Multiple findings and ordering ---------------------------------------

    #[test]
    fn test_ip_address_multiple_findings_detected() {
        expect_findings(
            "client_ip=203.0.113.42 server 2001:db8::5",
            &["203.0.113.42", "2001:db8::5"],
        );
    }

    // --- Confidence -----------------------------------------------------------

    #[test]
    fn test_ip_address_context_boosts_ipv4_confidence() {
        let boosted = IpAddressRecognizer.scan("client_ip=203.0.113.42");
        let plain = IpAddressRecognizer.scan("value 203.0.113.42");
        assert!(boosted[0].confidence > plain[0].confidence);
    }

    #[test]
    fn test_ip_address_context_boosts_ipv6_confidence() {
        let boosted = IpAddressRecognizer.scan("destination ip 2001:db8::1");
        let plain = IpAddressRecognizer.scan("value 2001:db8::1");
        assert!(boosted[0].confidence > plain[0].confidence);
    }

    // --- Trait surface --------------------------------------------------------

    #[test]
    fn test_ip_address_supported_locales_are_universal() {
        let locales = IpAddressRecognizer.supported_locales();
        assert!(locales.is_empty());
    }

    #[test]
    fn test_ip_address_validate_accepts_valid_ipv4() {
        let accepted = IpAddressRecognizer.validate("203.0.113.42");
        assert!(accepted);
    }

    #[test]
    fn test_ip_address_validate_rejects_invalid_ipv6() {
        let accepted = IpAddressRecognizer.validate("2001:::1");
        assert!(!accepted);
    }

    // --- Registry integration -------------------------------------------------

    #[test]
    fn test_ip_address_registry_integration_detects_default_recognizer() {
        let mut registry = RecognizerRegistry::new();
        crate::register_default_recognizers(&mut registry);

        let findings = registry.scan_all("remote_addr=203.0.113.42");
        let saw_ip_address = findings
            .iter()
            .any(|finding| finding.entity_type == EntityType::IpAddress);

        assert!(saw_ip_address);
    }
}