Skip to main content

protovalidate_buffa/
rules.rs

1pub mod string {
2    /// Canonical hyphenated UUID per RFC 4122 §3 — 36-char `8-4-4-4-12` hex form.
3    #[expect(
4        clippy::missing_const_for_fn,
5        reason = "uuid::Uuid::try_parse is not const"
6    )]
7    #[must_use]
8    pub fn is_uuid(s: &str) -> bool {
9        s.len() == 36 && ::uuid::Uuid::try_parse(s).is_ok()
10    }
11
12    /// Trimmed UUID — same bytes without dashes (32 hex characters).
13    #[must_use]
14    pub fn is_tuuid(s: &str) -> bool {
15        s.len() == 32 && s.chars().all(|c| c.is_ascii_hexdigit())
16    }
17
18    /// Canonical 26-char Crockford ULID.
19    ///
20    /// The `ulid` crate handles length +
21    /// alphabet; protovalidate additionally rejects values above 7ZZ...ZZ
22    /// (the first Crockford base32 character cannot exceed '7' since a
23    /// ULID is at most 2^128 − 1).
24    #[must_use]
25    pub fn is_ulid(s: &str) -> bool {
26        if ::ulid::Ulid::from_string(s).is_err() {
27            return false;
28        }
29        let first = s.as_bytes()[0].to_ascii_uppercase();
30        (b'0'..=b'7').contains(&first)
31    }
32
33    /// IPv4 in dotted-quad form.
34    #[must_use]
35    pub fn is_ipv4(s: &str) -> bool {
36        s.parse::<::std::net::Ipv4Addr>().is_ok()
37    }
38
39    /// IPv6 — any valid RFC 4291 textual form. Also accepts RFC 6874 zone-id
40    /// suffix `%<zone>` (any non-null characters).
41    #[must_use]
42    pub fn is_ipv6(s: &str) -> bool {
43        let (addr, zone) = match s.split_once('%') {
44            Some((a, z)) => (a, Some(z)),
45            None => (s, None),
46        };
47        if addr.parse::<::std::net::Ipv6Addr>().is_err() {
48            return false;
49        }
50        zone.is_none_or(|z| !z.is_empty() && !z.contains('\0'))
51    }
52
53    /// IPv4 OR IPv6.
54    #[must_use]
55    pub fn is_ip(s: &str) -> bool {
56        // Accept IPv4 or IPv6 (including IPv6 with zone-id suffix).
57        s.parse::<::std::net::IpAddr>().is_ok() || is_ipv6(s)
58    }
59
60    /// Reject prefix-length strings with a leading zero (e.g. `/04`).
61    fn prefix_no_leading_zero(s: &str) -> bool {
62        let Some((_, prefix)) = s.split_once('/') else {
63            return true;
64        };
65        !(prefix.len() > 1 && prefix.starts_with('0'))
66    }
67
68    /// IPv4 with prefix length `/N` (0..=32).
69    #[must_use]
70    pub fn is_ipv4_with_prefixlen(s: &str) -> bool {
71        prefix_no_leading_zero(s) && s.parse::<::ipnet::Ipv4Net>().is_ok()
72    }
73
74    /// IPv6 with prefix length `/N` (0..=128).
75    #[must_use]
76    pub fn is_ipv6_with_prefixlen(s: &str) -> bool {
77        prefix_no_leading_zero(s) && s.parse::<::ipnet::Ipv6Net>().is_ok()
78    }
79
80    /// Either IPv4 or IPv6 with a prefix length.
81    #[must_use]
82    pub fn is_ip_with_prefixlen(s: &str) -> bool {
83        prefix_no_leading_zero(s) && s.parse::<::ipnet::IpNet>().is_ok()
84    }
85
86    /// IPv4 canonical prefix — prefix length `/N` AND host bits zero.
87    #[must_use]
88    pub fn is_ipv4_prefix(s: &str) -> bool {
89        if !prefix_no_leading_zero(s) {
90            return false;
91        }
92        let Ok(net) = s.parse::<::ipnet::Ipv4Net>() else {
93            return false;
94        };
95        net.network() == net.addr()
96    }
97
98    /// IPv6 canonical prefix — prefix length AND host bits zero.
99    #[must_use]
100    pub fn is_ipv6_prefix(s: &str) -> bool {
101        if !prefix_no_leading_zero(s) {
102            return false;
103        }
104        let Ok(net) = s.parse::<::ipnet::Ipv6Net>() else {
105            return false;
106        };
107        net.network() == net.addr()
108    }
109
110    /// IPv4 or IPv6 canonical prefix.
111    #[must_use]
112    pub fn is_ip_prefix(s: &str) -> bool {
113        is_ipv4_prefix(s) || is_ipv6_prefix(s)
114    }
115
116    /// RFC 1035 hostname — labels of 1..=63 LDH characters, total ≤253 chars,
117    /// and the final label must not be all digits.
118    #[must_use]
119    pub fn is_hostname(s: &str) -> bool {
120        if s.is_empty() || s.len() > 253 {
121            return false;
122        }
123        let trimmed = s.strip_suffix('.').unwrap_or(s);
124        let labels: Vec<&str> = trimmed.split('.').collect();
125        if labels.is_empty() {
126            return false;
127        }
128        for label in &labels {
129            if label.is_empty()
130                || label.len() > 63
131                || label.starts_with('-')
132                || label.ends_with('-')
133                || !label
134                    .bytes()
135                    .all(|b| b.is_ascii_alphanumeric() || b == b'-')
136            {
137                return false;
138            }
139        }
140        if let Some(last) = labels.last()
141            && last.bytes().all(|b| b.is_ascii_digit())
142        {
143            return false;
144        }
145        true
146    }
147
148    /// Hostname plus `:port` (0..=65535).
149    ///
150    /// Port must be plain digits with no leading zeros (except the single "0")
151    /// and no sign.
152    /// Parses via [`http::uri::Authority`], then applies protovalidate's
153    /// stricter checks (exact round-trip — rejects userinfo `@`, forces
154    /// canonical port spelling, and requires the host to be a hostname,
155    /// IPv4, or bracketed IPv6).
156    #[must_use]
157    pub fn is_host_and_port(s: &str) -> bool {
158        fn is_valid_port(p: &str) -> bool {
159            if p.is_empty() || !p.bytes().all(|b| b.is_ascii_digit()) {
160                return false;
161            }
162            if p.len() > 1 && p.starts_with('0') {
163                return false;
164            }
165            p.parse::<u16>().is_ok()
166        }
167        let Ok(auth) = s.parse::<::http::uri::Authority>() else {
168            return false;
169        };
170        // Reject anything `http` accepts that we don't: userinfo, unusual
171        // whitespace, non-canonical port.
172        if auth.as_str() != s {
173            return false;
174        }
175        let host = auth.host();
176        let Some(port) = auth.port() else {
177            return false;
178        };
179        if !is_valid_port(port.as_str()) {
180            return false;
181        }
182        if host.starts_with('[') {
183            let Some(inner) = host.strip_prefix('[').and_then(|x| x.strip_suffix(']')) else {
184                return false;
185            };
186            return is_ipv6(inner);
187        }
188        is_hostname(host) || is_ipv4(host)
189    }
190
191    /// Email — permissive RFC 5321 addr-spec. protovalidate's reference
192    /// implementation allows leading/trailing and consecutive dots in the
193    /// local part but rejects a trailing dot on the domain.
194    #[must_use]
195    pub fn is_email(s: &str) -> bool {
196        if s.is_empty() || s.len() > 254 {
197            return false;
198        }
199        let Some((local, domain)) = s.rsplit_once('@') else {
200            return false;
201        };
202        if local.is_empty() {
203            return false;
204        }
205        let local_ok = local.bytes().all(|b| {
206            b.is_ascii_alphanumeric()
207                || matches!(
208                    b,
209                    b'.' | b'_'
210                        | b'-'
211                        | b'+'
212                        | b'%'
213                        | b'!'
214                        | b'#'
215                        | b'$'
216                        | b'&'
217                        | b'\''
218                        | b'*'
219                        | b'/'
220                        | b'='
221                        | b'?'
222                        | b'^'
223                        | b'`'
224                        | b'{'
225                        | b'|'
226                        | b'}'
227                        | b'~'
228                )
229        });
230        if !local_ok {
231            return false;
232        }
233        // Domain must not have a trailing dot in email context.
234        if domain.ends_with('.') {
235            return false;
236        }
237        // Email accepts all-digit domain labels (unlike strict is_hostname).
238        is_email_domain(domain)
239    }
240
241    fn is_email_domain(s: &str) -> bool {
242        if s.is_empty() || s.len() > 253 {
243            return false;
244        }
245        s.split('.').all(|label| {
246            !label.is_empty()
247                && label.len() <= 63
248                && label
249                    .bytes()
250                    .next()
251                    .is_some_and(|b| b.is_ascii_alphanumeric())
252                && label
253                    .bytes()
254                    .last()
255                    .is_some_and(|b| b.is_ascii_alphanumeric())
256                && label
257                    .bytes()
258                    .all(|b| b.is_ascii_alphanumeric() || b == b'-')
259        })
260    }
261
262    /// Protobuf FQN: dot-separated identifiers (no leading digit per segment).
263    #[must_use]
264    pub fn is_protobuf_fqn(s: &str) -> bool {
265        if s.is_empty() {
266            return false;
267        }
268        s.split('.').all(|seg| {
269            !seg.is_empty()
270                && seg
271                    .chars()
272                    .next()
273                    .is_some_and(|c| c.is_ascii_alphabetic() || c == '_')
274                && seg.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'_')
275        })
276    }
277
278    /// Like `is_protobuf_fqn` but rejects leading/trailing dots and double dots.
279    #[must_use]
280    pub fn is_protobuf_dot_fqn(s: &str) -> bool {
281        // `.foo.bar.Baz` — one leading dot, then FQN.
282        let trimmed = s.strip_prefix('.').unwrap_or(s);
283        if trimmed == s {
284            return is_protobuf_fqn(s);
285        }
286        is_protobuf_fqn(trimmed)
287    }
288
289    /// address = hostname OR IP.
290    #[must_use]
291    pub fn is_address(s: &str) -> bool {
292        is_hostname(s) || is_ip(s)
293    }
294
295    /// Well-known regex — HTTP header name.
296    ///
297    /// `strict=true` follows RFC 7230 token syntax (delegated to
298    /// [`http::HeaderName::from_bytes`]) plus HTTP/2 pseudo-headers starting
299    /// with `:` (which the `http` crate rejects); `strict=false` is looser
300    /// (no CR/LF/NUL).
301    #[must_use]
302    pub fn is_header_name(s: &str, strict: bool) -> bool {
303        if s.is_empty() {
304            return false;
305        }
306        if strict {
307            // HTTP/2 pseudo-header — not accepted by `http::HeaderName`.
308            if let Some(rest) = s.strip_prefix(':') {
309                return !rest.is_empty()
310                    && rest.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-');
311            }
312            ::http::HeaderName::from_bytes(s.as_bytes()).is_ok()
313        } else {
314            s.bytes().all(|b| !matches!(b, 0 | b'\r' | b'\n'))
315        }
316    }
317
318    /// Well-known regex — HTTP header value. `strict=true` delegates to
319    /// [`http::HeaderValue::from_bytes`] (visible ASCII + tab); `strict=false`
320    /// is looser (no CR/LF/NUL).
321    #[must_use]
322    pub fn is_header_value(s: &str, strict: bool) -> bool {
323        if strict {
324            ::http::HeaderValue::from_bytes(s.as_bytes()).is_ok()
325        } else {
326            s.bytes().all(|b| !matches!(b, 0 | b'\r' | b'\n'))
327        }
328    }
329
330    /// True when `path` is equal to `candidate` or is a sub-path of it
331    /// (i.e. `candidate` is a prefix at a path-segment boundary).
332    #[must_use]
333    pub fn fieldmask_covers(candidate: &str, path: &str) -> bool {
334        if path == candidate {
335            return true;
336        }
337        if path.len() > candidate.len()
338            && path.starts_with(candidate)
339            && path.as_bytes()[candidate.len()] == b'.'
340        {
341            return true;
342        }
343        false
344    }
345
346    /// URI per RFC 3986 §3. Accepts absolute URIs (scheme required).
347    #[must_use]
348    pub fn is_uri(s: &str) -> bool {
349        // fluent-uri is strict RFC 3986 and rejects IP-literal hosts with
350        // RFC 6874 zone-id (`[::1%25eth0]`). Strip a syntactically-valid
351        // zone-id before parsing, then re-validate it ourselves.
352        let (candidate, host_ok) = prepare_for_uri_parse(s);
353        if !host_ok {
354            return false;
355        }
356        let Ok(u) = ::fluent_uri::Uri::<&str>::parse(candidate.as_ref()) else {
357            return false;
358        };
359        if let Some(auth) = u.authority() {
360            let host = auth.host();
361            if !host.starts_with('[') {
362                return pct_decode_valid_utf8(host);
363            }
364        }
365        true
366    }
367
368    /// URI reference per RFC 3986 §4.1. Either a URI or a relative-ref.
369    #[must_use]
370    pub fn is_uri_ref(s: &str) -> bool {
371        if s.is_empty() {
372            return true;
373        }
374        let (candidate, host_ok) = prepare_for_uri_parse(s);
375        if !host_ok {
376            return false;
377        }
378        let Ok(u) = ::fluent_uri::UriRef::<&str>::parse(candidate.as_ref()) else {
379            return false;
380        };
381        if let Some(auth) = u.authority() {
382            let host = auth.host();
383            if !host.starts_with('[') {
384                return pct_decode_valid_utf8(host);
385            }
386        }
387        true
388    }
389
390    /// If the input has an IP-literal host `[...]`, validate its contents
391    /// against our stricter IPv6-zone-id / `IPFuture` rules (fluent-uri won't
392    /// accept RFC 6874 hosts). Returns `(candidate_for_fluent, ok)`.
393    /// When an IP-literal is present and valid, the returned candidate has
394    /// the zone-id stripped so fluent-uri can parse the remaining URI.
395    fn prepare_for_uri_parse(s: &str) -> (std::borrow::Cow<'_, str>, bool) {
396        let Some(lb) = s.find('[') else {
397            return (s.into(), true);
398        };
399        let Some(rb_rel) = s[lb..].find(']') else {
400            return (s.into(), false);
401        };
402        let rb = lb + rb_rel;
403        let inner = &s[lb + 1..rb];
404        if !is_ip_literal_content(inner) {
405            return (s.into(), false);
406        }
407        // Strip zone-id so fluent-uri sees a vanilla IPv6.
408        if let Some(pos) = inner.find("%25") {
409            let mut rewritten = String::with_capacity(s.len());
410            rewritten.push_str(&s[..=lb]);
411            rewritten.push_str(&inner[..pos]);
412            rewritten.push_str(&s[rb..]);
413            return (rewritten.into(), true);
414        }
415        (s.into(), true)
416    }
417
418    /// Validate the contents of an RFC 3986 IP-literal (between `[` and `]`).
419    /// Accepts `IPFuture` `v<hex>.<reserved>` or `IPv6address` with optional
420    /// RFC 6874 zone-id `%25<non-empty pct-encoded reg-name>`.
421    fn is_ip_literal_content(inner: &str) -> bool {
422        if let Some(rest) = inner.strip_prefix('v').or_else(|| inner.strip_prefix('V')) {
423            let bs = rest.as_bytes();
424            let mut j = 0;
425            while bs.get(j).is_some_and(u8::is_ascii_hexdigit) {
426                j += 1;
427            }
428            if j == 0 {
429                return false;
430            }
431            if bs.get(j) != Some(&b'.') {
432                return false;
433            }
434            j += 1;
435            if j >= bs.len() {
436                return false;
437            }
438            return bs[j..].iter().all(|b| {
439                b.is_ascii_alphanumeric()
440                    || matches!(*b, b'-' | b'.' | b'_' | b'~')
441                    || matches!(
442                        *b,
443                        b'!' | b'$' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+' | b',' | b';' | b'='
444                    )
445                    || *b == b':'
446            });
447        }
448        let (addr, zone_opt) = inner.find("%25").map_or((inner, None), |pos| {
449            (&inner[..pos], Some(&inner[pos + 3..]))
450        });
451        if addr.parse::<::std::net::Ipv6Addr>().is_err() {
452            return false;
453        }
454        if let Some(zone) = zone_opt {
455            if zone.is_empty() {
456                return false;
457            }
458            if !pct_decode_valid_utf8(zone) {
459                return false;
460            }
461        }
462        true
463    }
464
465    /// Check that a string is composed of unreserved chars or valid
466    /// pct-encoded triplets, AND that pct-decoding yields valid UTF-8.
467    /// Uses [`percent_encoding::percent_decode_str`] for the decode; only
468    /// the hex-digit validation is done manually (the crate silently passes
469    /// through malformed `%XY` rather than erroring).
470    fn pct_decode_valid_utf8(input: &str) -> bool {
471        // Reject any `%` not followed by two hex digits.
472        let bytes = input.as_bytes();
473        let mut idx = 0;
474        while idx < bytes.len() {
475            if bytes[idx] == b'%' {
476                if idx + 2 >= bytes.len()
477                    || !bytes[idx + 1].is_ascii_hexdigit()
478                    || !bytes[idx + 2].is_ascii_hexdigit()
479                {
480                    return false;
481                }
482                idx += 3;
483            } else {
484                idx += 1;
485            }
486        }
487        ::percent_encoding::percent_decode_str(input)
488            .decode_utf8()
489            .is_ok()
490    }
491}
492
pub mod float {
    /// Returns `true` when `f` is neither infinite nor NaN.
    #[must_use]
    pub const fn is_finite_f32(f: f32) -> bool {
        f32::is_finite(f)
    }

    /// Returns `true` when `f` is neither infinite nor NaN.
    #[must_use]
    pub const fn is_finite_f64(f: f64) -> bool {
        f64::is_finite(f)
    }
}