atproto_identity/validation.rs
1//! Input validation for AT Protocol handles and DIDs.
2//!
3//! This module provides comprehensive validation functions for various identifier formats
4//! used in the AT Protocol ecosystem. All validation follows established standards including
5//! RFC 1035 for hostnames and AT Protocol specifications for handle and DID formats.
6//!
7//! # Main Functions
8//!
9//! ## Handle Validation
10//! - [`is_valid_handle`] - Validates and normalizes AT Protocol handles
11//! - [`strip_handle_prefixes`] - Removes common handle prefixes (`@`, `at://`)
12//!
13//! ## DID Validation
14//! - [`is_valid_did_method_plc`] - Validates PLC DIDs (`did:plc:...`)
15//! - [`is_valid_did_method_web`] - Validates Web DIDs (`did:web:...`)
16//! - [`is_valid_did_method_webvh`] - Validates WebVH DIDs (`did:webvh:...`)
17//!
18//! ## Network Address Validation
19//! - [`is_valid_hostname`] - RFC 1035 compliant hostname validation
20//! - [`is_ipv4`] - IPv4 address validation
21//! - [`is_ipv6`] - IPv6 address validation
22//!
23//! ## Utility Functions
24//! - [`is_valid_base58_btc`] - Base58-btc alphabet character validation
25//!
26//! # Examples
27//!
28//! ```
29//! use atproto_identity::validation::*;
30//!
31//! // Handle validation
32//! assert_eq!(is_valid_handle("@alice.bsky.social"), Some("alice.bsky.social".to_string()));
33//!
34//! // DID validation
35//! assert!(is_valid_did_method_plc("did:plc:z3f2222fa222f5c33c2f27ez"));
36//! assert!(is_valid_did_method_web("did:web:example.com", true));
37//! assert!(is_valid_did_method_webvh("did:webvh:abc123:example.com", true));
38//!
39//! // Network validation
40//! assert!(is_valid_hostname("example.com"));
41//! assert!(is_ipv4("192.168.1.1"));
42//! assert!(is_ipv6("2001:db8::1"));
43//! ```
44
/// Upper bound, in bytes, on the total length of a hostname (RFC 1035).
const MAX_HOSTNAME_LENGTH: usize = 253;

/// Upper bound, in bytes, on a single dot-separated DNS label (RFC 1035).
const MAX_LABEL_LENGTH: usize = 63;

/// Reserved top-level domain suffixes (leading dot included) that are not
/// acceptable for AT Protocol handles.
const RESERVED_TLDS: [&str; 4] = [".localhost", ".internal", ".arpa", ".local"];
53
54/// Validates if a string is a valid hostname according to RFC 1035.
55///
56/// A valid hostname must:
57/// - Be between 1 and 253 characters in length
58/// - Not use reserved top-level domains (.localhost, .internal, .arpa, .local)
59/// - Not be an IPv4 or IPv6 address
60/// - Contain only valid hostname characters (letters, digits, hyphens, dots)
61/// - Have valid DNS labels (no leading/trailing hyphens, max 63 chars per label)
62///
63/// # Arguments
64///
65/// * `hostname` - The hostname string to validate
66///
67/// # Returns
68///
69/// `true` if the hostname is valid according to RFC 1035, `false` otherwise
70///
71/// # Examples
72///
73/// ```
74/// use atproto_identity::validation::is_valid_hostname;
75///
76/// // Valid hostnames
77/// assert!(is_valid_hostname("example.com"));
78/// assert!(is_valid_hostname("sub.example.com"));
79/// assert!(is_valid_hostname("test-host.example.com"));
80/// assert!(is_valid_hostname("localhost"));
81///
82/// // Invalid hostnames
83/// assert!(!is_valid_hostname("192.168.1.1")); // IPv4 address
84/// assert!(!is_valid_hostname("example.localhost")); // Reserved TLD
85/// assert!(!is_valid_hostname("example..com")); // Double dot
86/// assert!(!is_valid_hostname("-example.com")); // Leading hyphen
87/// ```
88pub fn is_valid_hostname(hostname: &str) -> bool {
89 // Empty hostnames are invalid
90 if hostname.is_empty() || hostname.len() > MAX_HOSTNAME_LENGTH {
91 return false;
92 }
93
94 // Check if hostname uses any reserved TLDs
95 if RESERVED_TLDS.iter().any(|tld| hostname.ends_with(tld)) {
96 return false;
97 }
98
99 // Reject IPv4 addresses
100 if is_ipv4(hostname) {
101 return false;
102 }
103
104 // Reject IPv6 addresses
105 if is_ipv6(hostname) {
106 return false;
107 }
108
109 // Ensure all characters are valid hostname characters
110 if hostname.bytes().any(|byte| !is_valid_hostname_char(byte)) {
111 return false;
112 }
113
114 // Validate each DNS label in the hostname
115 if hostname.split('.').any(|label| !is_valid_dns_label(label)) {
116 return false;
117 }
118
119 true
120}
121
/// Reports whether `byte` may appear in a hostname: an ASCII letter of either
/// case, an ASCII digit, a hyphen, or a dot.
fn is_valid_hostname_char(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'.')
}
129
130fn is_valid_dns_label(label: &str) -> bool {
131 !(label.is_empty()
132 || label.len() > MAX_LABEL_LENGTH
133 || label.starts_with('-')
134 || label.ends_with('-'))
135}
136
/// Checks if a string is a valid IPv4 address.
///
/// Validates that the string consists of exactly four decimal numbers
/// separated by dots, where each number is between 0 and 255. Each number must
/// be written with ASCII digits only; a sign such as `+1` is rejected. Leading
/// zeros (e.g. `010`) are tolerated, which keeps this check conservative when it
/// is used to reject IP-looking hostnames.
///
/// # Arguments
///
/// * `s` - The string to validate as an IPv4 address
///
/// # Returns
///
/// `true` if the string is a valid IPv4 address, `false` otherwise
///
/// # Examples
///
/// ```
/// use atproto_identity::validation::is_ipv4;
///
/// // Valid IPv4 addresses
/// assert!(is_ipv4("192.168.1.1"));
/// assert!(is_ipv4("127.0.0.1"));
/// assert!(is_ipv4("255.255.255.255"));
/// assert!(is_ipv4("0.0.0.0"));
///
/// // Invalid IPv4 addresses
/// assert!(!is_ipv4("256.1.1.1")); // Number too large
/// assert!(!is_ipv4("192.168.1")); // Missing octet
/// assert!(!is_ipv4("192.168.1.1.1")); // Too many octets
/// assert!(!is_ipv4("+1.2.3.4")); // Signs are not digits
/// assert!(!is_ipv4("example.com")); // Not numeric
/// ```
pub fn is_ipv4(s: &str) -> bool {
    let mut octets = 0usize;

    for part in s.split('.') {
        octets += 1;
        if octets > 4 {
            return false;
        }

        // `str::parse::<u8>` accepts an optional leading '+', so insist on
        // plain digits before letting it do the 0..=255 range check.
        let digits_only = !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit());
        if !digits_only || part.parse::<u8>().is_err() {
            return false;
        }
    }

    octets == 4
}
175
/// Checks if a string is a valid IPv6 address.
///
/// The address is parsed with the standard library's [`std::net::Ipv6Addr`]
/// parser, so only well-formed textual IPv6 addresses are accepted, including
/// compressed (`::`) notation and IPv4-embedded forms such as
/// `::ffff:192.0.2.1`. A single pair of surrounding brackets, as used in URLs
/// (e.g. `[2001:db8::1]`), is stripped before parsing.
///
/// # Arguments
///
/// * `s` - The string to validate as an IPv6 address
///
/// # Returns
///
/// `true` if the string is a valid IPv6 address, `false` otherwise
///
/// # Examples
///
/// ```
/// use atproto_identity::validation::is_ipv6;
///
/// // Valid IPv6 addresses
/// assert!(is_ipv6("2001:db8::1"));
/// assert!(is_ipv6("::1"));
/// assert!(is_ipv6("fe80::1"));
/// assert!(is_ipv6("[2001:db8::1]")); // With brackets
/// assert!(is_ipv6("2001:0db8:0000:0000:0000:ff00:0042:8329"));
///
/// // Invalid IPv6 addresses
/// assert!(!is_ipv6("192.168.1.1")); // IPv4, not IPv6
/// assert!(!is_ipv6("example.com")); // No colons
/// assert!(!is_ipv6("2001:gggg::1")); // Invalid hex characters
/// assert!(!is_ipv6("1:2")); // Too few groups and no `::`
/// ```
pub fn is_ipv6(s: &str) -> bool {
    // Strip brackets only when both are present; a lone bracket is left in
    // place and makes the parse fail.
    let address = s
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .unwrap_or(s);

    // Counting colon-separated hex groups by hand accepts strings such as
    // "1:2" or ":" that are not addresses; the std parser enforces the full
    // grammar.
    address.parse::<std::net::Ipv6Addr>().is_ok()
}
242
243/// Validates and normalizes an AT Protocol handle.
244///
245/// A valid AT Protocol handle must:
246/// - Be a valid hostname (after stripping prefixes)
247/// - Contain at least one period (to distinguish from simple hostnames)
248/// - Follow all hostname validation rules (RFC 1035)
249///
250/// The function automatically strips common prefixes (`at://` and `@`) before validation.
251///
252/// # Arguments
253///
254/// * `handle` - The handle string to validate and normalize
255///
256/// # Returns
257///
258/// `Some(String)` containing the normalized handle if valid, `None` if invalid
259///
260/// # Examples
261///
262/// ```
263/// use atproto_identity::validation::is_valid_handle;
264///
265/// // Valid handles
266/// assert_eq!(is_valid_handle("alice.bsky.social"), Some("alice.bsky.social".to_string()));
267/// assert_eq!(is_valid_handle("@bob.example.com"), Some("bob.example.com".to_string()));
268/// assert_eq!(is_valid_handle("at://charlie.test.com"), Some("charlie.test.com".to_string()));
269///
270/// // Invalid handles
271/// assert_eq!(is_valid_handle("localhost"), None); // No period
272/// assert_eq!(is_valid_handle("192.168.1.1"), None); // IPv4 address
273/// assert_eq!(is_valid_handle("invalid..handle.com"), None); // Double dot
274/// ```
275pub fn is_valid_handle(handle: &str) -> Option<String> {
276 // Strip optional prefixes to get the core handle
277 let trimmed = strip_handle_prefixes(handle);
278
279 // A valid handle must be a valid hostname with at least one period
280 if is_valid_hostname(trimmed) && trimmed.contains('.') {
281 Some(trimmed.to_string())
282 } else {
283 None
284 }
285}
286
/// Removes one leading AT Protocol handle prefix from `handle`.
///
/// If the input starts with `at://`, that prefix is removed; otherwise, if it
/// starts with `@`, that is removed. At most one prefix is stripped, and input
/// without either prefix is returned unchanged.
///
/// # Arguments
///
/// * `handle` - The handle string that may carry a prefix
///
/// # Returns
///
/// A slice of `handle` with the prefix (if any) removed
///
/// # Examples
///
/// ```
/// use atproto_identity::validation::strip_handle_prefixes;
///
/// assert_eq!(strip_handle_prefixes("@alice.bsky.social"), "alice.bsky.social");
/// assert_eq!(strip_handle_prefixes("at://bob.example.com"), "bob.example.com");
/// assert_eq!(strip_handle_prefixes("charlie.test.com"), "charlie.test.com");
/// ```
pub fn strip_handle_prefixes(handle: &str) -> &str {
    handle
        .strip_prefix("at://")
        .or_else(|| handle.strip_prefix('@'))
        .unwrap_or(handle)
}
318
/// Validates if a string is a properly formatted PLC DID.
///
/// A valid PLC DID is the literal prefix `did:plc:` followed by exactly 24
/// characters, each of which is a lowercase ASCII letter (`a`-`z`) or one of
/// the digits `2`-`7` (the base32 alphabet).
///
/// # Arguments
///
/// * `did` - The DID string to validate
///
/// # Returns
///
/// `true` if the DID is a valid PLC DID, `false` otherwise
///
/// # Examples
///
/// ```
/// use atproto_identity::validation::is_valid_did_method_plc;
///
/// // Valid PLC DIDs
/// assert!(is_valid_did_method_plc("did:plc:z3f2222fa222f5c33c2f27ez"));
/// assert!(is_valid_did_method_plc("did:plc:abcdefghijklmnopqrstuvwx"));
///
/// // Invalid PLC DIDs
/// assert!(!is_valid_did_method_plc("did:web:example.com"));
/// assert!(!is_valid_did_method_plc("did:plc:invalid0length"));
/// assert!(!is_valid_did_method_plc("did:plc:UPPERCASE_NOT_ALLOWED"));
/// ```
pub fn is_valid_did_method_plc(did: &str) -> bool {
    const PREFIX: &str = "did:plc:";
    const IDENTIFIER_LEN: usize = 24;

    if let Some(identifier) = did.strip_prefix(PREFIX) {
        // Only ASCII bytes can match, so the byte length equals the character
        // count whenever the alphabet check passes.
        identifier.len() == IDENTIFIER_LEN
            && identifier
                .bytes()
                .all(|b| matches!(b, b'a'..=b'z' | b'2'..=b'7'))
    } else {
        false
    }
}
359
360/// Validates if a string is a properly formatted Web DID.
361///
362/// A valid Web DID must start with the prefix `did:web:` followed by content that
363/// depends on the strictness mode:
364///
365/// # Strict Mode (`strict = true`)
366/// - Only a valid hostname is allowed after `did:web:`
367/// - No additional path segments permitted
368///
369/// # Non-Strict Mode (`strict = false`)
370/// - First segment must be a valid hostname
371/// - Additional colon-separated segments are allowed
372/// - Each additional segment must be non-empty and alphanumeric
373///
374/// # Arguments
375///
376/// * `did` - The DID string to validate
377/// * `strict` - Whether to use strict hostname-only validation
378///
379/// # Returns
380///
381/// `true` if the DID is a valid Web DID according to the specified mode, `false` otherwise
382///
383/// # Examples
384///
385/// ```
386/// use atproto_identity::validation::is_valid_did_method_web;
387///
388/// // Valid in both modes
389/// assert!(is_valid_did_method_web("did:web:example.com", true));
390/// assert!(is_valid_did_method_web("did:web:example.com", false));
391///
392/// // Valid only in non-strict mode
393/// assert!(!is_valid_did_method_web("did:web:example.com:path", true));
394/// assert!(is_valid_did_method_web("did:web:example.com:path", false));
395/// assert!(is_valid_did_method_web("did:web:example.com:path:subpath", false));
396///
397/// // Invalid in both modes
398/// assert!(!is_valid_did_method_web("did:web:192.168.1.1", true));
399/// assert!(!is_valid_did_method_web("did:web:example.com:", false));
400/// ```
401pub fn is_valid_did_method_web(did: &str, strict: bool) -> bool {
402 let did_value = match did.strip_prefix("did:web:") {
403 Some(value) => value,
404 None => return false,
405 };
406
407 if strict {
408 // In strict mode, only a valid hostname is allowed
409 is_valid_hostname(did_value)
410 } else {
411 // In non-strict mode, allow colon-separated segments
412 let segments: Vec<&str> = did_value.split(':').collect();
413
414 // Must have at least one segment (the hostname)
415 if segments.is_empty() {
416 return false;
417 }
418
419 // First segment must be a valid hostname
420 if !is_valid_hostname(segments[0]) {
421 return false;
422 }
423
424 // All subsequent segments must be non-empty alphanumeric strings
425 segments[1..].iter().all(|segment| {
426 !segment.is_empty() && segment.chars().all(|c| c.is_ascii_alphanumeric())
427 })
428 }
429}
430
431/// Validates if a string is a properly formatted WebVH DID.
432///
433/// A WebVH DID extends the Web DID format by adding a SCIM (Self-Controlled Identity Marker)
434/// segment immediately after the `did:webvh:` prefix.
435///
436/// # Format
437///
438/// ```text
439/// did:webvh:<scim>:<content>
440/// ```
441///
442/// Where:
443/// - `<scim>` must contain only base58-btc alphabet characters (`123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz`)
444/// - `<content>` follows the same validation rules as `did:web` content
445///
446/// # Strict vs Non-Strict Mode
447///
448/// **Strict Mode (`strict = true`)**:
449/// - `<content>` must be a valid hostname only
450/// - No additional path segments permitted
451///
452/// **Non-Strict Mode (`strict = false`)**:
453/// - First segment of `<content>` must be a valid hostname
454/// - Additional colon-separated segments are allowed
455/// - Each additional segment must be non-empty and alphanumeric
456///
457/// # Arguments
458///
459/// * `did` - The DID string to validate
460/// * `strict` - Whether to use strict hostname-only validation for the content portion
461///
462/// # Returns
463///
464/// `true` if the DID is a valid WebVH DID according to the specified mode, `false` otherwise
465///
466/// # Examples
467///
468/// ```
469/// use atproto_identity::validation::is_valid_did_method_webvh;
470///
471/// // Valid WebVH DIDs in both modes
472/// assert!(is_valid_did_method_webvh("did:webvh:abc123:example.com", true));
473/// assert!(is_valid_did_method_webvh("did:webvh:XYZ789:sub.example.com", false));
474///
475/// // Valid only in non-strict mode (has path segments)
476/// assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:path", true));
477/// assert!(is_valid_did_method_webvh("did:webvh:abc123:example.com:path", false));
478/// assert!(is_valid_did_method_webvh("did:webvh:def456:example.com:path:subpath", false));
479///
480/// // Invalid - SCIM contains excluded base58 characters (0, O, I, l)
481/// assert!(!is_valid_did_method_webvh("did:webvh:0abc:example.com", true));
482/// assert!(!is_valid_did_method_webvh("did:webvh:Oabc:example.com", false));
483/// assert!(!is_valid_did_method_webvh("did:webvh:Iabc:example.com", true));
484/// assert!(!is_valid_did_method_webvh("did:webvh:labc:example.com", false));
485///
486/// // Invalid - wrong format or missing components
487/// assert!(!is_valid_did_method_webvh("did:web:abc123:example.com", true)); // Wrong prefix
488/// assert!(!is_valid_did_method_webvh("did:webvh:abc123", true)); // Missing content
489/// assert!(!is_valid_did_method_webvh("did:webvh::example.com", true)); // Empty SCIM
490/// ```
491pub fn is_valid_did_method_webvh(did: &str, strict: bool) -> bool {
492 let did_value = match did.strip_prefix("did:webvh:") {
493 Some(value) => value,
494 None => return false,
495 };
496
497 // Split by the first colon to separate scim from content
498 let parts: Vec<&str> = did_value.splitn(2, ':').collect();
499
500 // Must have exactly 2 parts: scim and content
501 if parts.len() != 2 {
502 return false;
503 }
504
505 let scim = parts[0];
506 let content = parts[1];
507
508 // Validate scim - must be non-empty and contain only base58-btc alphabet characters
509 if scim.is_empty() || !is_valid_base58_btc(scim) {
510 return false;
511 }
512
513 // Validate content using the same rules as did:web
514 if strict {
515 // In strict mode, only a valid hostname is allowed
516 is_valid_hostname(content)
517 } else {
518 // In non-strict mode, allow colon-separated segments
519 let segments: Vec<&str> = content.split(':').collect();
520
521 // Must have at least one segment (the hostname)
522 if segments.is_empty() {
523 return false;
524 }
525
526 // First segment must be a valid hostname
527 if !is_valid_hostname(segments[0]) {
528 return false;
529 }
530
531 // All subsequent segments must be non-empty alphanumeric strings
532 segments[1..].iter().all(|segment| {
533 !segment.is_empty() && segment.chars().all(|c| c.is_ascii_alphanumeric())
534 })
535 }
536}
537
/// Checks if a string is non-empty and made up solely of base58-btc characters.
///
/// The base58-btc alphabet is the ASCII alphanumerics minus four easily
/// confused characters:
/// - Excludes: `0` (zero), `O` (capital O), `I` (capital I), `l` (lowercase L)
/// - Includes: `123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz`
///
/// # Arguments
///
/// * `s` - The string to validate for base58-btc character compliance
///
/// # Returns
///
/// `true` if the string is non-empty and contains only valid base58-btc characters, `false` otherwise
///
/// # Examples
///
/// ```
/// use atproto_identity::validation::is_valid_base58_btc;
///
/// // Valid base58-btc strings
/// assert!(is_valid_base58_btc("123456789"));
/// assert!(is_valid_base58_btc("ABCDEFGHJKLMNPQRSTUVWXYZabcdefghjkmnpqrstuvwxyz"));
/// assert!(is_valid_base58_btc("abc123XYZ"));
///
/// // Invalid - contains excluded characters
/// assert!(!is_valid_base58_btc("abc0def")); // Contains 0
/// assert!(!is_valid_base58_btc("abcOdef")); // Contains O
/// assert!(!is_valid_base58_btc("abcIdef")); // Contains I
/// assert!(!is_valid_base58_btc("abcldef")); // Contains l
///
/// // Invalid - empty or non-alphanumeric
/// assert!(!is_valid_base58_btc(""));
/// assert!(!is_valid_base58_btc("abc-def"));
/// ```
pub fn is_valid_base58_btc(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }

    // The ranges below spell out the alphabet with its gaps at '0', 'I', 'O'
    // and 'l'. Bytes of multi-byte characters are >= 0x80 and never match.
    s.bytes().all(|b| {
        matches!(
            b,
            b'1'..=b'9' | b'A'..=b'H' | b'J'..=b'N' | b'P'..=b'Z' | b'a'..=b'k' | b'm'..=b'z'
        )
    })
}
577
578#[cfg(test)]
579mod tests {
580 use super::*;
581
#[test]
fn test_is_valid_did_method_plc() {
    // Exactly 24 base32 characters after "did:plc:".
    let accepted = [
        "did:plc:abcdefghijklmnopqrstuvwx",
        "did:plc:z3f2222fa222f5c33c2f27ez",
        "did:plc:aaaaaaaaaaaaaaaaaaaaaaaa", // 24 'a's
        "did:plc:abcdef2345ghijk6mn7pqrst", // mix of letters and valid numbers
    ];
    for did in accepted.iter() {
        assert!(is_valid_did_method_plc(did), "expected valid: {:?}", did);
    }

    let rejected = [
        // Uppercase letters are not valid base32 here
        "did:plc:ABCDEFGHIJKLMNOPQRSTUVWX",
        "did:plc:Abcdefghijklmnopqrstuvwx",
        // Digits outside 2-7 (0, 1, 8, 9)
        "did:plc:123456789012345678901234",
        "did:plc:abcdefghijklmnopqrstuv0x",
        "did:plc:abcdefghijklmnopqrstuv1x",
        "did:plc:abcdefghijklmnopqrstuv8x",
        "did:plc:abcdefghijklmnopqrstuv9x",
        // Wrong or missing prefix
        "did:web:abcdefghijklmnopqrstuvwx",
        "did:key:abcdefghijklmnopqrstuvwx",
        "plc:abcdefghijklmnopqrstuvwx",
        "abcdefghijklmnopqrstuvwx",
        // Wrong length (not 24 characters)
        "did:plc:",
        "did:plc:abc",
        "did:plc:abcdefghijklmnopqrstuv",     // too short
        "did:plc:abcdefghijklmnopqrstuvwxy",  // 25 chars
        "did:plc:abcdefghijklmnopqrstuvwxyz", // 26 chars
        // Edge cases
        "",
        "did:plc",
        "did:plc:",
        "DID:PLC:abcdefghijklmnopqrstuvwx",  // uppercase prefix
        "did:PLC:abcdefghijklmnopqrstuvwx",  // uppercase method
        " did:plc:abcdefghijklmnopqrstuvwx", // leading space
        "did:plc:abcdefghijklmnopqrstuvwx ", // trailing space
        // Special characters (not base32)
        "did:plc:abc-def_hij.klm~nop!qrst",
        "did:plc:~~~!!!@@@###$$$%%%^^^&",
        "did:plc: ", // spaces
    ];
    for did in rejected.iter() {
        assert!(!is_valid_did_method_plc(did), "expected invalid: {:?}", did);
    }
}
636
#[test]
fn test_is_valid_did_method_web() {
    // Strict mode: a bare hostname is the only accepted content.
    let strict_accepted = [
        "did:web:example.com",
        "did:web:sub.example.com",
        "did:web:example.co.uk",
        "did:web:localhost",
    ];
    for did in strict_accepted.iter() {
        assert!(is_valid_did_method_web(did, true), "strict should accept {:?}", did);
    }

    // Strict mode: colon-separated path segments are refused.
    let strict_rejected = [
        "did:web:example.com:path",
        "did:web:example.com:path:subpath",
        "did:web:example.com:123",
    ];
    for did in strict_rejected.iter() {
        assert!(!is_valid_did_method_web(did, true), "strict should reject {:?}", did);
    }

    // Non-strict mode: alphanumeric path segments are allowed.
    let lenient_accepted = [
        "did:web:example.com",
        "did:web:example.com:path",
        "did:web:example.com:path:subpath",
        "did:web:example.com:123",
        "did:web:example.com:abc123",
        "did:web:example.com:UPPERCASE",
    ];
    for did in lenient_accepted.iter() {
        assert!(is_valid_did_method_web(did, false), "non-strict should accept {:?}", did);
    }

    let lenient_rejected = [
        // Empty segments
        "did:web:example.com:",
        "did:web:example.com::",
        "did:web:example.com:path:",
        "did:web:example.com::path",
        // Non-alphanumeric characters in segments
        "did:web:example.com:path/subpath",
        "did:web:example.com:path-name",
        "did:web:example.com:path_name",
        "did:web:example.com:path.name",
        "did:web:example.com:path@name",
        "did:web:example.com:path name",
    ];
    for did in lenient_rejected.iter() {
        assert!(!is_valid_did_method_web(did, false), "non-strict should reject {:?}", did);
    }

    // Refused regardless of mode.
    let always_rejected = [
        // Wrong prefix
        "did:plc:example.com",
        "web:example.com",
        "example.com",
        // Invalid hostname
        "did:web:",
        "did:web:example..com",
        "did:web:.example.com",
        "did:web:example.com.",
        "did:web:-example.com",
        // Reserved TLDs
        "did:web:example.localhost",
        "did:web:example.local",
        // IPv4 addresses
        "did:web:192.168.1.1",
        "did:web:127.0.0.1",
        "did:web:10.0.0.1",
        // IPv6 addresses
        "did:web:2001:db8::1",
        "did:web:::1",
        "did:web:[2001:db8::1]",
    ];
    for did in always_rejected.iter() {
        for strict in [true, false].iter() {
            assert!(
                !is_valid_did_method_web(did, *strict),
                "should reject {:?} (strict = {})",
                did,
                strict
            );
        }
    }
}
741
#[test]
fn test_is_valid_hostname() {
    let accepted = [
        "example.com",
        "sub.example.com",
        "example.co.uk",
        "localhost",
        "test-host.example.com",
        "123.example.com",
        "a.b.c.d.example.com",
        // Edge cases that should be valid
        "1.2.3.example.com", // Numbers are ok in labels
        "xn--example.com",   // Punycode is valid
    ];
    for hostname in accepted.iter() {
        assert!(is_valid_hostname(hostname), "expected valid: {:?}", hostname);
    }

    let rejected = [
        // IPv4 addresses
        "192.168.1.1",
        "127.0.0.1",
        "10.0.0.1",
        "255.255.255.255",
        "0.0.0.0",
        // IPv6 addresses
        "2001:db8::1",
        "::1",
        "fe80::1",
        "[2001:db8::1]",
        "[::1]",
        "2001:0db8:0000:0000:0000:ff00:0042:8329",
        // Empty
        "",
        // Reserved TLDs
        "example.localhost",
        "example.local",
        "example.internal",
        "example.arpa",
        // Bad format
        "example..com",
        ".example.com",
        "example.com.",
        "-example.com",
        "example-.com",
        "exam ple.com",
        "exam@ple.com",
        "exam_ple.com",
    ];
    for hostname in rejected.iter() {
        assert!(!is_valid_hostname(hostname), "expected invalid: {:?}", hostname);
    }

    // One byte over the overall length limit.
    let too_long = "a".repeat(254);
    assert!(!is_valid_hostname(&too_long));
}
794
795 #[test]
796 fn test_is_valid_did_method_webvh() {
797 // Test strict mode - valid cases
798 assert!(is_valid_did_method_webvh("did:webvh:abc123:example.com", true));
799 assert!(is_valid_did_method_webvh("did:webvh:XYZ789:sub.example.com", true));
800 assert!(is_valid_did_method_webvh("did:webvh:ABCDEFGHJKLMNPQRSTUVWXYZabcdefghjkmnpqrstuvwxyz123456789:example.com", true));
801 assert!(is_valid_did_method_webvh("did:webvh:1:example.com", true)); // single char scim
802 assert!(is_valid_did_method_webvh("did:webvh:zzzzzz:localhost", true));
803
804 // Test strict mode - invalid cases with path segments
805 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:path", true));
806 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:path:subpath", true));
807
808 // Test non-strict mode - valid cases
809 assert!(is_valid_did_method_webvh("did:webvh:abc123:example.com", false));
810 assert!(is_valid_did_method_webvh("did:webvh:abc123:example.com:path", false));
811 assert!(is_valid_did_method_webvh("did:webvh:abc123:example.com:path:subpath", false));
812 assert!(is_valid_did_method_webvh("did:webvh:abc123:example.com:123", false));
813 assert!(is_valid_did_method_webvh("did:webvh:abc123:example.com:ABC123", false));
814
815 // Invalid - wrong prefix
816 assert!(!is_valid_did_method_webvh("did:web:abc123:example.com", true));
817 assert!(!is_valid_did_method_webvh("did:web:abc123:example.com", false));
818 assert!(!is_valid_did_method_webvh("did:plc:abc123:example.com", true));
819 assert!(!is_valid_did_method_webvh("webvh:abc123:example.com", true));
820 assert!(!is_valid_did_method_webvh("abc123:example.com", true));
821
822 // Invalid - missing scim or content
823 assert!(!is_valid_did_method_webvh("did:webvh:", true));
824 assert!(!is_valid_did_method_webvh("did:webvh:abc123", true)); // missing content
825 assert!(!is_valid_did_method_webvh("did:webvh:abc123:", true)); // empty content
826 assert!(!is_valid_did_method_webvh("did:webvh::example.com", true)); // empty scim
827 assert!(!is_valid_did_method_webvh("did:webvh:example.com", true)); // no scim separator
828
829 // Invalid - scim contains invalid base58 characters
830 assert!(!is_valid_did_method_webvh("did:webvh:0abc:example.com", true)); // contains 0
831 assert!(!is_valid_did_method_webvh("did:webvh:Oabc:example.com", true)); // contains O
832 assert!(!is_valid_did_method_webvh("did:webvh:Iabc:example.com", true)); // contains I
833 assert!(!is_valid_did_method_webvh("did:webvh:labc:example.com", true)); // contains l
834 assert!(!is_valid_did_method_webvh("did:webvh:abc-123:example.com", true)); // contains -
835 assert!(!is_valid_did_method_webvh("did:webvh:abc_123:example.com", true)); // contains _
836 assert!(!is_valid_did_method_webvh("did:webvh:abc.123:example.com", true)); // contains .
837 assert!(!is_valid_did_method_webvh("did:webvh:abc@123:example.com", true)); // contains @
838 assert!(!is_valid_did_method_webvh("did:webvh:abc 123:example.com", true)); // contains space
839 assert!(!is_valid_did_method_webvh("did:webvh:abc!123:example.com", true)); // contains !
840 assert!(!is_valid_did_method_webvh("did:webvh:abc#123:example.com", true)); // contains #
841 assert!(!is_valid_did_method_webvh("did:webvh:abc$123:example.com", true)); // contains $
842
843 // Invalid - bad hostname in content
844 assert!(!is_valid_did_method_webvh("did:webvh:abc123:", false)); // empty hostname
845 assert!(!is_valid_did_method_webvh("did:webvh:abc123:..example.com", true));
846 assert!(!is_valid_did_method_webvh("did:webvh:abc123:.example.com", true));
847 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com.", true));
848 assert!(!is_valid_did_method_webvh("did:webvh:abc123:-example.com", true));
849 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.localhost", true)); // reserved TLD
850 assert!(!is_valid_did_method_webvh("did:webvh:abc123:192.168.1.1", true)); // IPv4
851 assert!(!is_valid_did_method_webvh("did:webvh:abc123:2001:db8::1", true)); // IPv6
852
853 // Invalid in non-strict mode - empty path segments
854 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:", false));
855 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com::", false));
856 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:path:", false));
857 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com::path", false));
858
859 // Invalid in non-strict mode - non-alphanumeric in path segments
860 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:path/subpath", false));
861 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:path-name", false));
862 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:path_name", false));
863 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:path.name", false));
864 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:path@name", false));
865 assert!(!is_valid_did_method_webvh("did:webvh:abc123:example.com:path name", false));
866
867 // Edge cases with base58 characters
868 assert!(is_valid_did_method_webvh("did:webvh:111111:example.com", true)); // all 1s
869 assert!(is_valid_did_method_webvh("did:webvh:999999:example.com", true)); // all 9s
870 assert!(is_valid_did_method_webvh("did:webvh:AAAAAA:example.com", true)); // all As
871 assert!(is_valid_did_method_webvh("did:webvh:zzzzzz:example.com", true)); // all zs
872 assert!(is_valid_did_method_webvh("did:webvh:HJKLMNPQRSTUVWXYZabcdefghjkmnpqrstuvwxyz:example.com", true)); // no excluded letters
873 }
874
#[test]
fn test_is_valid_base58_btc() {
    // Inputs made up solely of base58-btc alphabet characters must be accepted.
    // Lowercase `i` and `o` are listed deliberately: only their uppercase forms
    // (together with `0` and lowercase `l`) are expected to be rejected below.
    let accepted = [
        "123456789",
        "ABCDEFGHJKLMNPQRSTUVWXYZabcdefghjkmnpqrstuvwxyz",
        "1",
        "z",
        "ABC123xyz",
        "i",
        "o",
        "ioio",
    ];
    for input in accepted {
        assert!(
            is_valid_base58_btc(input),
            "expected {:?} to be accepted as base58-btc",
            input
        );
    }

    // Inputs that must be rejected, grouped by the reason they are invalid.
    let rejected = [
        // The four look-alike characters excluded from the alphabet, alone...
        "0",
        "O",
        "I",
        "l",
        // ...and embedded in otherwise valid text.
        "abc0def",
        "abcOdef",
        "abcIdef",
        "abcldef",
        // Punctuation and whitespace are never part of the alphabet.
        "abc-def",
        "abc_def",
        "abc.def",
        "abc@def",
        "abc def",
        "abc!def",
        // The empty string is not a valid value.
        "",
    ];
    for input in rejected {
        assert!(
            !is_valid_base58_btc(input),
            "expected {:?} to be rejected as base58-btc",
            input
        );
    }
}
908}