Skip to main content

internetarchive_rs/
ids.rs

1//! Identifier newtypes used by the public API.
2
3use std::fmt;
4use std::str::FromStr;
5
6use secrecy::SecretString;
7use serde::{Deserialize, Deserializer, Serialize};
8use thiserror::Error;
9
/// Item identifier used by Internet Archive.
///
/// The wrapped string is private to this module; callers obtain a value via
/// [`ItemIdentifier::new`] or the `FromStr`, `TryFrom`, and `Deserialize`
/// impls, all of which run the same validation. `Serialize` is derived with
/// `#[serde(transparent)]`, so the value is written as the plain string.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
#[serde(transparent)]
pub struct ItemIdentifier(String);
14
/// Task identifier returned by Metadata Write.
///
/// Thin `Copy` wrapper around a `u64`. No validation is performed; any `u64`
/// converts via `From<u64>`, and `#[serde(transparent)]` makes the serde
/// representation the bare number rather than a wrapper object.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct TaskId(
    /// Raw numeric task identifier.
    pub u64,
);
22
/// Validation errors for [`ItemIdentifier`].
///
/// Two validators produce these variants: [`ItemIdentifier::new`] (general
/// identifier rules) and [`ItemIdentifier::validate_for_bucket_creation`]
/// (the narrower bucket-creation subset). Every variant other than `Empty`
/// carries an owned copy of the offending identifier so the message can quote
/// it.
#[derive(Clone, Debug, PartialEq, Eq, Error)]
pub enum IdentifierError {
    /// Identifier was empty after trimming.
    ///
    /// Produced by [`ItemIdentifier::new`].
    #[error("item identifier cannot be empty")]
    Empty,
    /// Identifier exceeds Internet Archive's documented maximum length.
    ///
    /// Produced by [`ItemIdentifier::new`], which compares the byte length of
    /// the trimmed input against [`ItemIdentifier::MAX_IDENTIFIER_LEN`].
    #[error("item identifier {identifier:?} is too long; maximum length is {max}")]
    TooLong {
        /// Identifier value that failed validation.
        identifier: String,
        /// Maximum general identifier length.
        max: usize,
    },
    /// Identifier starts with an unsupported character.
    ///
    /// Produced by [`ItemIdentifier::new`] when the first character is not an
    /// ASCII letter or digit.
    #[error(
        "invalid first character {character:?} in identifier {identifier:?}; identifiers must start with an ASCII letter or digit"
    )]
    InvalidStartCharacter {
        /// Identifier value that failed validation.
        identifier: String,
        /// Unsupported first character.
        character: char,
    },
    /// Identifier is too short for Internet Archive's S3 bucket-creation layer.
    ///
    /// Produced by [`ItemIdentifier::validate_for_bucket_creation`].
    #[error(
        "item identifier {identifier:?} is too short for bucket creation; minimum length is {min}"
    )]
    TooShortForBucketCreation {
        /// Identifier value that failed validation.
        identifier: String,
        /// Minimum bucket-creation-safe length.
        min: usize,
    },
    /// Identifier is too long for Internet Archive's S3 bucket-creation layer.
    ///
    /// Produced by [`ItemIdentifier::validate_for_bucket_creation`].
    #[error(
        "item identifier {identifier:?} is too long for bucket creation; maximum length is {max}"
    )]
    TooLongForBucketCreation {
        /// Identifier value that failed validation.
        identifier: String,
        /// Maximum bucket-creation-safe length.
        max: usize,
    },
    /// Identifier contains an unsupported character.
    ///
    /// Produced by [`ItemIdentifier::new`]; `character` is the first character
    /// found outside `[A-Za-z0-9_.-]`.
    #[error("invalid character {character:?} in identifier {identifier:?}")]
    InvalidCharacter {
        /// Original identifier value after trimming.
        identifier: String,
        /// Unsupported character.
        character: char,
    },
    /// Identifier contains a character that the conservative IA-S3 bucket
    /// creation subset rejects.
    ///
    /// Produced by [`ItemIdentifier::validate_for_bucket_creation`];
    /// `character` is the first offending character in the identifier.
    #[error(
        "invalid bucket-creation character {character:?} in identifier {identifier:?}; bucket-creation identifiers may contain only lowercase ASCII letters, digits, periods, and dashes"
    )]
    InvalidBucketCreationCharacter {
        /// Identifier value that failed validation.
        identifier: String,
        /// Unsupported bucket-creation character.
        character: char,
    },
    /// Identifier starts or ends with a character that the conservative IA-S3
    /// bucket-creation subset rejects.
    ///
    /// Produced by [`ItemIdentifier::validate_for_bucket_creation`], which
    /// inspects the first character before the last one.
    #[error(
        "invalid bucket-creation edge character {character:?} in identifier {identifier:?}; bucket-creation identifiers must start and end with a lowercase ASCII letter or digit"
    )]
    InvalidBucketCreationEdgeCharacter {
        /// Identifier value that failed validation.
        identifier: String,
        /// Unsupported first or last bucket-creation character.
        character: char,
    },
    /// Identifier contains adjacent periods that S3 bucket creation rejects.
    ///
    /// Produced by [`ItemIdentifier::validate_for_bucket_creation`].
    #[error(
        "item identifier {identifier:?} is invalid for bucket creation; S3 bucket names cannot contain adjacent periods"
    )]
    AdjacentBucketCreationPeriods {
        /// Identifier value that failed validation.
        identifier: String,
    },
    /// Identifier has the shape of an IPv4 address, which S3 bucket creation rejects.
    ///
    /// Produced by [`ItemIdentifier::validate_for_bucket_creation`].
    #[error(
        "item identifier {identifier:?} is invalid for bucket creation; S3 bucket names cannot be formatted as an IPv4 address"
    )]
    BucketCreationIdentifierLooksLikeIpAddress {
        /// Identifier value that failed validation.
        identifier: String,
    },
    /// Identifier contains a period adjacent to a dash, which S3-compatible bucket creation rejects.
    ///
    /// Produced by [`ItemIdentifier::validate_for_bucket_creation`] for either
    /// ordering (`-.` or `.-`).
    #[error(
        "item identifier {identifier:?} is invalid for bucket creation; S3 bucket names cannot contain periods adjacent to dashes"
    )]
    PeriodAdjacentBucketCreationDash {
        /// Identifier value that failed validation.
        identifier: String,
    },
}
122
123impl ItemIdentifier {
124    /// Longest item identifier documented by Internet Archive.
125    pub const MAX_IDENTIFIER_LEN: usize = 100;
126    /// Shortest identifier accepted by the conservative IA-S3 bucket-creation subset.
127    pub const MIN_BUCKET_IDENTIFIER_LEN: usize = 3;
128    /// Longest identifier accepted by the conservative IA-S3 bucket-creation subset.
129    pub const MAX_BUCKET_IDENTIFIER_LEN: usize = 63;
130
131    /// Creates a validated item identifier.
132    ///
133    /// # Errors
134    ///
135    /// Returns an error if the identifier is empty, longer than the documented
136    /// maximum, does not start with an ASCII letter or digit, or contains
137    /// characters outside of `[A-Za-z0-9_.-]`.
138    pub fn new(value: impl AsRef<str>) -> Result<Self, IdentifierError> {
139        let trimmed = value.as_ref().trim();
140        if trimmed.is_empty() {
141            return Err(IdentifierError::Empty);
142        }
143
144        if trimmed.len() > Self::MAX_IDENTIFIER_LEN {
145            return Err(IdentifierError::TooLong {
146                identifier: trimmed.to_owned(),
147                max: Self::MAX_IDENTIFIER_LEN,
148            });
149        }
150
151        let Some(first) = trimmed.chars().next() else {
152            return Err(IdentifierError::Empty);
153        };
154        if !first.is_ascii_alphanumeric() {
155            return Err(IdentifierError::InvalidStartCharacter {
156                identifier: trimmed.to_owned(),
157                character: first,
158            });
159        }
160
161        if let Some(character) = trimmed.chars().find(|character| {
162            !character.is_ascii_alphanumeric()
163                && *character != '_'
164                && *character != '-'
165                && *character != '.'
166        }) {
167            return Err(IdentifierError::InvalidCharacter {
168                identifier: trimmed.to_owned(),
169                character,
170            });
171        }
172
173        Ok(Self(trimmed.to_owned()))
174    }
175
176    /// Returns the raw identifier string.
177    #[must_use]
178    pub fn as_str(&self) -> &str {
179        &self.0
180    }
181
182    /// Validates that this identifier can safely create an IA-S3 bucket.
183    ///
184    /// Internet Archive's general item identifiers can include uppercase
185    /// letters, underscores, and periods, but its upload path maps item
186    /// identifiers to S3 bucket names when creating new items. Bucket creation
187    /// therefore uses a conservative DNS-compatible subset.
188    ///
189    /// This is intentionally narrower than Internet Archive's general
190    /// identifier rules and the Python client's optional S3 identifier
191    /// validator. Use it only before requests that ask IA-S3 to create a
192    /// bucket, not before existing-item upload, delete, or queue-limit checks.
193    ///
194    /// # Errors
195    ///
196    /// Returns an error if the identifier is outside the bucket-creation-safe
197    /// length range, contains a bucket-unsafe character, starts or ends with a
198    /// character rejected by IA-S3 bucket creation, contains adjacent periods,
199    /// contains a period next to a dash, or looks like an IPv4 address.
200    pub fn validate_for_bucket_creation(&self) -> Result<(), IdentifierError> {
201        let identifier = self.as_str();
202        let length = identifier.len();
203
204        if length < Self::MIN_BUCKET_IDENTIFIER_LEN {
205            return Err(IdentifierError::TooShortForBucketCreation {
206                identifier: identifier.to_owned(),
207                min: Self::MIN_BUCKET_IDENTIFIER_LEN,
208            });
209        }
210
211        if length > Self::MAX_BUCKET_IDENTIFIER_LEN {
212            return Err(IdentifierError::TooLongForBucketCreation {
213                identifier: identifier.to_owned(),
214                max: Self::MAX_BUCKET_IDENTIFIER_LEN,
215            });
216        }
217
218        if let Some(character) = identifier
219            .chars()
220            .find(|character| !is_bucket_creation_safe_character(*character))
221        {
222            return Err(IdentifierError::InvalidBucketCreationCharacter {
223                identifier: identifier.to_owned(),
224                character,
225            });
226        }
227
228        for character in [identifier.chars().next(), identifier.chars().next_back()]
229            .into_iter()
230            .flatten()
231        {
232            if !is_bucket_creation_safe_edge_character(character) {
233                return Err(IdentifierError::InvalidBucketCreationEdgeCharacter {
234                    identifier: identifier.to_owned(),
235                    character,
236                });
237            }
238        }
239
240        if identifier.contains("..") {
241            return Err(IdentifierError::AdjacentBucketCreationPeriods {
242                identifier: identifier.to_owned(),
243            });
244        }
245
246        if looks_like_ipv4_address(identifier) {
247            return Err(
248                IdentifierError::BucketCreationIdentifierLooksLikeIpAddress {
249                    identifier: identifier.to_owned(),
250                },
251            );
252        }
253
254        if identifier.contains("-.") || identifier.contains(".-") {
255            return Err(IdentifierError::PeriodAdjacentBucketCreationDash {
256                identifier: identifier.to_owned(),
257            });
258        }
259
260        Ok(())
261    }
262}
263
/// Reports whether `character` belongs to the bucket-creation alphabet:
/// lowercase ASCII letters, ASCII digits, `-`, and `.`.
fn is_bucket_creation_safe_character(character: char) -> bool {
    matches!(character, 'a'..='z' | '0'..='9' | '-' | '.')
}
270
/// Reports whether `character` may open or close a bucket-creation
/// identifier: only lowercase ASCII letters and ASCII digits qualify.
fn is_bucket_creation_safe_edge_character(character: char) -> bool {
    matches!(character, 'a'..='z' | '0'..='9')
}
274
/// Reports whether `identifier` is shaped like a dotted-quad IPv4 address:
/// exactly four `.`-separated parts, each made only of ASCII digits and
/// parsing to a value in `0..=255`.
///
/// Leading zeros are tolerated (`"010.001.000.004"` still counts), keeping
/// the check on the conservative side.
fn looks_like_ipv4_address(identifier: &str) -> bool {
    // `str::parse::<u8>` alone accepts a leading `+` ("+1" parses to 1), which
    // no IPv4 octet contains, so require pure ASCII digits before parsing.
    let is_octet = |part: &str| {
        part.bytes().all(|byte| byte.is_ascii_digit()) && part.parse::<u8>().is_ok()
    };

    let mut parts = identifier.split('.');
    // Consume at most four parts; all four must exist and be octets.
    let four_octets = parts.by_ref().take(4).filter(|&part| is_octet(part)).count() == 4;
    // Anything left over means there were more than four parts.
    four_octets && parts.next().is_none()
}
297
298impl fmt::Display for ItemIdentifier {
299    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300        self.0.fmt(f)
301    }
302}
303
304impl FromStr for ItemIdentifier {
305    type Err = IdentifierError;
306
307    fn from_str(s: &str) -> Result<Self, Self::Err> {
308        Self::new(s)
309    }
310}
311
312impl<'de> Deserialize<'de> for ItemIdentifier {
313    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
314    where
315        D: Deserializer<'de>,
316    {
317        let value = String::deserialize(deserializer)?;
318        Self::new(value).map_err(serde::de::Error::custom)
319    }
320}
321
322impl TryFrom<&str> for ItemIdentifier {
323    type Error = IdentifierError;
324
325    fn try_from(value: &str) -> Result<Self, Self::Error> {
326        Self::new(value)
327    }
328}
329
330impl TryFrom<String> for ItemIdentifier {
331    type Error = IdentifierError;
332
333    fn try_from(value: String) -> Result<Self, Self::Error> {
334        Self::new(value)
335    }
336}
337
338impl From<ItemIdentifier> for String {
339    fn from(value: ItemIdentifier) -> Self {
340        value.0
341    }
342}
343
344impl From<u64> for TaskId {
345    fn from(value: u64) -> Self {
346        Self(value)
347    }
348}
349
350impl fmt::Display for TaskId {
351    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
352        self.0.fmt(f)
353    }
354}
355
/// Pair of LOW-auth secrets used for authenticated Internet Archive requests.
///
/// Both halves are stored as [`SecretString`]. `Debug` is implemented by hand
/// for this type rather than derived, and prints a placeholder for each field
/// instead of the secret value.
#[derive(Clone)]
pub(crate) struct SecretPair {
    /// Access-key half of the credential pair.
    pub(crate) access_key: SecretString,
    /// Secret-key half of the credential pair.
    pub(crate) secret_key: SecretString,
}
362
363impl std::fmt::Debug for SecretPair {
364    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
365        f.debug_struct("SecretPair")
366            .field("access_key", &"<redacted>")
367            .field("secret_key", &"<redacted>")
368            .finish()
369    }
370}
371
#[cfg(test)]
mod tests {
    use secrecy::SecretString;

    use super::{IdentifierError, ItemIdentifier, SecretPair, TaskId};

    /// Parses `raw` as a general identifier (which must succeed) and then
    /// runs the bucket-creation check on it.
    fn bucket_check(raw: &str) -> Result<(), IdentifierError> {
        ItemIdentifier::new(raw)
            .unwrap()
            .validate_for_bucket_creation()
    }

    #[test]
    fn item_identifier_accepts_documented_shapes() {
        // Accepted values are stored verbatim, including uppercase letters,
        // underscores, and periods.
        for raw in ["xfetch-2026_demo", "Demo_Item", "Demo.Item_2026"] {
            assert_eq!(ItemIdentifier::new(raw).unwrap().as_str(), raw);
        }

        let parsed: ItemIdentifier = "demo_item".parse().unwrap();
        assert_eq!(parsed.as_str(), "demo_item");
    }

    #[test]
    fn item_identifier_rejects_empty_and_invalid_values() {
        assert_eq!(ItemIdentifier::new("   "), Err(IdentifierError::Empty));

        let oversized = "a".repeat(ItemIdentifier::MAX_IDENTIFIER_LEN + 1);
        assert_eq!(
            ItemIdentifier::new(&oversized),
            Err(IdentifierError::TooLong {
                identifier: oversized.clone(),
                max: ItemIdentifier::MAX_IDENTIFIER_LEN,
            })
        );

        for (raw, character) in [("-bad", '-'), ("_bad", '_'), (".bad", '.')] {
            assert_eq!(
                ItemIdentifier::new(raw),
                Err(IdentifierError::InvalidStartCharacter {
                    identifier: String::from(raw),
                    character,
                })
            );
        }

        for (raw, expected) in [("bad item", ' '), ("bad/item", '/')] {
            match ItemIdentifier::new(raw) {
                Err(IdentifierError::InvalidCharacter { character, .. }) => {
                    assert_eq!(character, expected);
                }
                other => panic!("expected InvalidCharacter for {raw:?}, got {other:?}"),
            }
        }
    }

    #[test]
    fn item_identifier_validates_bucket_creation_safe_subset() {
        assert_eq!(bucket_check("demo-item.2026"), Ok(()));

        let oversized = "a".repeat(ItemIdentifier::MAX_BUCKET_IDENTIFIER_LEN + 1);
        let rejected: Vec<(&str, IdentifierError)> = vec![
            (
                "ab",
                IdentifierError::TooShortForBucketCreation {
                    identifier: String::from("ab"),
                    min: ItemIdentifier::MIN_BUCKET_IDENTIFIER_LEN,
                },
            ),
            (
                oversized.as_str(),
                IdentifierError::TooLongForBucketCreation {
                    identifier: oversized.clone(),
                    max: ItemIdentifier::MAX_BUCKET_IDENTIFIER_LEN,
                },
            ),
            (
                "Demo-item",
                IdentifierError::InvalidBucketCreationCharacter {
                    identifier: String::from("Demo-item"),
                    character: 'D',
                },
            ),
            (
                "demo_item",
                IdentifierError::InvalidBucketCreationCharacter {
                    identifier: String::from("demo_item"),
                    character: '_',
                },
            ),
            (
                "demo-",
                IdentifierError::InvalidBucketCreationEdgeCharacter {
                    identifier: String::from("demo-"),
                    character: '-',
                },
            ),
            (
                "demo.",
                IdentifierError::InvalidBucketCreationEdgeCharacter {
                    identifier: String::from("demo."),
                    character: '.',
                },
            ),
            (
                "demo..item",
                IdentifierError::AdjacentBucketCreationPeriods {
                    identifier: String::from("demo..item"),
                },
            ),
            (
                "192.168.5.4",
                IdentifierError::BucketCreationIdentifierLooksLikeIpAddress {
                    identifier: String::from("192.168.5.4"),
                },
            ),
            (
                "demo-.item",
                IdentifierError::PeriodAdjacentBucketCreationDash {
                    identifier: String::from("demo-.item"),
                },
            ),
            (
                "demo.-item",
                IdentifierError::PeriodAdjacentBucketCreationDash {
                    identifier: String::from("demo.-item"),
                },
            ),
        ];
        for (raw, expected) in rejected {
            assert_eq!(bucket_check(raw), Err(expected), "identifier {raw:?}");
        }

        // These identifiers pass the bucket-creation check as implemented.
        let accepted = [
            "xn--demo",
            "sthree-demo",
            "amzn-s3-demo-item",
            "demo-s3alias",
            "demo--ol-s3",
            "demo.mrap",
            "demo--x-s3",
            "demo--table-s3",
        ];
        for raw in accepted {
            assert_eq!(bucket_check(raw), Ok(()), "identifier {raw:?}");
        }
    }

    #[test]
    fn task_ids_round_trip() {
        let task: TaskId = 42_u64.into();
        assert_eq!(task.0, 42);
        assert_eq!(format!("{task}"), "42");
    }

    #[test]
    fn identifier_try_from_and_string_round_trip_work() {
        let from_owned = ItemIdentifier::try_from(String::from("demo-item")).unwrap();
        let from_borrowed = ItemIdentifier::try_from("demo-item").unwrap();

        assert_eq!(from_owned.as_str(), "demo-item");
        assert_eq!(from_owned.to_string(), "demo-item");
        assert_eq!(from_borrowed, from_owned);
        assert_eq!(String::from(from_owned), "demo-item");
    }

    #[test]
    fn identifier_serde_round_trip_validates_values() {
        let json = "\"Demo.Item_2026\"";

        let identifier: ItemIdentifier = serde_json::from_str(json).unwrap();
        assert_eq!(identifier.as_str(), "Demo.Item_2026");
        assert_eq!(serde_json::to_string(&identifier).unwrap(), json);

        // Deserialization goes through the same validation as `new`.
        assert!(serde_json::from_str::<ItemIdentifier>("\"bad item\"").is_err());
    }

    #[test]
    fn secret_pair_debug_is_redacted() {
        let secrets = SecretPair {
            access_key: SecretString::from(String::from("actual-access-secret")),
            secret_key: SecretString::from(String::from("actual-secret-key")),
        };

        let rendered = format!("{secrets:?}");
        assert!(rendered.contains("<redacted>"));
        for secret in ["actual-access-secret", "actual-secret-key"] {
            assert!(!rendered.contains(secret));
        }
    }
}