Skip to main content

internetarchive_rs/
error.rs

1//! Error types and response decoding.
2
3use reqwest::{Response, StatusCode};
4use serde::Deserialize;
5use serde_json::Value;
6use thiserror::Error;
7
8use crate::ids::IdentifierError;
9
/// Errors produced by the Internet Archive client.
///
/// Variants carrying a `#[from]` field get a `From` conversion from the
/// wrapped error type, so those errors can be propagated with `?`. Variants
/// marked `#[error(transparent)]` forward their display text to the wrapped
/// error instead of defining their own.
#[derive(Debug, Error)]
pub enum InternetArchiveError {
    /// The service returned a non-success HTTP response.
    ///
    /// The display text formats `message` with `{:?}`, so it renders as
    /// `Some("...")` or `None` rather than as bare text.
    #[error("Internet Archive returned HTTP {status}: {message:?}")]
    Http {
        /// HTTP status code.
        status: StatusCode,
        /// Machine-friendly code when available.
        code: Option<String>,
        /// Human-readable summary when available.
        message: Option<String>,
        /// Trimmed raw response body.
        raw_body: Option<String>,
    },
    /// Metadata write returned `success: false`.
    #[error("metadata write failed: {message}")]
    MetadataWriteFailed {
        /// Error message returned by MDAPI.
        message: String,
        /// Trimmed raw response body.
        raw_body: Option<String>,
    },
    /// A public item could not be found.
    #[error("item not found: {identifier}")]
    ItemNotFound {
        /// Requested item identifier.
        identifier: String,
    },
    /// The client was used for an authenticated operation without credentials.
    #[error("this operation requires Internet Archive credentials")]
    MissingAuth,
    /// An upload policy rejected an existing file.
    #[error("item already contains file and selected policy forbids overwrite: {filename}")]
    UploadConflict {
        /// Conflicting file name.
        filename: String,
    },
    /// A requested file was not present on an item.
    #[error("item is missing file: {filename}")]
    MissingFile {
        /// Missing file name.
        filename: String,
    },
    /// A workflow invariant was violated.
    #[error("invalid Internet Archive state: {0}")]
    InvalidState(String),
    /// Polling timed out before the requested state was visible.
    ///
    /// The payload names what was being waited for and is appended to the
    /// display text.
    #[error("timed out waiting for Internet Archive {0}")]
    Timeout(&'static str),
    /// Request transport failed.
    #[error(transparent)]
    Transport(#[from] reqwest::Error),
    /// JSON encoding or decoding failed.
    #[error(transparent)]
    Json(#[from] serde_json::Error),
    /// XML decoding failed.
    #[error(transparent)]
    Xml(#[from] quick_xml::DeError),
    /// Local I/O failed.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// URL construction failed.
    #[error(transparent)]
    Url(#[from] url::ParseError),
    /// Environment lookup failed.
    ///
    /// The underlying error is both embedded in the display text and exposed
    /// as the error source, so reporters that print the full source chain
    /// will show it twice.
    #[error("failed to read environment variable {name}: {source}")]
    EnvVar {
        /// Environment variable name.
        name: String,
        /// Underlying lookup error.
        #[source]
        source: std::env::VarError,
    },
    /// Item identifier validation failed.
    #[error(transparent)]
    Identifier(#[from] IdentifierError),
}
88
89impl InternetArchiveError {
90    pub(crate) async fn from_response(response: Response) -> Self {
91        let status = response.status();
92        let content_type = response
93            .headers()
94            .get(reqwest::header::CONTENT_TYPE)
95            .and_then(|value| value.to_str().ok())
96            .map(str::to_owned);
97
98        let body = match response.bytes().await {
99            Ok(bytes) => bytes,
100            Err(error) => return Self::Transport(error),
101        };
102
103        decode_http_error(status, content_type.as_deref(), &body)
104    }
105}
106
/// JSON error payload, reduced to the fields this module reads.
///
/// Every field is optional and becomes `None` when absent. A field that is
/// present but is not of the expected type makes deserialization fail, which
/// is why `decode_http_error` retries with an untyped `Value`.
#[derive(Debug, Deserialize)]
struct MdapiError {
    /// Outcome flag; `decode_metadata_write_failure` accepts only `Some(true)`.
    #[serde(default)]
    success: Option<bool>,
    /// Error text; preferred over `message` when both are present.
    #[serde(default)]
    error: Option<String>,
    /// Alternative error text, used when `error` is absent.
    #[serde(default)]
    message: Option<String>,
    /// Machine-friendly code, copied into `InternetArchiveError::Http::code`.
    #[serde(default)]
    code: Option<String>,
}
118
/// XML error payload: the optional `Code` and `Message` child elements of
/// the document's root element.
#[derive(Debug, Deserialize)]
struct XmlError {
    /// Text of the `<Code>` element, if present.
    #[serde(rename = "Code")]
    code: Option<String>,
    /// Text of the `<Message>` element, if present.
    #[serde(rename = "Message")]
    message: Option<String>,
}
126
127pub(crate) fn decode_http_error(
128    status: StatusCode,
129    content_type: Option<&str>,
130    body: &[u8],
131) -> InternetArchiveError {
132    let raw_body = trimmed_body(body);
133
134    if looks_like_json(content_type, body) {
135        if let Ok(parsed) = serde_json::from_slice::<MdapiError>(body) {
136            return InternetArchiveError::Http {
137                status,
138                code: parsed.code,
139                message: parsed.error.or(parsed.message).or(raw_body.clone()),
140                raw_body,
141            };
142        }
143
144        if let Ok(parsed) = serde_json::from_slice::<Value>(body) {
145            return InternetArchiveError::Http {
146                status,
147                code: parsed
148                    .get("code")
149                    .and_then(Value::as_str)
150                    .map(str::to_owned),
151                message: parsed
152                    .get("error")
153                    .and_then(Value::as_str)
154                    .or_else(|| parsed.get("message").and_then(Value::as_str))
155                    .or_else(|| parsed.get("title").and_then(Value::as_str))
156                    .map(str::to_owned)
157                    .or(raw_body.clone()),
158                raw_body,
159            };
160        }
161    }
162
163    if looks_like_xml(content_type, body) {
164        if let Ok(parsed) = quick_xml::de::from_str::<XmlError>(&String::from_utf8_lossy(body)) {
165            return InternetArchiveError::Http {
166                status,
167                code: parsed.code,
168                message: parsed.message.or(raw_body.clone()),
169                raw_body,
170            };
171        }
172    }
173
174    InternetArchiveError::Http {
175        status,
176        code: None,
177        message: raw_body.clone(),
178        raw_body,
179    }
180}
181
182pub(crate) fn decode_metadata_write_failure(body: &[u8]) -> Result<(), InternetArchiveError> {
183    let parsed: MdapiError = serde_json::from_slice(body)?;
184    match parsed.success {
185        Some(true) => Ok(()),
186        _ => Err(InternetArchiveError::MetadataWriteFailed {
187            message: parsed
188                .error
189                .or(parsed.message)
190                .unwrap_or_else(|| "unknown metadata write error".to_owned()),
191            raw_body: trimmed_body(body),
192        }),
193    }
194}
195
/// Reports whether a response body is worth attempting to parse as JSON.
///
/// Returns `true` when the declared content type is `application/json...`
/// or uses the `+json` structured-syntax suffix, or, failing that, when the
/// first non-whitespace byte of `body` opens a JSON object or array.
///
/// The content type is compared case-insensitively and without parameters,
/// so `Application/JSON` and `application/problem+json; charset=utf-8` are
/// both recognised.
fn looks_like_json(content_type: Option<&str>, body: &[u8]) -> bool {
    let declared_json = content_type.is_some_and(|value| {
        // Keep only `type/subtype`: drop `; charset=...` style parameters
        // and normalise case, since media types are case-insensitive.
        let essence = value
            .split(';')
            .next()
            .unwrap_or(value)
            .trim()
            .to_ascii_lowercase();
        essence.starts_with("application/json") || essence.ends_with("+json")
    });
    if declared_json {
        return true;
    }

    // No usable declaration: sniff the first significant byte instead.
    body.iter()
        .find(|byte| !byte.is_ascii_whitespace())
        .is_some_and(|byte| matches!(byte, b'{' | b'['))
}
207
/// Reports whether a response body is worth attempting to parse as XML.
///
/// Returns `true` when the declared content type is `application/xml...`,
/// `text/xml...`, or uses the `+xml` structured-syntax suffix, or, failing
/// that, when the first non-whitespace byte of `body` is `<`.
///
/// The content type is compared case-insensitively and without parameters,
/// so the declaration is still honoured when the body does not start with
/// `<` (for example when it is preceded by a byte-order mark).
fn looks_like_xml(content_type: Option<&str>, body: &[u8]) -> bool {
    let declared_xml = content_type.is_some_and(|value| {
        // Keep only `type/subtype`: drop `; charset=...` style parameters
        // and normalise case, since media types are case-insensitive.
        let essence = value
            .split(';')
            .next()
            .unwrap_or(value)
            .trim()
            .to_ascii_lowercase();
        essence.starts_with("application/xml")
            || essence.starts_with("text/xml")
            || essence.ends_with("+xml")
    });
    if declared_xml {
        return true;
    }

    // No usable declaration: sniff the first significant byte instead.
    body.iter()
        .find(|byte| !byte.is_ascii_whitespace())
        .is_some_and(|byte| *byte == b'<')
}
219
/// Produces a short, single-line excerpt of a response body.
///
/// The bytes are decoded as UTF-8, with invalid sequences replaced. The
/// first line that is non-empty after trimming whitespace is returned, cut
/// to at most 512 characters. Returns `None` when every line is blank.
fn trimmed_body(body: &[u8]) -> Option<String> {
    let text = String::from_utf8_lossy(body);
    let first_line = text.lines().map(str::trim).find(|line| !line.is_empty())?;

    // Truncate by characters, not bytes, so a multi-byte sequence is never split.
    Some(first_line.chars().take(512).collect())
}
230
// Unit tests for the error decoders, plus one end-to-end check of
// `from_response` against a local HTTP server.
#[cfg(test)]
mod tests {
    use super::{decode_http_error, decode_metadata_write_failure, InternetArchiveError};
    use axum::http::StatusCode as AxumStatusCode;
    use axum::routing::get;
    use axum::{Json, Router};
    use reqwest::StatusCode;
    use serde_json::json;
    use tokio::net::TcpListener;

    // The `error` field of a JSON body becomes the message.
    #[test]
    fn decodes_json_http_errors() {
        let error = decode_http_error(
            StatusCode::BAD_REQUEST,
            Some("application/json"),
            br#"{"error":"no changes made"}"#,
        );

        match error {
            InternetArchiveError::Http { message, .. } => {
                assert_eq!(message.as_deref(), Some("no changes made"));
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // `<Code>` and `<Message>` of an XML body map to `code` and `message`.
    #[test]
    fn decodes_xml_http_errors() {
        let error = decode_http_error(
            StatusCode::SERVICE_UNAVAILABLE,
            Some("application/xml"),
            br"<Error><Code>SlowDown</Code><Message>Too many requests</Message></Error>",
        );

        match error {
            InternetArchiveError::Http { code, message, .. } => {
                assert_eq!(code.as_deref(), Some("SlowDown"));
                assert_eq!(message.as_deref(), Some("Too many requests"));
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // A body that is neither JSON nor XML is used as the message as-is.
    #[test]
    fn decodes_plain_text_http_errors() {
        let error = decode_http_error(StatusCode::BAD_GATEWAY, Some("text/plain"), b"gateway down");
        match error {
            InternetArchiveError::Http { message, .. } => {
                assert_eq!(message.as_deref(), Some("gateway down"));
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // `success: false` yields `MetadataWriteFailed` carrying the server's
    // error text; `success: true` is accepted even with extra fields.
    #[test]
    fn metadata_write_failure_detection_treats_success_false_as_error() {
        let error = decode_metadata_write_failure(
            br#"{"success":false,"error":"No changes made to _meta.xml"}"#,
        )
        .unwrap_err();
        match error {
            InternetArchiveError::MetadataWriteFailed { message, .. } => {
                assert!(message.contains("No changes made"));
            }
            other => panic!("unexpected error: {other:?}"),
        }

        assert!(decode_metadata_write_failure(br#"{"success":true,"task_id":1}"#).is_ok());
    }

    // No content type and leading whitespace: JSON is detected from the body.
    // `error` is an object here, so the typed decode fails and the untyped
    // fallback picks `title` as the message.
    #[test]
    fn decodes_json_fallback_value_errors_and_body_heuristics() {
        let error = decode_http_error(
            StatusCode::BAD_REQUEST,
            None,
            br#"  {"error":{"nested":true},"title":"fallback title","code":"bad_request"}"#,
        );

        match error {
            InternetArchiveError::Http {
                code,
                message,
                raw_body,
                ..
            } => {
                assert_eq!(code.as_deref(), Some("bad_request"));
                assert_eq!(message.as_deref(), Some("fallback title"));
                assert!(raw_body.unwrap().contains("fallback title"));
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // XML is detected from the body when no content type is given, and a
    // long plain-text body is cut to 512 characters after blank lines are
    // skipped.
    #[test]
    fn decodes_xml_without_content_type_and_trims_text_bodies() {
        let error = decode_http_error(
            StatusCode::BAD_GATEWAY,
            None,
            b"\n   <Error><Message>temporary outage</Message></Error>",
        );

        match error {
            InternetArchiveError::Http { message, .. } => {
                assert_eq!(message.as_deref(), Some("temporary outage"));
            }
            other => panic!("unexpected error: {other:?}"),
        }

        let long_text = format!("\n\n{}", "x".repeat(600));
        let trimmed = decode_http_error(
            StatusCode::BAD_GATEWAY,
            Some("text/plain"),
            long_text.as_bytes(),
        );
        match trimmed {
            InternetArchiveError::Http { message, .. } => {
                assert_eq!(message.unwrap().len(), 512);
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // With neither `error` nor `message` present, the fixed default text is
    // used and `raw_body` echoes the body.
    #[test]
    fn metadata_write_failure_without_message_uses_default_error() {
        let error = decode_metadata_write_failure(br#"{"success":false}"#).unwrap_err();
        match error {
            InternetArchiveError::MetadataWriteFailed { message, raw_body } => {
                assert_eq!(message, "unknown metadata write error");
                assert_eq!(raw_body.as_deref(), Some(r#"{"success":false}"#));
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // A whitespace-only body leaves both `message` and `raw_body` as `None`.
    #[test]
    fn empty_plaintext_body_produces_no_message() {
        let error = decode_http_error(StatusCode::BAD_GATEWAY, Some("text/plain"), b"\n \n\t");
        match error {
            InternetArchiveError::Http {
                message, raw_body, ..
            } => {
                assert_eq!(message, None);
                assert_eq!(raw_body, None);
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // End-to-end: a local axum server answers 400 with a JSON error, and
    // `from_response` decodes the real `reqwest` response.
    #[tokio::test]
    async fn from_response_decodes_http_failures() {
        async fn handler() -> (AxumStatusCode, Json<serde_json::Value>) {
            (
                AxumStatusCode::BAD_REQUEST,
                Json(json!({"error":"request failed","code":"bad_request"})),
            )
        }

        let app = Router::new().route("/", get(handler));
        // Port 0 asks the OS for any free port; the actual address is read back below.
        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();
        let server = tokio::spawn(async move { axum::serve(listener, app).await.unwrap() });

        let response = reqwest::get(format!("http://{addr}/")).await.unwrap();
        let error = InternetArchiveError::from_response(response).await;
        match error {
            InternetArchiveError::Http { code, message, .. } => {
                assert_eq!(code.as_deref(), Some("bad_request"));
                assert_eq!(message.as_deref(), Some("request failed"));
            }
            other => panic!("unexpected error: {other:?}"),
        }

        // Stop the server task so it does not outlive the test.
        server.abort();
    }
}