1use reqwest::{Response, StatusCode};
4use serde::Deserialize;
5use serde_json::Value;
6use thiserror::Error;
7
8use crate::ids::IdentifierError;
9
/// Every failure this crate reports while working with the Internet Archive.
///
/// Variants fall into three groups: errors decoded from a server response
/// (`Http`, `MetadataWriteFailed`), domain-level conditions described by their
/// display message (`ItemNotFound`, `MissingAuth`, `UploadConflict`,
/// `MissingFile`, `InvalidState`, `Timeout`), and wrappers around errors from
/// underlying libraries (`Transport`, `Json`, `Xml`, `Io`, `Url`, `EnvVar`,
/// `Identifier`).
#[derive(Debug, Error)]
pub enum InternetArchiveError {
    /// An HTTP response that was decoded into an error by `decode_http_error`.
    ///
    /// Note that the display message renders `message` with `{:?}`, so it
    /// appears as `Some("...")` / `None` in the formatted text.
    #[error("Internet Archive returned HTTP {status}: {message:?}")]
    Http {
        /// Status code of the response.
        status: StatusCode,
        /// Error code taken from the body's JSON `code` field or XML `Code`
        /// element, when one was found.
        code: Option<String>,
        /// Best available description: a message extracted from the body, or
        /// the same text as `raw_body` when nothing could be extracted.
        message: Option<String>,
        /// First non-blank line of the response body, capped at 512 characters;
        /// `None` when the body is empty or whitespace only.
        raw_body: Option<String>,
    },
    /// A metadata write whose JSON reply did not carry `"success": true`
    /// (produced by `decode_metadata_write_failure`).
    #[error("metadata write failed: {message}")]
    MetadataWriteFailed {
        /// Error text from the reply, or a generic placeholder when absent.
        message: String,
        /// First non-blank line of the reply body, capped at 512 characters.
        raw_body: Option<String>,
    },
    /// The item with the given identifier was not found.
    #[error("item not found: {identifier}")]
    ItemNotFound {
        /// Identifier that was looked up.
        identifier: String,
    },
    /// The requested operation needs credentials and none were available.
    #[error("this operation requires Internet Archive credentials")]
    MissingAuth,
    /// The item already contains the file and the selected policy forbids
    /// overwriting it.
    #[error("item already contains file and selected policy forbids overwrite: {filename}")]
    UploadConflict {
        /// Name of the conflicting file.
        filename: String,
    },
    /// The item does not contain the named file.
    #[error("item is missing file: {filename}")]
    MissingFile {
        /// Name of the file that was expected.
        filename: String,
    },
    /// The Internet Archive reported, or was observed in, a state the caller
    /// considers invalid; the string describes it.
    #[error("invalid Internet Archive state: {0}")]
    InvalidState(String),
    /// Waiting for something timed out; the string names what was awaited and
    /// is appended to "timed out waiting for Internet Archive ".
    #[error("timed out waiting for Internet Archive {0}")]
    Timeout(&'static str),
    /// Error from `reqwest`, displayed transparently; also returned by
    /// `from_response` when the response body cannot be read.
    #[error(transparent)]
    Transport(#[from] reqwest::Error),
    /// JSON (de)serialization error from `serde_json`, displayed transparently.
    #[error(transparent)]
    Json(#[from] serde_json::Error),
    /// XML deserialization error from `quick_xml`, displayed transparently.
    #[error(transparent)]
    Xml(#[from] quick_xml::DeError),
    /// I/O error from the standard library, displayed transparently.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// URL parsing error, displayed transparently.
    #[error(transparent)]
    Url(#[from] url::ParseError),
    /// An environment variable could not be read.
    #[error("failed to read environment variable {name}: {source}")]
    EnvVar {
        /// Name of the environment variable.
        name: String,
        /// Underlying error from `std::env`.
        #[source]
        source: std::env::VarError,
    },
    /// Identifier error from `crate::ids`, displayed transparently.
    #[error(transparent)]
    Identifier(#[from] IdentifierError),
}
88
89impl InternetArchiveError {
90 pub(crate) async fn from_response(response: Response) -> Self {
91 let status = response.status();
92 let content_type = response
93 .headers()
94 .get(reqwest::header::CONTENT_TYPE)
95 .and_then(|value| value.to_str().ok())
96 .map(str::to_owned);
97
98 let body = match response.bytes().await {
99 Ok(bytes) => bytes,
100 Err(error) => return Self::Transport(error),
101 };
102
103 decode_http_error(status, content_type.as_deref(), &body)
104 }
105}
106
/// JSON status/error payload as read by `decode_http_error` and
/// `decode_metadata_write_failure`.
///
/// Every field is optional, so any JSON object whose present fields have the
/// expected types deserializes successfully; unknown fields are ignored.
// NOTE(review): the name suggests this mirrors the Metadata API reply shape — confirm.
#[derive(Debug, Deserialize)]
struct MdapiError {
    /// Whether the server reported the operation as successful.
    #[serde(default)]
    success: Option<bool>,
    /// Error text; preferred over `message` when both are present.
    #[serde(default)]
    error: Option<String>,
    /// Alternative error text, used when `error` is absent.
    #[serde(default)]
    message: Option<String>,
    /// Error code string, when the server provides one.
    #[serde(default)]
    code: Option<String>,
}
118
/// XML error document with optional `Code` and `Message` child elements,
/// e.g. `<Error><Code>SlowDown</Code><Message>...</Message></Error>`.
#[derive(Debug, Deserialize)]
struct XmlError {
    /// Text of the `Code` element, if present.
    #[serde(rename = "Code")]
    code: Option<String>,
    /// Text of the `Message` element, if present.
    #[serde(rename = "Message")]
    message: Option<String>,
}
126
127pub(crate) fn decode_http_error(
128 status: StatusCode,
129 content_type: Option<&str>,
130 body: &[u8],
131) -> InternetArchiveError {
132 let raw_body = trimmed_body(body);
133
134 if looks_like_json(content_type, body) {
135 if let Ok(parsed) = serde_json::from_slice::<MdapiError>(body) {
136 return InternetArchiveError::Http {
137 status,
138 code: parsed.code,
139 message: parsed.error.or(parsed.message).or(raw_body.clone()),
140 raw_body,
141 };
142 }
143
144 if let Ok(parsed) = serde_json::from_slice::<Value>(body) {
145 return InternetArchiveError::Http {
146 status,
147 code: parsed
148 .get("code")
149 .and_then(Value::as_str)
150 .map(str::to_owned),
151 message: parsed
152 .get("error")
153 .and_then(Value::as_str)
154 .or_else(|| parsed.get("message").and_then(Value::as_str))
155 .or_else(|| parsed.get("title").and_then(Value::as_str))
156 .map(str::to_owned)
157 .or(raw_body.clone()),
158 raw_body,
159 };
160 }
161 }
162
163 if looks_like_xml(content_type, body) {
164 if let Ok(parsed) = quick_xml::de::from_str::<XmlError>(&String::from_utf8_lossy(body)) {
165 return InternetArchiveError::Http {
166 status,
167 code: parsed.code,
168 message: parsed.message.or(raw_body.clone()),
169 raw_body,
170 };
171 }
172 }
173
174 InternetArchiveError::Http {
175 status,
176 code: None,
177 message: raw_body.clone(),
178 raw_body,
179 }
180}
181
182pub(crate) fn decode_metadata_write_failure(body: &[u8]) -> Result<(), InternetArchiveError> {
183 let parsed: MdapiError = serde_json::from_slice(body)?;
184 match parsed.success {
185 Some(true) => Ok(()),
186 _ => Err(InternetArchiveError::MetadataWriteFailed {
187 message: parsed
188 .error
189 .or(parsed.message)
190 .unwrap_or_else(|| "unknown metadata write error".to_owned()),
191 raw_body: trimmed_body(body),
192 }),
193 }
194}
195
/// Heuristically decides whether a response body is worth parsing as JSON.
///
/// Returns `true` when `content_type` names a JSON media type: one starting
/// with `application/json`, or one using the `+json` suffix such as
/// `application/problem+json`. The comparison ignores ASCII case and any
/// parameters after `;` (for example `; charset=utf-8`).
///
/// When the header is absent or names something else, the body is sniffed
/// instead: the first non-whitespace byte must be `{` or `[`.
fn looks_like_json(content_type: Option<&str>, body: &[u8]) -> bool {
    let declared_json = content_type.is_some_and(|value| {
        // Media types are case-insensitive and may carry parameters; compare
        // only the normalized `type/subtype` part.
        let essence = value
            .split(';')
            .next()
            .unwrap_or(value)
            .trim()
            .to_ascii_lowercase();
        essence.starts_with("application/json") || essence.ends_with("+json")
    });
    if declared_json {
        return true;
    }

    body.iter()
        .find(|byte| !byte.is_ascii_whitespace())
        .is_some_and(|byte| matches!(byte, b'{' | b'['))
}
207
/// Heuristically decides whether a response body is worth parsing as XML.
///
/// Returns `true` when `content_type` names an XML media type: one starting
/// with `application/xml` or `text/xml`, or one using the `+xml` suffix. The
/// comparison ignores ASCII case and any parameters after `;`.
///
/// When the header is absent or names something else, the body is sniffed
/// instead: the first non-whitespace byte must be `<`.
fn looks_like_xml(content_type: Option<&str>, body: &[u8]) -> bool {
    let declared_xml = content_type.is_some_and(|value| {
        // Media types are case-insensitive and may carry parameters; compare
        // only the normalized `type/subtype` part.
        let essence = value
            .split(';')
            .next()
            .unwrap_or(value)
            .trim()
            .to_ascii_lowercase();
        essence.starts_with("application/xml")
            || essence.starts_with("text/xml")
            || essence.ends_with("+xml")
    });
    if declared_xml {
        return true;
    }

    body.iter()
        .find(|byte| !byte.is_ascii_whitespace())
        .is_some_and(|byte| *byte == b'<')
}
219
/// Extracts a short, printable excerpt from a response body.
///
/// The body is decoded as UTF-8 (invalid sequences become U+FFFD), and the
/// first line that is non-empty after trimming surrounding whitespace is
/// returned, cut off after 512 characters. Returns `None` when every line is
/// blank, which includes an empty body.
fn trimmed_body(body: &[u8]) -> Option<String> {
    let decoded = String::from_utf8_lossy(body);
    decoded
        .lines()
        .map(str::trim)
        .find(|candidate| !candidate.is_empty())
        .map(|first| first.chars().take(512).collect())
}
230
// Unit tests for the error decoders, plus one end-to-end test that runs
// `from_response` against a local axum server.
#[cfg(test)]
mod tests {
    use super::{decode_http_error, decode_metadata_write_failure, InternetArchiveError};
    use axum::http::StatusCode as AxumStatusCode;
    use axum::routing::get;
    use axum::{Json, Router};
    use reqwest::StatusCode;
    use serde_json::json;
    use tokio::net::TcpListener;

    // A JSON body with a string `error` field supplies the message.
    #[test]
    fn decodes_json_http_errors() {
        let error = decode_http_error(
            StatusCode::BAD_REQUEST,
            Some("application/json"),
            br#"{"error":"no changes made"}"#,
        );

        match error {
            InternetArchiveError::Http { message, .. } => {
                assert_eq!(message.as_deref(), Some("no changes made"));
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // An XML body supplies both the code and the message.
    #[test]
    fn decodes_xml_http_errors() {
        let error = decode_http_error(
            StatusCode::SERVICE_UNAVAILABLE,
            Some("application/xml"),
            br"<Error><Code>SlowDown</Code><Message>Too many requests</Message></Error>",
        );

        match error {
            InternetArchiveError::Http { code, message, .. } => {
                assert_eq!(code.as_deref(), Some("SlowDown"));
                assert_eq!(message.as_deref(), Some("Too many requests"));
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // A body that is neither JSON nor XML is used verbatim as the message.
    #[test]
    fn decodes_plain_text_http_errors() {
        let error = decode_http_error(StatusCode::BAD_GATEWAY, Some("text/plain"), b"gateway down");
        match error {
            InternetArchiveError::Http { message, .. } => {
                assert_eq!(message.as_deref(), Some("gateway down"));
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // `success: false` is an error carrying the server's text; `success: true` is `Ok`.
    #[test]
    fn metadata_write_failure_detection_treats_success_false_as_error() {
        let error = decode_metadata_write_failure(
            br#"{"success":false,"error":"No changes made to _meta.xml"}"#,
        )
        .unwrap_err();
        match error {
            InternetArchiveError::MetadataWriteFailed { message, .. } => {
                assert!(message.contains("No changes made"));
            }
            other => panic!("unexpected error: {other:?}"),
        }

        assert!(decode_metadata_write_failure(br#"{"success":true,"task_id":1}"#).is_ok());
    }

    // No Content-Type is given, so JSON is detected from the leading `{` after
    // whitespace. `error` is an object rather than a string, so the message
    // comes from `title` instead.
    #[test]
    fn decodes_json_fallback_value_errors_and_body_heuristics() {
        let error = decode_http_error(
            StatusCode::BAD_REQUEST,
            None,
            br#" {"error":{"nested":true},"title":"fallback title","code":"bad_request"}"#,
        );

        match error {
            InternetArchiveError::Http {
                code,
                message,
                raw_body,
                ..
            } => {
                assert_eq!(code.as_deref(), Some("bad_request"));
                assert_eq!(message.as_deref(), Some("fallback title"));
                assert!(raw_body.unwrap().contains("fallback title"));
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // XML is detected from the leading `<` when no Content-Type is given, and
    // plain-text messages skip leading blank lines and are capped at 512 characters.
    #[test]
    fn decodes_xml_without_content_type_and_trims_text_bodies() {
        let error = decode_http_error(
            StatusCode::BAD_GATEWAY,
            None,
            b"\n <Error><Message>temporary outage</Message></Error>",
        );

        match error {
            InternetArchiveError::Http { message, .. } => {
                assert_eq!(message.as_deref(), Some("temporary outage"));
            }
            other => panic!("unexpected error: {other:?}"),
        }

        // 600 characters after two blank lines: only the first 512 survive.
        let long_text = format!("\n\n{}", "x".repeat(600));
        let trimmed = decode_http_error(
            StatusCode::BAD_GATEWAY,
            Some("text/plain"),
            long_text.as_bytes(),
        );
        match trimmed {
            InternetArchiveError::Http { message, .. } => {
                assert_eq!(message.unwrap().len(), 512);
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // With neither `error` nor `message` in the reply, the placeholder text is used.
    #[test]
    fn metadata_write_failure_without_message_uses_default_error() {
        let error = decode_metadata_write_failure(br#"{"success":false}"#).unwrap_err();
        match error {
            InternetArchiveError::MetadataWriteFailed { message, raw_body } => {
                assert_eq!(message, "unknown metadata write error");
                assert_eq!(raw_body.as_deref(), Some(r#"{"success":false}"#));
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // A whitespace-only body yields neither a message nor a raw body.
    #[test]
    fn empty_plaintext_body_produces_no_message() {
        let error = decode_http_error(StatusCode::BAD_GATEWAY, Some("text/plain"), b"\n \n\t");
        match error {
            InternetArchiveError::Http {
                message, raw_body, ..
            } => {
                assert_eq!(message, None);
                assert_eq!(raw_body, None);
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    // End-to-end: a real HTTP 400 with a JSON body, fetched with reqwest and
    // decoded through `from_response`.
    #[tokio::test]
    async fn from_response_decodes_http_failures() {
        // Handler that always answers 400 with a JSON error payload.
        async fn handler() -> (AxumStatusCode, Json<serde_json::Value>) {
            (
                AxumStatusCode::BAD_REQUEST,
                Json(json!({"error":"request failed","code":"bad_request"})),
            )
        }

        let app = Router::new().route("/", get(handler));
        // Port 0 asks the OS for any free port; the real address is read back below.
        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();
        let server = tokio::spawn(async move { axum::serve(listener, app).await.unwrap() });

        let response = reqwest::get(format!("http://{addr}/")).await.unwrap();
        let error = InternetArchiveError::from_response(response).await;
        match error {
            InternetArchiveError::Http { code, message, .. } => {
                assert_eq!(code.as_deref(), Some("bad_request"));
                assert_eq!(message.as_deref(), Some("request failed"));
            }
            other => panic!("unexpected error: {other:?}"),
        }

        // Stop the background server task once the assertions are done.
        server.abort();
    }
}