Skip to main content

internetarchive_rs/
model.rs

1//! Typed API response models.
2
3use std::collections::BTreeMap;
4
5use serde::{Deserialize, Serialize};
6use serde_json::Value;
7use url::Url;
8
9use crate::metadata::ItemMetadata;
10use crate::serde_util::deserialize_option_u64ish;
11use crate::{ItemIdentifier, TaskId};
12
13/// Full response returned by `GET /metadata/{identifier}`.
14#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
15pub struct Item {
16    /// Item creation timestamp, when present.
17    #[serde(default, deserialize_with = "deserialize_option_u64ish")]
18    pub created: Option<u64>,
19    /// Primary data node hostname.
20    #[serde(default)]
21    pub d1: Option<String>,
22    /// Secondary data node hostname.
23    #[serde(default)]
24    pub d2: Option<String>,
25    /// Item directory path inside IA storage.
26    #[serde(default)]
27    pub dir: Option<String>,
28    /// Files contained in the item.
29    #[serde(default)]
30    pub files: Vec<ItemFile>,
31    /// Reported file count.
32    #[serde(default, deserialize_with = "deserialize_option_u64ish")]
33    pub files_count: Option<u64>,
34    /// Last updated timestamp.
35    #[serde(default, deserialize_with = "deserialize_option_u64ish")]
36    pub item_last_updated: Option<u64>,
37    /// Reported item size in bytes.
38    #[serde(default, deserialize_with = "deserialize_option_u64ish")]
39    pub item_size: Option<u64>,
40    /// Flexible metadata map.
41    #[serde(default)]
42    pub metadata: ItemMetadata,
43    /// Host currently serving the metadata read.
44    #[serde(default)]
45    pub server: Option<String>,
46    /// Unique record hash or sequence number.
47    #[serde(default, deserialize_with = "deserialize_option_u64ish")]
48    pub uniq: Option<u64>,
49    /// Alternate workable hosts.
50    #[serde(default)]
51    pub workable_servers: Vec<String>,
52    /// Any unmodeled top-level fields.
53    #[serde(default, flatten)]
54    pub extra: BTreeMap<String, Value>,
55}
56
57impl Item {
58    /// Returns the validated item identifier from metadata.
59    #[must_use]
60    pub fn identifier(&self) -> Option<ItemIdentifier> {
61        self.metadata
62            .get_text("identifier")
63            .and_then(|value| ItemIdentifier::new(value).ok())
64    }
65
66    /// Finds a file by exact name.
67    #[must_use]
68    pub fn file(&self, name: &str) -> Option<&ItemFile> {
69        self.files.iter().find(|file| file.name == name)
70    }
71}
72
73/// File entry returned by metadata reads.
74#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
75pub struct ItemFile {
76    /// File name relative to the item root.
77    pub name: String,
78    /// Origin of the file, such as `original` or `derivative`.
79    #[serde(default)]
80    pub source: Option<String>,
81    /// IA format label.
82    #[serde(default)]
83    pub format: Option<String>,
84    /// Last modified timestamp.
85    #[serde(default, deserialize_with = "deserialize_option_u64ish")]
86    pub mtime: Option<u64>,
87    /// Size in bytes.
88    #[serde(default, deserialize_with = "deserialize_option_u64ish")]
89    pub size: Option<u64>,
90    /// MD5 hash when available.
91    #[serde(default)]
92    pub md5: Option<String>,
93    /// CRC32 hash when available.
94    #[serde(default)]
95    pub crc32: Option<String>,
96    /// SHA1 hash when available.
97    #[serde(default)]
98    pub sha1: Option<String>,
99    /// Original file name for derivative files.
100    #[serde(default)]
101    pub original: Option<String>,
102    /// Any additional file metadata.
103    #[serde(default, flatten)]
104    pub extra: BTreeMap<String, Value>,
105}
106
107/// Response returned by MDAPI metadata writes.
108#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
109pub struct MetadataWriteResponse {
110    /// Whether the request was accepted.
111    pub success: bool,
112    /// Queued task identifier.
113    #[serde(default)]
114    pub task_id: Option<TaskId>,
115    /// Log URL for the queued task.
116    #[serde(default)]
117    pub log: Option<Url>,
118    /// Error message when `success` is false.
119    #[serde(default)]
120    pub error: Option<String>,
121}
122
123/// Echo block returned by `advancedsearch.php`.
124#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
125pub struct SearchResponseHeader {
126    /// Status code reported by the search service.
127    #[serde(default)]
128    pub status: i64,
129    /// Query time in milliseconds, when present.
130    #[serde(default)]
131    #[serde(rename = "QTime")]
132    pub q_time: Option<i64>,
133    /// Echoed request parameters.
134    #[serde(default)]
135    pub params: BTreeMap<String, Value>,
136}
137
138/// Document list returned by search.
139#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
140pub struct SearchResultPage {
141    /// Number of matching documents.
142    #[serde(rename = "numFound")]
143    pub num_found: u64,
144    /// Start offset of this page.
145    pub start: u64,
146    /// Returned documents.
147    #[serde(default)]
148    pub docs: Vec<SearchDocument>,
149}
150
151/// Search response wrapper.
152#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
153pub struct SearchResponse {
154    /// Header and echoed parameters.
155    #[serde(default)]
156    #[serde(rename = "responseHeader")]
157    pub response_header: SearchResponseHeader,
158    /// Main result page.
159    pub response: SearchResultPage,
160}
161
162/// Flexible document returned by advanced search.
163#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
164#[serde(transparent)]
165pub struct SearchDocument(BTreeMap<String, Value>);
166
167impl SearchDocument {
168    /// Returns the raw field value.
169    #[must_use]
170    pub fn get(&self, key: &str) -> Option<&Value> {
171        self.0.get(key)
172    }
173
174    /// Returns a string field.
175    #[must_use]
176    pub fn get_text(&self, key: &str) -> Option<&str> {
177        self.get(key).and_then(Value::as_str)
178    }
179
180    /// Returns the validated item identifier from a search document.
181    #[must_use]
182    pub fn identifier(&self) -> Option<ItemIdentifier> {
183        self.get_text("identifier")
184            .and_then(|value| ItemIdentifier::new(value).ok())
185    }
186
187    /// Returns the title field when present.
188    #[must_use]
189    pub fn title(&self) -> Option<&str> {
190        self.get_text("title")
191    }
192
193    /// Returns the raw field map.
194    #[must_use]
195    pub fn as_map(&self) -> &BTreeMap<String, Value> {
196        &self.0
197    }
198}
199
200/// Result of submitting a task to the Internet Archive tasks API.
201#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
202pub struct TaskSubmission {
203    /// Queued task identifier returned by the tasks API.
204    pub task_id: TaskId,
205    /// URL of the task log file.
206    pub log: Url,
207}
208
209/// Response returned by the S3 limit-check endpoint.
210#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
211pub struct S3LimitCheck {
212    /// Bucket name echoed by the service.
213    pub bucket: String,
214    /// Access key echoed by the service.
215    pub accesskey: String,
216    /// Whether the queue is over limit.
217    pub over_limit: i64,
218    /// Backend-specific detail string.
219    #[serde(default)]
220    pub detail: Option<Value>,
221}
222
#[cfg(test)]
mod tests {
    use super::{Item, SearchResponse};

    /// Builds the `response` object shared by the search tests: a single
    /// page holding one `xfetch` document.
    fn single_xfetch_page() -> serde_json::Value {
        serde_json::json!({
            "numFound": 1,
            "start": 0,
            "docs": [
                {
                    "identifier": "xfetch",
                    "title": "XFETCH"
                }
            ]
        })
    }

    /// A metadata payload with numeric fields and a nested file list decodes,
    /// and the lookup helpers see the decoded values.
    #[test]
    fn item_deserializes_realistic_metadata_payloads() {
        let payload = serde_json::json!({
            "created": 1_776_513_537,
            "files": [
                {
                    "name": "xfetch.pdf",
                    "size": 419_170,
                    "md5": "abc"
                }
            ],
            "metadata": {
                "identifier": "xfetch",
                "title": "XFETCH"
            }
        });
        let item: Item = serde_json::from_value(payload).unwrap();

        let pdf = item.file("xfetch.pdf").unwrap();
        assert_eq!(pdf.size, Some(419_170));

        let identifier = item.identifier().unwrap();
        assert_eq!(identifier.as_str(), "xfetch");
    }

    /// The full `advancedsearch.php` shape, header included, decodes and the
    /// document accessors agree with the raw map.
    #[test]
    fn search_response_deserializes_advancedsearch_shape() {
        let payload = serde_json::json!({
            "responseHeader": {
                "status": 0,
                "QTime": 12,
                "params": { "query": "identifier:xfetch" }
            },
            "response": single_xfetch_page()
        });
        let response: SearchResponse = serde_json::from_value(payload).unwrap();

        let first = &response.response.docs[0];
        assert_eq!(first.identifier().unwrap().as_str(), "xfetch");
        assert_eq!(first.title(), Some("XFETCH"));
        assert_eq!(
            first.as_map()["title"],
            serde_json::Value::String("XFETCH".to_owned())
        );
    }

    /// A payload without `responseHeader` still decodes, using the default header.
    #[test]
    fn search_response_deserializes_without_response_header() {
        let payload = serde_json::json!({
            "response": single_xfetch_page()
        });
        let response: SearchResponse = serde_json::from_value(payload).unwrap();

        let header = &response.response_header;
        assert_eq!(header.status, 0);
        assert!(header.params.is_empty());

        let first = &response.response.docs[0];
        assert_eq!(first.identifier().unwrap().as_str(), "xfetch");
    }
}