1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
use futures::TryStreamExt;
use regex::Regex;
use serde::{Deserialize, Serialize};
use url::Url;
use std::ops::RangeBounds;
use std::path::Path;
/// A single downloadable representation of a page.
///
/// `list_pages` keeps only the resources whose `mime` equals `image/jpeg`.
#[derive(Serialize, Deserialize)]
pub struct Resource {
    /// MIME type string describing the resource (for example `image/jpeg`).
    pub mime: String,
    /// Location of the resource.
    pub url: url::Url,
}
/// One page (scan) of an item, together with its available resources.
#[derive(Serialize, Deserialize)]
pub struct Page {
    /// Resources that belong to this page.
    pub resources: Vec<Resource>,
    /// Numeric identifier of the page as it appears in the serialized data.
    pub id: u32,
}
/// Deserialized body of the `/api/entities/<id>/` response requested by `list_pages`.
///
/// Only the `scans` field is read; it is deserialize-only (no `Serialize` impl).
#[derive(Deserialize)]
pub struct Item {
    /// The pages of the item, in the order they appear in the response.
    pub scans: Vec<Page>,
}
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("{0}")]
Io(#[from] std::io::Error),
#[error("{0}")]
Http(#[from] reqwest::Error),
#[error("{0}")]
Serde(#[from] serde_json::Error),
}
/// Extracts the resource identifier from the path of an item URL.
///
/// The path is searched for `item/`, followed by an optional `<prefix>,`
/// segment (ASCII letters, digits and hyphens ending in a comma), followed by
/// a run of ASCII letters and digits. That final run is returned as an owned
/// `String`.
///
/// # Panics
///
/// Panics if the URL path contains no such `item/...` segment.
#[must_use]
pub fn resource_id_from_url(url: &Url) -> String {
    let pattern = Regex::new(r"item/([a-zA-Z0-9-]+,)?([a-zA-Z0-9]+)").expect("correct regexp");
    let captures = pattern.captures(url.path()).unwrap();
    // Group 1 is the optional "<prefix>," part; group 2 is the identifier itself.
    let id = captures.get(2).unwrap();
    String::from(id.as_str())
}
/// Fetches the page list of the item that `url` points at and returns the
/// pages selected by `range`.
///
/// The item's identifier is taken from `url` with `resource_id_from_url`, the
/// path of a copy of `url` is replaced by `/api/entities/<id>/`, and the JSON
/// response is deserialized into an [`Item`]. In each returned page only the
/// resources whose MIME type is `image/jpeg` are kept.
///
/// `range` indexes into the item's pages (zero-based). It is clamped to the
/// number of pages available, so a range that reaches past the end yields the
/// pages that do exist, and an empty or inverted range yields an empty vector.
///
/// # Errors
///
/// Returns [`Error::Http`] if the request fails, the server answers with a
/// non-success status, or the body cannot be decoded as an [`Item`].
///
/// # Panics
///
/// Panics if `url` does not contain an item identifier (see
/// `resource_id_from_url`).
pub async fn list_pages<R>(url: &Url, range: R) -> Result<Vec<Page>, Error>
where
    R: RangeBounds<usize>,
{
    use std::ops::Bound;

    let mut api_url = url.clone();
    let resource_id = resource_id_from_url(&api_url);
    let path = format!("/api/entities/{}/", resource_id);
    api_url.set_path(&path);

    // Surface 4xx/5xx answers as a status error rather than a JSON decode error.
    let mut item: Item = reqwest::get(api_url.as_str())
        .await?
        .error_for_status()?
        .json()
        .await?;

    // `Vec::drain` panics on out-of-bounds or inverted ranges, so resolve the
    // caller's bounds into a half-open range that always fits the page list.
    let total = item.scans.len();
    let requested_start = match range.start_bound() {
        Bound::Included(&start) => start,
        Bound::Excluded(&start) => start.saturating_add(1),
        Bound::Unbounded => 0,
    };
    let requested_end = match range.end_bound() {
        Bound::Included(&end) => end.saturating_add(1),
        Bound::Excluded(&end) => end,
        Bound::Unbounded => total,
    };
    let start = requested_start.min(total);
    let end = requested_end.min(total).max(start);

    let mut pages: Vec<Page> = item.scans.drain(start..end).collect();
    for page in &mut pages {
        page.resources.retain(|r| r.mime == "image/jpeg");
    }
    Ok(pages)
}
pub async fn download_page_image(url: &Url, output_path: &Path) -> Result<(), Error> {
use async_compat::CompatExt;
use tokio::io::AsyncWriteExt;
let mut file = tokio::fs::OpenOptions::new()
.append(true)
.create_new(true)
.open(output_path)
.await?;
let mut response = reqwest::get(url.as_str())
.await?
.bytes_stream()
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))
.into_async_read();
futures::io::copy(&mut response, &mut file.compat_mut()).await?;
file.flush().await?;
Ok(())
}