booru_dl/api.rs
1//! A core module for interacting with the Gelbooru API.
2//!
//! In most cases you should use the [`BatchGetter`] struct to fetch the [`data`] from the Gelbooru API.
4
5use std::path::PathBuf;
6use std::sync::LazyLock;
7
8use reqwest::{Client, Url};
9use serde::{Deserialize, Serialize};
10
11/// The URLs for the Gelbooru API.
12pub mod url {
13 use super::*;
14
15 /// The base URL of the Gelbooru.
16 pub const BASE_URL: &str = "https://gelbooru.com/index.php";
17
18 /// The Api URL of the Gelbooru, which can be used to query gelbooru's database.
19 pub static API_URL: LazyLock<Url> = LazyLock::new(|| {
20 // see: https://gelbooru.com/index.php?page=wiki&s=view&id=18780
21 Url::parse_with_params(
22 BASE_URL,
23 &[
24 ("page", "dapi"),
25 ("s", "post"),
26 ("q", "index"),
27 ("json", "1"),
28 ],
29 )
30 .unwrap()
31 });
32
33 /// The Post URL of the Gelbooru, which can be used to display the images.
34 pub static POST_URL: LazyLock<Url> = LazyLock::new(|| {
35 // see: https://gelbooru.com/index.php?page=wiki&s=view&id=18780
36 Url::parse_with_params(BASE_URL, &[("page", "post"), ("s", "list"), ("q", "index")])
37 .unwrap()
38 });
39}
40
/// Deserialization helper used to auto-initialize the `filename` field of the `Post` struct.
///
/// It holds the fields that are read directly from the JSON response; the
/// `From<PostInner>` conversion then derives `filename` from `id` and `image`.
#[derive(Deserialize)]
pub(crate) struct PostInner {
    /// The ID of the image.
    pub(crate) id: u64,
    /// The MD5 hash of the image.
    pub(crate) md5: String,
    /// The URL of the image, which can be used to download the image.
    pub(crate) file_url: String,
    /// The tags of the image.
    pub(crate) tags: String,
    /// The original file name of the image.
    pub(crate) image: PathBuf,
}
50
51impl From<PostInner> for data::field::Post {
52 /// `filename` equals to `id` with `image`'s extension.
53 /// e.g. `id = 12345`, `image = "test.jpg"`, then `filename = "12345.jpg"`.
54 fn from(value: PostInner) -> Self {
55 use crate::tool::SetFileStem;
56
57 // make sure only the filename is retained
58 let mut filename: PathBuf = value.image.file_name().unwrap().into();
59 filename.set_file_stem(value.id.to_string());
60
61 Self {
62 id: value.id,
63 md5: value.md5,
64 file_url: value.file_url,
65 tags: value.tags,
66 image: value.image,
67 filename,
68 }
69 }
70}
71
/// The data structures for the JSON response from the Gelbooru API.
pub mod data {
    use super::*;

    /// The fields of the [`Json`] response.
    pub mod field {
        use super::*;

        /// The attributes field of the JSON response
        /// (the `@attributes` key, see [`Json::attributes`]).
        #[non_exhaustive]
        #[derive(Debug, Deserialize, Serialize)]
        pub struct Attributes {
            /// The number of images in this response. Range: `0..=100`.
            pub limit: u64,
            /// The current index of the first image in this response.
            pub offset: u64,
            /// The total number of images reported by Gelbooru for the query,
            /// not only the ones contained in this response.
            pub count: u64,
        }

        /// The post field of the JSON response.
        ///
        /// Deserialization goes through `PostInner`, whose `From` conversion
        /// computes the `filename` field.
        #[non_exhaustive]
        #[derive(Debug, Deserialize, Serialize)]
        #[serde(from = "PostInner")]
        pub struct Post {
            /// The ID of the image.
            pub id: u64,
            /// The MD5 hash of the image.
            pub md5: String,
            /// The URL of the image, which can be used to download the image.
            pub file_url: String,
            /// The tags of the image. Note: these tags are marked by gelbooru.
            pub tags: String,
            /// The original file name of the image.
            pub image: PathBuf,
            /// The filename of the image, which is the same as `id` with the extension of `image`.
            /// We will use this field to save the image.
            pub(crate) filename: PathBuf,
        }
    }

    /// The JSON structure response from the Gelbooru API.
    #[non_exhaustive]
    #[derive(Debug, Deserialize, Serialize)]
    pub struct Json {
        /// The attributes of the JSON response (the `@attributes` key).
        #[serde(rename = "@attributes")]
        pub attributes: field::Attributes,
        /// The posts of the JSON response.
        /// If `attributes.count` is `0`, or the requested `pid` is out of range,
        /// this field will be `None`.
        pub post: Option<Vec<field::Post>>,
    }
}
126
/// A Consuming-Builders style struct to get the data from the Gelbooru API.
///
/// Create it with [`Getter::build`], which validates the arguments, then
/// consume it with [`Getter::run`] to send a single request.
///
/// # Example
///
/// ```rust
/// use reqwest::Client;
/// use booru_dl::api::Getter;
///
/// #[tokio::main]
/// async fn main() -> reqwest::Result<()> {
///     let client = Client::new();
///     let tags = "cat";
///     let limit = 10;
///     let pid = 0;
///
///     let data = Getter::build(&client, tags, limit, pid)
///         .expect("illegal arguments")
///         .run()
///         .await?;
///
///     Ok(())
/// }
/// ```
pub struct Getter<'a> {
    // The HTTP client used to send the request.
    client: &'a Client,
    // The `tags` query parameter; `build` rejects an empty string.
    tags: &'a str,
    // The `limit` query parameter; `build` only accepts `1..=100`.
    limit: u64,
    // The `pid` query parameter, sent to the API unchanged.
    pid: u64,
}
156
157impl Getter<'_> {
158 /// See <https://gelbooru.com/index.php?page=wiki&s=view&id=18780> for arguments.
159 ///
160 /// # Errors
161 ///
162 /// If `tags` is empty, or `limit` is not in the range `1..=100`, this function will return an error.
163 pub fn build<'a>(
164 client: &'a Client,
165 tags: &'a str,
166 limit: u64,
167 pid: u64,
168 ) -> anyhow::Result<Getter<'a>> {
169 if tags.is_empty() {
170 return Err(anyhow::anyhow!("Tags cannot be empty"));
171 }
172 // This is gelbooru's limit.
173 // see: https://gelbooru.com/index.php?page=wiki&s=view&id=18780
174 if !matches!(limit, 1..=100) {
175 return Err(anyhow::anyhow!("Limit can only be between 1 and 100"));
176 }
177 Ok(Getter {
178 client,
179 tags,
180 limit,
181 pid,
182 })
183 }
184
185 /// Send the request to the Gelbooru API and get the JSON response.
186 ///
187 /// # Errors
188 ///
189 /// If the request fails, this function will return an error.
190 ///
191 /// <div class="warning">
192 ///
193 /// If `limit * pid > 20_000`, the API will return an error.
194 ///
195 /// See: <https://gelbooru.com/index.php?page=forum&s=view&id=1549>
196 ///
197 /// </div>
198 pub async fn run(self) -> reqwest::Result<data::Json> {
199 let mut target_url = url::API_URL.clone();
200 target_url.query_pairs_mut().extend_pairs([
201 ("tags", self.tags),
202 ("limit", &self.limit.to_string()),
203 ("pid", &self.pid.to_string()),
204 ]);
205 self.client.get(target_url).send().await?.json().await
206 }
207}
208
/// This helper wraps the [`Getter`] struct and automatically polls the API until the number of images is reached.
///
/// Create it with [`BatchGetter::build`], then consume it with [`BatchGetter::run`].
///
/// # Example
///
/// ```rust,no_run
/// use reqwest::Client;
/// use booru_dl::api::BatchGetter;
///
/// #[tokio::main]
/// async fn main() -> reqwest::Result<()> {
///     let client = Client::new();
///
///     let posts = BatchGetter::build(&client, "cat", 150)
///         .expect("illegal arguments")
///         .run()
///         .await?;
///     assert!(posts.len() <= 150);
///
///     Ok(())
/// }
/// ```
pub struct BatchGetter<'a> {
    // The HTTP client used to send the requests.
    client: &'a Client,
    // The `tags` passed to every underlying `Getter`; `build` rejects an empty string.
    tags: &'a str,
    // The maximum number of posts to return; `build` rejects `0`.
    num_imgs: u64,
}
219
220impl BatchGetter<'_> {
221 /// See [`Getter::build`] for arguments.
222 ///
223 /// # Errors
224 ///
225 /// If `tags` is empty, or `num_imgs` is 0, this function will return an error.
226 pub fn build<'a>(
227 client: &'a Client,
228 tags: &'a str,
229 num_imgs: u64,
230 ) -> anyhow::Result<BatchGetter<'a>> {
231 if tags.is_empty() {
232 return Err(anyhow::anyhow!("Tags cannot be empty"));
233 }
234 if num_imgs == 0 {
235 return Err(anyhow::anyhow!("Number of images cannot be 0"));
236 }
237 Ok(BatchGetter {
238 client,
239 tags,
240 num_imgs,
241 })
242 }
243
244 /// Wraps the [`Getter`] struct and automatically polls the API until the number of images is reached.
245 ///
246 /// If none of the images are found, this function will return an zero capacity vector.
247 ///
248 /// # Errors
249 ///
250 /// If the request fails, this function will return an error.
251 ///
252 /// <div class="warning">
253 ///
254 /// If `num_imgs > 20_000`, the API will return an error.
255 ///
256 /// See: <https://gelbooru.com/index.php?page=forum&s=view&id=1549>
257 ///
258 /// </div>
259 pub async fn run(self) -> reqwest::Result<Vec<data::field::Post>> {
260 const LIMIT: u64 = 100;
261
262 let Self {
263 client,
264 tags,
265 num_imgs,
266 } = self;
267
268 let mut current_pid = 0;
269 let data = Getter::build(client, tags, LIMIT, current_pid)
270 .unwrap()
271 .run()
272 .await?;
273
274 let mut post_vec = match data.post {
275 Some(post) => post,
276 None => return Ok(Vec::with_capacity(0)),
277 };
278 let total_num: usize = std::cmp::min(num_imgs, data.attributes.count)
279 .try_into()
280 .expect("total number is too large to convert to `usize`");
281 // if `total_num` is 0, then `data.attributes.count` is 0,
282 // so `data.post` should be `None` and return early.
283 debug_assert_ne!(total_num, 0);
284
285 while post_vec.len() < total_num {
286 current_pid += 1;
287 let current_post_vec = Getter::build(client, tags, LIMIT, current_pid)
288 .unwrap()
289 .run()
290 .await?
291 .post
292 .expect(
293 "if `post_vec` is shorter than `total_num`, \
294 then `post` should not be `None`",
295 );
296 post_vec.extend(current_post_vec);
297 }
298 post_vec.truncate(total_num);
299
300 Ok(post_vec)
301 }
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// `Getter::build` must reject empty tags and a zero limit.
    #[test]
    fn test_illegal_args() {
        let client = Client::new();

        let empty_tags = Getter::build(&client, "", 100, 0);
        assert!(empty_tags.is_err());

        let zero_limit = Getter::build(&client, "cat", 0, 0);
        assert!(zero_limit.is_err());
    }

    /// A single request reports the requested limit and returns matching posts.
    #[tokio::test]
    async fn test_get_api_data() -> reqwest::Result<()> {
        let client = Client::new();
        let tag = "cat";
        let limit: u64 = 10;

        let json = Getter::build(&client, tag, limit, 0).unwrap().run().await?;
        assert_eq!(json.attributes.limit, limit);

        let posts = json
            .post
            .expect("if `attributes.limit` is correct, then `post` shouldn't be `None`");
        assert!(posts[0].tags.contains(tag));
        Ok(())
    }

    /// Batch fetching spans more than one page and yields nothing for unknown tags.
    #[tokio::test]
    async fn test_batch_get_api_data() -> reqwest::Result<()> {
        let client = Client::new();
        let num_imgs: u64 = 101;

        // One more than a full page, so a second request is required.
        let found = BatchGetter::build(&client, "cat", num_imgs)
            .unwrap()
            .run()
            .await?;
        let expected_len = usize::try_from(num_imgs).unwrap();
        assert_eq!(found.len(), expected_len);

        let missing = BatchGetter::build(&client, "balabala just no exist", num_imgs)
            .unwrap()
            .run()
            .await?;
        assert!(missing.is_empty());
        Ok(())
    }
}
355}