apify_client/
datasets.rs

1use std::marker::PhantomData;
2use serde::{de::DeserializeOwned, Deserialize, Serialize};
3
4use crate::client::{ApifyClient, ApifyApiError, ApifyClientError, ApifyClientResult, IdOrName};
5use crate::utils::{stringify_resource, json_content_headers, parse_pagination_header, is_resource_by_name};
6use crate::generic_types::{SimpleBuilder, PaginationList, NoContent};
7
/// Base URL of the dataset endpoints; every request URL in this module is built on top of it.
pub const BASE_URL: &str = "https://api.apify.com/v2/datasets";
9
10impl ApifyClient {
11    /// List datasets of the provided account.
12    /// Requires API token.
13    pub fn list_datasets(&self) -> ListDatasetsBuilder<'_> {
14        ListDatasetsBuilder {
15            client: self,
16            options: ListDatasetsParams::default(),
17        }
18    }
19
20    /// Requires API token
21    pub fn create_dataset(&self, dataset_name: &str) -> SimpleBuilder<'_, Dataset> {
22        let url = format!("{}?name={}", BASE_URL, dataset_name);
23        SimpleBuilder {
24            client: self,
25            requires_token: true,
26            url,
27            method: reqwest::Method::POST,
28            body: Ok(None),
29            headers: None,
30            phantom: PhantomData,
31        }
32    }
33
34    /// Gets a dataset info object
35    /// If you provide dataset ID, you don't need a token
36    /// If you provide username~datasetName, you need a token (otherwise it will return an Error)
37    pub fn get_dataset(&self, dataset_id_or_name: &IdOrName) -> SimpleBuilder<'_, Dataset> {
38        let url = format!("{}/{}?", BASE_URL, stringify_resource(dataset_id_or_name));
39    
40        SimpleBuilder {
41            client: self,
42            url,
43            requires_token: is_resource_by_name(dataset_id_or_name),
44            method: reqwest::Method::GET,
45            body: Ok(None),
46            headers: None,
47            phantom: PhantomData,
48        }
49    }
50
51    /// Requires API token
52    pub fn update_dataset(&self, dataset_id_or_name: &IdOrName, new_dataset_name: &str) -> SimpleBuilder<'_, Dataset> {
53        let url = format!("{}/{}?", BASE_URL, stringify_resource(dataset_id_or_name));
54        let json_body = json!({
55            "name": new_dataset_name
56        });
57        let bytes = serde_json::to_vec(&json_body).expect("Parsing just defined JSON should never fail!"); 
58        SimpleBuilder {
59            client: self,
60            url,
61            requires_token: true,
62            method: reqwest::Method::PUT,
63            body: Ok(Some(bytes)),
64            headers: Some(json_content_headers()),
65            phantom: PhantomData,
66        }
67    }
68
69    /// Requires API token
70    pub fn delete_dataset(&self, dataset_id_or_name: &IdOrName) -> SimpleBuilder<'_, NoContent> {
71        let url = format!("{}/{}?", BASE_URL, stringify_resource(dataset_id_or_name));
72        SimpleBuilder {
73            client: self,
74            url,
75            requires_token: true,
76            method: reqwest::Method::DELETE,
77            body: Ok(None),
78            headers: None,
79            phantom: PhantomData,
80        }
81    }
82
83    /// Appends item(s) at the end of the dataset.
84    /// `items` must serialize into JSON object or array of objects and the JSON must have size less than 5 MB.
85    /// Otherwise the Apify API returns an error.
86    /// Requires API token.
87    /// [API reference](https://docs.apify.com/api/v2#/reference/datasets/item-collection/put-items)
88    pub fn put_items<T: Serialize>(&self, dataset_id_or_name: &IdOrName, items: &T) -> SimpleBuilder<'_, NoContent> {
89        let url = format!("{}/{}/items?", BASE_URL, stringify_resource(dataset_id_or_name));
90        let wrapped_bytes = Some(serde_json::to_vec(items)).transpose();
91        
92        SimpleBuilder {
93            client: self,
94            url,
95            requires_token: true,
96            method: reqwest::Method::POST,
97            body: wrapped_bytes,
98            headers: Some(json_content_headers()),
99            phantom: PhantomData,
100        }
101    }
102
103    /// Gets items from the dataset in JSON format and parses them into `PaginationList<T>`.
104    /// If you need non-parsed String and/or different formats choose `get_items_raw` instead.
105    /// [API reference](https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items).
106    pub fn get_items<T: serde::de::DeserializeOwned>(&self, dataset_id_or_name: IdOrName) -> GetItemsBuilder<'_, T> {
107        GetItemsBuilder {
108            client: self,
109            dataset_id_or_name,
110            options: GetItemsParams::default(),
111            _phantom: PhantomData,
112        }
113    }
114
115    /// Gets items from the dataset in any format and return them as `String` (no PaginationList). 
116    /// [API reference](https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items).
117    pub fn get_items_raw(&self, dataset_id_or_name: IdOrName) -> GetItemsBuilderRaw<'_> {
118        GetItemsBuilderRaw {
119            client: self,
120            dataset_id_or_name,
121            options: GetItemsParams::default(),
122        }
123    }
124}
125
126fn get_items_prepare_url(client: &ApifyClient, dataset_id_or_name: &IdOrName, params: &GetItemsParams) -> Result<String, ApifyClientError> {
127    let url = format!("{}/{}/items?{}", BASE_URL, stringify_resource(&dataset_id_or_name), params.to_query_params());
128    let url = if is_resource_by_name(dataset_id_or_name) {
129        let token = client.optional_token.as_ref().ok_or(ApifyApiError::MissingToken)?;
130        format!("{}&token={}", &url, token)
131    } else {
132        url
133    };
134    Ok(url)
135}
136
137#[derive(Deserialize, Debug)]
138#[serde(rename_all = "camelCase")]
139pub struct Dataset {
140    pub id: String,
141    pub name: Option<String>,
142    pub user_id: String,
143    pub created_at: String,
144    pub modified_at: String,
145    pub accessed_at: String,
146    pub item_count: u32,
147    pub clean_item_count: Option<u32>,
148    pub act_id: Option<String>,
149    pub act_run_id: Option<String>
150}
151
/// Output format that can be requested when fetching dataset items.
///
/// The `Display` implementation yields the lowercase name of the format
/// (e.g. `Format::Csv` renders as `csv`). The default is [`Format::Json`].
#[derive(Debug)]
pub enum Format {
    Json,
    Jsonl,
    Xml,
    Html,
    Csv,
    Xlsx,
    Rss,
}

impl Default for Format {
    /// JSON is the default output format.
    fn default() -> Self {
        Format::Json
    }
}

impl std::fmt::Display for Format {
    /// Writes the lowercase format name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Format::Json => "json",
            Format::Jsonl => "jsonl",
            Format::Xml => "xml",
            Format::Html => "html",
            Format::Csv => "csv",
            Format::Xlsx => "xlsx",
            // Was "ss", which is not a valid format name.
            Format::Rss => "rss",
        };
        f.write_str(name)
    }
}
183
184#[derive(Default, QueryParams)]
185#[allow(non_snake_case)]
186struct GetItemsParams {
187    format: Format,
188    clean: Option<bool>,
189    offset: Option<u64>,
190    limit: Option<u64>,
191    // Just string so QueryParams work, we parse it ourselves
192    fields: Option<String>,
193    // Just string so QueryParams work, we parse it ourselves
194    omit: Option<String>,
195    unwind: Option<String>,
196    desc: Option<bool>, 
197    attachment: Option<bool>,
198    delimiter: Option<String>,
199    bom: Option<bool>,
200    xmlRoot: Option<String>,
201    xmlRow: Option<String>,
202    skipHeaderRow: Option<bool>,
203    skipHidden: Option<bool>, 
204    skipEmpty: Option<bool>, 
205    simplified: Option<bool>,
206    skipFailedPages: Option<bool>,
207}
208
/// Builder for a "get items" request whose JSON response is parsed into items of type `T`.
/// Created by `ApifyClient::get_items`.
pub struct GetItemsBuilder<'a, T> {
    // Client used to send the request (and to look up the token when needed).
    client: &'a ApifyClient,
    // Dataset the items are read from.
    dataset_id_or_name: IdOrName,
    // Request options collected by the setter methods.
    options: GetItemsParams,
    // Records the item type `T` without storing a value of it.
    _phantom: PhantomData<T>,
}
215
/// Builder for a "get items" request whose response is returned as an unparsed `String`.
/// Created by `ApifyClient::get_items_raw`.
pub struct GetItemsBuilderRaw<'a> {
    // Client used to send the request (and to look up the token when needed).
    client: &'a ApifyClient,
    // Dataset the items are read from.
    dataset_id_or_name: IdOrName,
    // Request options collected by the setter methods.
    options: GetItemsParams,
}
221
222impl <'a, T: DeserializeOwned> GetItemsBuilder<'a, T> {
223    pub fn clean(& mut self, clean: bool) -> &'_ mut Self {
224        self.options.clean = Some(clean);
225        self
226    }
227    pub fn offset(& mut self, offset: u64) -> &'_ mut Self {
228        self.options.offset = Some(offset);
229        self
230    }
231    pub fn limit(& mut self, limit: u64) -> &'_ mut Self {
232        self.options.limit = Some(limit);
233        self
234    }
235    pub fn fields(& mut self, fields: Vec<String>) -> &'_ mut Self {
236        self.options.fields = Some(fields.join(","));
237        self
238    }
239    pub fn omit(& mut self, omit: Vec<String>) -> &'_ mut Self {
240        self.options.omit = Some(omit.join(","));
241        self
242    }
243    pub fn unwind(& mut self, unwind: String) -> &'_ mut Self {
244        self.options.unwind = Some(unwind);
245        self
246    }
247    pub fn desc(& mut self, desc: bool) -> &'_ mut Self {
248        self.options.desc = Some(desc);
249        self
250    }
251    pub fn attachment(& mut self, attachment: bool) -> &'_ mut Self {
252        self.options.attachment = Some(attachment);
253        self
254    }
255    pub fn delimiter(& mut self, delimiter: String) -> &'_ mut Self {
256        self.options.delimiter = Some(delimiter);
257        self
258    }
259    pub fn bom(& mut self, bom: bool) -> &'_ mut Self {
260        self.options.bom = Some(bom);
261        self
262    }
263    pub fn xml_root(& mut self, xml_root: String) -> &'_ mut Self {
264        self.options.xmlRoot = Some(xml_root);
265        self
266    }
267    pub fn xml_row(& mut self, xml_row: String) -> &'_ mut Self {
268        self.options.xmlRow = Some(xml_row);
269        self
270    }
271    pub fn skip_header_row(& mut self, skip_header_row: bool) -> &'_ mut Self {
272        self.options.skipHeaderRow = Some(skip_header_row);
273        self
274    }
275    pub fn skip_hidden(& mut self, skip_hidden: bool) -> &'_ mut Self {
276        self.options.skipHidden = Some(skip_hidden);
277        self
278    }
279    pub fn skip_empty(& mut self, skip_empty: bool) -> &'_ mut Self {
280        self.options.skipEmpty = Some(skip_empty);
281        self
282    }
283    pub fn simplified(& mut self, simplified: bool) -> &'_ mut Self {
284        self.options.simplified = Some(simplified);
285        self
286    }
287    pub fn skip_failed_pages(& mut self, skip_failed_pages: bool) -> &'_ mut Self {
288        self.options.skipFailedPages = Some(skip_failed_pages);
289        self
290    }
291
292    pub async fn send(&self) -> Result<PaginationList<T>, ApifyClientError> {
293        let url = get_items_prepare_url(self.client, &self.dataset_id_or_name, &self.options)?;
294        let resp = self.client.retrying_request(&url, &reqwest::Method::GET, &None, &None).await?;
295        // For this endpoint, we have to reconstruct PaginationList manually
296        let headers = resp.headers().clone();
297        let bytes = resp.bytes().await.map_err(
298            |err| ApifyApiError::ApiFailure(format!("Apify API did not return bytes. Something is very wrong. Please contact support@apify.com\n{}", err))
299        )?;
300        let items: Vec<T> = serde_json::from_slice(&bytes)?;
301        println!("{:?}", headers);
302        
303        let total: u64 = parse_pagination_header(&headers, "X-Apify-Pagination-Total")?;
304        let limit: u64 = parse_pagination_header(&headers, "X-Apify-Pagination-Limit")?;
305        let offset: u64 = parse_pagination_header(&headers, "X-Apify-Pagination-Offset")?;
306        // Because x-apify-pagination-count returns invalid values when hidden/empty items are skipped
307        let count: u64 = items.len() as u64;
308
309        let pagination_list = PaginationList {
310            total,
311            limit: Some(limit),
312            count,
313            offset,
314            desc: false,
315            items,
316        };
317        return Ok(pagination_list);  
318    }
319}
320
321// TODO: Dedup this code
322impl <'a> GetItemsBuilderRaw<'a> {
323    pub fn format(& mut self, format: Format) -> &'_ mut Self {
324        self.options.format = format;
325        self
326    }
327    pub fn clean(& mut self, clean: bool) -> &'_ mut Self {
328        self.options.clean = Some(clean);
329        self
330    }
331    pub fn offset(& mut self, offset: u64) -> &'_ mut Self {
332        self.options.offset = Some(offset);
333        self
334    }
335    pub fn limit(& mut self, limit: u64) -> &'_ mut Self {
336        self.options.limit = Some(limit);
337        self
338    }
339    pub fn fields(& mut self, fields: Vec<String>) -> &'_ mut Self {
340        self.options.fields = Some(fields.join(","));
341        self
342    }
343    pub fn omit(& mut self, omit: Vec<String>) -> &'_ mut Self {
344        self.options.omit = Some(omit.join(","));
345        self
346    }
347    pub fn unwind(& mut self, unwind: String) -> &'_ mut Self {
348        self.options.unwind = Some(unwind);
349        self
350    }
351    pub fn desc(& mut self, desc: bool) -> &'_ mut Self {
352        self.options.desc = Some(desc);
353        self
354    }
355    pub fn attachment(& mut self, attachment: bool) -> &'_ mut Self {
356        self.options.attachment = Some(attachment);
357        self
358    }
359    pub fn delimiter(& mut self, delimiter: String) -> &'_ mut Self {
360        self.options.delimiter = Some(delimiter);
361        self
362    }
363    pub fn bom(& mut self, bom: bool) -> &'_ mut Self {
364        self.options.bom = Some(bom);
365        self
366    }
367    pub fn xml_root(& mut self, xml_root: String) -> &'_ mut Self {
368        self.options.xmlRoot = Some(xml_root);
369        self
370    }
371    pub fn xml_row(& mut self, xml_row: String) -> &'_ mut Self {
372        self.options.xmlRow = Some(xml_row);
373        self
374    }
375    pub fn skip_header_row(& mut self, skip_header_row: bool) -> &'_ mut Self {
376        self.options.skipHeaderRow = Some(skip_header_row);
377        self
378    }
379    pub fn skip_hidden(& mut self, skip_hidden: bool) -> &'_ mut Self {
380        self.options.skipHidden = Some(skip_hidden);
381        self
382    }
383    pub fn skip_empty(& mut self, skip_empty: bool) -> &'_ mut Self {
384        self.options.skipEmpty = Some(skip_empty);
385        self
386    }
387    pub fn simplified(& mut self, simplified: bool) -> &'_ mut Self {
388        self.options.simplified = Some(simplified);
389        self
390    }
391    pub fn skip_failed_pages(& mut self, skip_failed_pages: bool) -> &'_ mut Self {
392        self.options.skipFailedPages = Some(skip_failed_pages);
393        self
394    }
395
396    pub async fn send(&self) -> Result<String, ApifyClientError> {
397        let url = get_items_prepare_url(self.client, &self.dataset_id_or_name, &self.options)?;
398        let resp = self.client.retrying_request(&url, &reqwest::Method::GET, &None, &None).await?;
399        
400        let output = resp.text().await.map_err(
401            |err| ApifyApiError::ApiFailure(format!("Apify API did not return valid UTF-8. Something is very wrong. Please contact support@apify.com\n{}", err))
402        )?;
403        return Ok(output);
404    }
405}
406
/// Options of a "list datasets" request, turned into a query string by the `QueryParams` derive.
/// Every option starts as `None` (via `Default`) and is filled in by the builder setters.
#[derive(QueryParams, Default)]
struct ListDatasetsParams {
    offset: Option<u32>,
    limit: Option<u32>,
    desc: Option<bool>,
    unnamed: Option<bool>,
}
414
415pub struct ListDatasetsBuilder<'a> {
416    client: &'a ApifyClient,
417    options: ListDatasetsParams
418}
419
420impl <'a> ListDatasetsBuilder<'a> {
421    pub fn offset(& mut self, offset: u32) -> &'_ mut Self {
422        self.options.offset = Some(offset);
423        self
424    }
425    pub fn limit(& mut self, limit: u32) -> &'_ mut Self {
426        self.options.limit = Some(limit);
427        self
428    }
429    pub fn desc(& mut self, desc: bool) -> &'_ mut Self {
430        self.options.desc = Some(desc);
431        self
432    }
433    pub fn unnamed(& mut self, unnamed: bool) -> &'_ mut Self {
434        self.options.unnamed = Some(unnamed);
435        self
436    }
437
438    pub async fn send(&self) -> Result<PaginationList<Dataset>, ApifyClientError> {
439        let query_string = self.options.to_query_params();
440        let url = format!(
441            "{}?{}&token={}",
442            BASE_URL,
443            query_string,
444            self.client.optional_token.as_ref().ok_or(ApifyApiError::MissingToken)?
445        );
446        let resp = self.client.retrying_request(&url, &reqwest::Method::GET, &None, &None).await?;
447        let bytes = resp.bytes().await.map_err(
448            |err| ApifyApiError::ApiFailure(format!("Apify API did not return bytes. Something is very wrong. Please contact support@apify.com\n{}", err))
449        )?;
450
451        let apify_client_result: ApifyClientResult<PaginationList<Dataset>> = serde_json::from_slice(&bytes)?;
452        return Ok(apify_client_result.data);
453    }
454}