Skip to main content

vstorage/
webcal.rs

1// Copyright 2023-2024 Hugo Osvaldo Barrera
2//
3// SPDX-License-Identifier: EUPL-1.2
4
5//! Implements reading entries from a remote webcal resource.
6//!
7//! Webcal is a de-facto standard, and is basically a single icalendar file hosted via http(s).
8//!
9//! See the [Webcal wikipedia page](https://en.wikipedia.org/wiki/Webcal).
10
11use async_trait::async_trait;
12use http::{Method, Request, StatusCode, Uri};
13use http_body_util::BodyExt;
14use libdav::HttpClient;
15
16use crate::{
17    CollectionId, Error, ErrorKind, Etag, Href, ItemKind, Result,
18    base::{
19        Collection, CreateItemOptions, FetchedItem, FetchedProperty, Item, ItemVersion, Storage,
20    },
21    disco::{DiscoveredCollection, Discovery},
22    property::Property,
23    simple_component::Component,
24};
25
26/// Builder for [`WebCalStorage`].
27pub struct WebCalStorageBuilder<C: HttpClient + Clone> {
28    http_client: C,
29    url: Uri,
30    collection_id: CollectionId,
31}
32
33impl<C: HttpClient + Clone> WebCalStorageBuilder<C> {
34    /// Build the storage instance.
35    ///
36    /// Synchronous as no I/O operations are required.
37    #[must_use]
38    pub fn build(self) -> WebCalStorage<C> {
39        WebCalStorage {
40            url: self.url,
41            collection_id: self.collection_id,
42            http_client: self.http_client,
43        }
44    }
45}
46
47/// A storage which exposes items in remote icalendar resource.
48///
49/// A webcal storage contains exactly one collection, which contains all the entires found in the
50/// remote resource. The name of this single collection is specified via the `collection_id`
51/// argument.
52///
53/// This storage is a bit of an odd one (since in reality, there's no concept of collections in
54/// webcal). The extra abstraction layer is here merely to match the format of other storages.
55///
56/// # Href
57///
58/// The `href` for this meaningless. A string matching the `collection_id` property is used to
59/// describe the only available collection.
60// TODO: If an alternative href is provided, it should be used as a path on the same host.
61//       Note that discovery will only support the one matching the input URL.
62pub struct WebCalStorage<C: HttpClient + Clone> {
63    /// The URL of the remote icalendar resource. Must be HTTP or HTTPS.
64    url: Uri,
65    /// The href and id to be given to the single collection available.
66    collection_id: CollectionId,
67    http_client: C,
68}
69
70impl<C: HttpClient + Clone> WebCalStorage<C> {
71    /// Create a builder for this storage type.
72    #[must_use]
73    pub fn builder(
74        http_client: C,
75        url: Uri,
76        collection_id: CollectionId,
77    ) -> WebCalStorageBuilder<C> {
78        WebCalStorageBuilder {
79            http_client,
80            url,
81            collection_id,
82        }
83    }
84
85    /// Helper method to fetch a URL and return its body as a String.
86    ///
87    /// Be warned! This swallows headers (including `Etag`!).
88    async fn fetch_raw(&self, url: &Uri) -> Result<String> {
89        let req = Request::builder()
90            .method(Method::GET)
91            .uri(url)
92            .body(String::new())
93            .map_err(|e| ErrorKind::InvalidInput.error(e))?;
94        let response = self
95            .http_client
96            .clone()
97            .call(req)
98            .await
99            .map_err(|e| ErrorKind::Io.error(e))?;
100
101        match response.status() {
102            StatusCode::NOT_FOUND | StatusCode::GONE => {
103                return Err(ErrorKind::DoesNotExist.error("The remote resource does not exist."));
104            }
105            StatusCode::OK => {}
106            code => {
107                return Err(ErrorKind::Io.error(format!("request returned {code}")));
108            }
109        }
110
111        let (_head, body) = response.into_parts();
112        let data = body
113            .collect()
114            .await
115            .map_err(|e| ErrorKind::Io.error(e))?
116            .to_bytes();
117
118        // TODO: handle non-UTF-8 data (e.g.: Content-Type/charset).
119        // TODO: can I avoid making a copy of the entire response here?
120        String::from_utf8(data.to_vec()).map_err(|e| ErrorKind::InvalidData.error(e))
121    }
122}
123
124#[async_trait]
125impl<C: HttpClient + Clone> Storage for WebCalStorage<C> {
126    fn item_kind(&self) -> ItemKind {
127        ItemKind::Calendar
128    }
129
130    /// Checks that the remove resource exists and whether it looks like an icalendar resource.
131    async fn check(&self) -> Result<()> {
132        // TODO: Should map status codes to io::Error. if 404 -> NotFound, etc.
133        let raw = self.fetch_raw(&self.url).await?;
134
135        if !raw.starts_with("BEGIN:VCALENDAR") {
136            return Err(
137                ErrorKind::InvalidData.error("response for URL doesn't look like a calendar")
138            );
139        }
140        Ok(())
141    }
142
143    /// Returns a single collection with the name originally specified.
144    async fn discover_collections(&self) -> Result<Discovery> {
145        // TODO: shouldn't I check that the collection actually exists?
146        vec![DiscoveredCollection::new(
147            self.url.path().to_string(),
148            self.collection_id.clone(),
149        )]
150        .try_into()
151        .map_err(|e| ErrorKind::InvalidData.error(e))
152    }
153
154    /// Unsupported for this storage type.
155    async fn create_collection(&self, _: &str) -> Result<Collection> {
156        Err(ErrorKind::Unsupported.error("creating collections via webcal is not supported"))
157    }
158
159    /// Unsupported for this storage type.
160    async fn delete_collection(&self, _: &str) -> Result<()> {
161        Err(ErrorKind::Unsupported.error("destroying collections via webcal is not supported"))
162    }
163
164    /// Enumerates items in this collection.
165    ///
166    /// Note that, due to the nature of webcal, the whole collection needs to be retrieved. If some
167    /// items need to be read as well, it is generally best to use
168    /// [`WebCalStorage::get_all_items`] instead.
169    async fn list_items(&self, _collection: &str) -> Result<Vec<ItemVersion>> {
170        let raw = self.fetch_raw(&self.url).await?;
171
172        // TODO: it would be best if the parser could operate on a stream, although that might
173        //       complicate copying VTIMEZONEs inline if they are at the end of the stream.
174        let refs = Component::parse_split(&raw)
175            .map_err(|e| ErrorKind::InvalidData.error(e))?
176            .iter()
177            .map(|c| {
178                let item = Item::from(c.to_string());
179                let hash = item.hash();
180
181                ItemVersion::new(item.ident(), Etag::from(hash.to_string()))
182            })
183            .collect();
184
185        Ok(refs)
186    }
187
188    /// Returns a single item from the collection.
189    ///
190    /// Note that, due to the nature of webcal, the whole collection needs to be retrieved. It is
191    /// strongly recommended to use [`WebCalStorage::get_all_items`] instead.
192    async fn get_item(&self, href: &str) -> Result<(Item, Etag)> {
193        let raw = self.fetch_raw(&self.url).await?;
194
195        // TODO: it would be best if the parser could operate on a stream, although that might
196        //       complicate inlining VTIMEZONEs that are at the end.
197        let item = Component::parse_split(&raw)
198            .map_err(|e| ErrorKind::InvalidData.error(e))?
199            .iter()
200            .find_map(|c| {
201                let item = Item::from(c.to_string());
202                if item.ident() == href {
203                    Some(item)
204                } else {
205                    None
206                }
207            })
208            .ok_or_else(|| Error::from(ErrorKind::DoesNotExist))?;
209
210        let hash = item.hash();
211        Ok((item, hash.to_string().into()))
212    }
213
214    /// Returns multiple items from the collection.
215    ///
216    /// Note that, due to the nature of webcal, the whole collection needs to be retrieved. It is
217    /// generally best to use [`WebCalStorage::get_all_items`] instead.
218    async fn get_many_items(&self, hrefs: Vec<Href>) -> Result<Vec<FetchedItem>> {
219        let raw = self.fetch_raw(&self.url).await?;
220
221        // TODO: it would be best if the parser could operate on a stream, although that might
222        //       complicate inlining VTIMEZONEs that are at the end.
223
224        Component::parse_split(&raw)
225            .map_err(|e| ErrorKind::InvalidData.error(e))?
226            .iter()
227            .filter_map(|c| {
228                let item = Item::from(c.to_string());
229                if hrefs.contains(&item.ident()) {
230                    Some(Ok(FetchedItem {
231                        href: item.ident(),
232                        etag: item.hash().to_string().into(),
233                        item,
234                    }))
235                } else {
236                    None
237                }
238            })
239            .collect()
240    }
241
242    /// Fetch all items in the collection.
243    ///
244    /// Performs a single HTTP(s) request to fetch all items.
245    async fn get_all_items(&self, _collection: &str) -> Result<Vec<FetchedItem>> {
246        let raw = self.fetch_raw(&self.url).await?;
247
248        // TODO: it would be best if the parser could operate on a stream, although that might
249        //       complicate inlining VTIMEZONEs that are at the end.
250        let components =
251            Component::parse_split(&raw).map_err(|e| ErrorKind::InvalidData.error(e))?;
252
253        components
254            .iter()
255            .map(|c| {
256                let item = Item::from(c.to_string());
257                Ok(FetchedItem {
258                    href: item.ident(),
259                    etag: item.hash().to_string().into(),
260                    item,
261                })
262            })
263            .collect()
264    }
265
266    /// Unsupported for this storage type.
267    async fn create_item(
268        &self,
269        _collection: &str,
270        _: &Item,
271        _: CreateItemOptions,
272    ) -> Result<ItemVersion> {
273        Err(ErrorKind::Unsupported.error("adding items via webcal is not supported"))
274    }
275
276    /// Unsupported for this storage type.
277    async fn update_item(&self, _: &str, _: &Etag, _: &Item) -> Result<Etag> {
278        Err(ErrorKind::Unsupported.error("updating items via webcal is not supported"))
279    }
280
281    /// Unsupported for this storage type.
282    async fn set_property(&self, _: &str, _: Property, _: &str) -> Result<()> {
283        Err(ErrorKind::Unsupported.error("setting metadata via webcal is not supported"))
284    }
285
286    /// Unsupported for this storage type.
287    async fn unset_property(&self, _: &str, _: Property) -> Result<()> {
288        Err(ErrorKind::Unsupported.error("unsetting metadata via webcal is not supported"))
289    }
290
291    /// Unsupported for this storage type.
292    async fn get_property(&self, _: &str, _: Property) -> Result<Option<String>> {
293        // TODO: return None?
294        Err(ErrorKind::Unsupported.error("getting metadata via webcal is not supported"))
295    }
296
297    async fn delete_item(&self, _: &str, _: &Etag) -> Result<()> {
298        Err(ErrorKind::Unsupported.error("deleting items via webcal is not supported"))
299    }
300
301    fn href_for_collection_id(&self, id: &CollectionId) -> Result<Href> {
302        if id == &self.collection_id {
303            Ok(self.url.path().to_string())
304        } else {
305            Err(ErrorKind::Unsupported.error("discovery of arbitrary collections is not supported"))
306        }
307    }
308
309    async fn list_properties(&self, _: &str) -> Result<Vec<FetchedProperty>> {
310        Err(ErrorKind::Unsupported.error("webcal does not support properties"))
311    }
312}
313
#[cfg(test)]
#[cfg(feature = "webcal_test")]
mod test {
    use http::Uri;
    use hyper_rustls::HttpsConnectorBuilder;
    use hyper_util::{client::legacy::Client, rt::TokioExecutor};

    use crate::{Href, base::Storage, webcal::WebCalStorage};

    // FIXME: use a webcal link hosted by me.
    // TODO: these are just validation tests and not suitable as a keeper.
    /// Smoke test against a live third-party webcal resource.
    ///
    /// Requires network access, hence it is gated behind the `webcal_test` feature.
    #[tokio::test]
    async fn test_dummy() {
        let connector = HttpsConnectorBuilder::new()
            .with_native_roots()
            .unwrap()
            .https_or_http()
            .enable_http1()
            .build();
        let client = Client::builder(TokioExecutor::new()).build(connector);
        let url = Uri::try_from("https://www.officeholidays.com/ics/netherlands").unwrap();
        let storage =
            WebCalStorage::builder(client, url.clone(), "holidays".parse().unwrap()).build();
        storage.check().await.unwrap();

        // The storage uses the path of the resource URL (not the collection id) as the href
        // of its only collection.
        let collection = url.path();
        let discovery = &storage.discover_collections().await.unwrap();

        assert_eq!(
            &collection,
            &discovery.collections().first().unwrap().href()
        );

        let item_vers = storage.list_items(collection).await.unwrap();

        for item_ver in &item_vers {
            let (_item, etag) = storage.get_item(&item_ver.href).await.unwrap();
            // Might fail if upstream file mutates between requests.
            assert_eq!(etag, item_ver.etag);
        }

        let hrefs: Vec<Href> = item_vers.iter().map(|r| r.href.clone()).collect();
        let many = storage.get_many_items(hrefs.clone()).await.unwrap();

        assert_eq!(many.len(), hrefs.len());
        assert_eq!(many.len(), item_vers.len());
        // TODO: compare their contents and etags, though these should all match.
    }
}