Skip to main content

vstorage/
webcal.rs

1// Copyright 2023-2024 Hugo Osvaldo Barrera
2//
3// SPDX-License-Identifier: EUPL-1.2
4
5//! Implements reading entries from a remote webcal resource.
6//!
7//! Webcal is a de-facto standard, and is basically a single icalendar file hosted via http(s).
8//!
9//! See the [Webcal wikipedia page](https://en.wikipedia.org/wiki/Webcal).
10
11use async_trait::async_trait;
12use http::{Method, Request, Response, StatusCode, Uri};
13use http_body_util::BodyExt;
14use hyper::body::Incoming;
15use tower::Service;
16
17use crate::{
18    CollectionId, Error, ErrorKind, Etag, Href, ItemKind, Result,
19    base::{
20        Collection, CreateItemOptions, FetchedItem, FetchedProperty, Item, ItemVersion, Storage,
21    },
22    disco::{DiscoveredCollection, Discovery},
23    property::Property,
24    simple_component::Component,
25};
26
27/// Builder for [`WebCalStorage`].
28pub struct WebCalStorageBuilder<C>
29where
30    C: Service<Request<String>, Response = Response<Incoming>> + Send + Sync + Clone + 'static,
31    C::Error: std::error::Error + Send + Sync,
32    C::Future: Send + Sync,
33{
34    http_client: C,
35    url: Uri,
36    collection_id: CollectionId,
37}
38
39impl<C> WebCalStorageBuilder<C>
40where
41    C: Service<Request<String>, Response = Response<Incoming>> + Send + Sync + Clone + 'static,
42    C::Error: std::error::Error + Send + Sync,
43    C::Future: Send + Sync,
44{
45    /// Build the storage instance.
46    ///
47    /// Synchronous as no I/O operations are required.
48    #[must_use]
49    pub fn build(self) -> WebCalStorage<C> {
50        WebCalStorage {
51            url: self.url,
52            collection_id: self.collection_id,
53            http_client: self.http_client,
54        }
55    }
56}
57
58/// A storage which exposes items in remote icalendar resource.
59///
60/// A webcal storage contains exactly one collection, which contains all the entires found in the
61/// remote resource. The name of this single collection is specified via the `collection_id`
62/// argument.
63///
64/// This storage is a bit of an odd one (since in reality, there's no concept of collections in
65/// webcal). The extra abstraction layer is here merely to match the format of other storages.
66///
67/// # Href
68///
69/// The `href` for this meaningless. A string matching the `collection_id` property is used to
70/// describe the only available collection.
71// TODO: If an alternative href is provided, it should be used as a path on the same host.
72//       Note that discovery will only support the one matching the input URL.
73pub struct WebCalStorage<C>
74where
75    // XXX: Clone can be dropped after tower drops it `mut` for `tower_service::Service::call`
76    //      See: https://github.com/hyperium/hyper/issues/3784#issuecomment-2491667302
77    //      See: https://github.com/tower-rs/tower/issues/753
78    C: Service<Request<String>, Response = Response<Incoming>> + Send + Sync + Clone + 'static,
79    C::Error: std::error::Error + Send + Sync,
80    C::Future: Send + Sync,
81{
82    /// The URL of the remote icalendar resource. Must be HTTP or HTTPS.
83    url: Uri,
84    /// The href and id to be given to the single collection available.
85    collection_id: CollectionId,
86    http_client: C,
87}
88
89impl<C> WebCalStorage<C>
90where
91    C: Service<Request<String>, Response = Response<Incoming>> + Send + Sync + Clone + 'static,
92    C::Error: std::error::Error + Send + Sync,
93    C::Future: Send + Sync,
94{
95    /// Create a builder for this storage type.
96    #[must_use]
97    pub fn builder(
98        http_client: C,
99        url: Uri,
100        collection_id: CollectionId,
101    ) -> WebCalStorageBuilder<C> {
102        WebCalStorageBuilder {
103            http_client,
104            url,
105            collection_id,
106        }
107    }
108
109    /// Helper method to fetch a URL and return its body as a String.
110    ///
111    /// Be warned! This swallows headers (including `Etag`!).
112    async fn fetch_raw(&self, url: &Uri) -> Result<String> {
113        let req = Request::builder()
114            .method(Method::GET)
115            .uri(url)
116            .body(String::new())
117            .map_err(|e| ErrorKind::InvalidInput.error(e))?;
118        let response = self
119            .http_client
120            .clone()
121            .call(req)
122            .await
123            .map_err(|e| ErrorKind::Io.error(e))?;
124
125        match response.status() {
126            StatusCode::NOT_FOUND | StatusCode::GONE => {
127                return Err(ErrorKind::DoesNotExist.error("The remote resource does not exist."));
128            }
129            StatusCode::OK => {}
130            code => {
131                return Err(ErrorKind::Io.error(format!("request returned {code}")));
132            }
133        }
134
135        let (_head, body) = response.into_parts();
136        let data = body
137            .collect()
138            .await
139            .map_err(|e| ErrorKind::Io.error(e))?
140            .to_bytes();
141
142        // TODO: handle non-UTF-8 data (e.g.: Content-Type/charset).
143        // TODO: can I avoid making a copy of the entire response here?
144        String::from_utf8(data.to_vec()).map_err(|e| ErrorKind::InvalidData.error(e))
145    }
146}
147
148#[async_trait]
149impl<C> Storage for WebCalStorage<C>
150where
151    C: Service<Request<String>, Response = Response<Incoming>> + Send + Sync + Clone + 'static,
152    C::Error: std::error::Error + Send + Sync,
153    C::Future: Send + Sync,
154{
155    fn item_kind(&self) -> ItemKind {
156        ItemKind::Calendar
157    }
158
159    /// Checks that the remove resource exists and whether it looks like an icalendar resource.
160    async fn check(&self) -> Result<()> {
161        // TODO: Should map status codes to io::Error. if 404 -> NotFound, etc.
162        let raw = self.fetch_raw(&self.url).await?;
163
164        if !raw.starts_with("BEGIN:VCALENDAR") {
165            return Err(
166                ErrorKind::InvalidData.error("response for URL doesn't look like a calendar")
167            );
168        }
169        Ok(())
170    }
171
172    /// Returns a single collection with the name originally specified.
173    async fn discover_collections(&self) -> Result<Discovery> {
174        // TODO: shouldn't I check that the collection actually exists?
175        vec![DiscoveredCollection::new(
176            self.url.path().to_string(),
177            self.collection_id.clone(),
178        )]
179        .try_into()
180        .map_err(|e| ErrorKind::InvalidData.error(e))
181    }
182
183    /// Unsupported for this storage type.
184    async fn create_collection(&self, _: &str) -> Result<Collection> {
185        Err(ErrorKind::Unsupported.error("creating collections via webcal is not supported"))
186    }
187
188    /// Unsupported for this storage type.
189    async fn delete_collection(&self, _: &str) -> Result<()> {
190        Err(ErrorKind::Unsupported.error("destroying collections via webcal is not supported"))
191    }
192
193    /// Enumerates items in this collection.
194    ///
195    /// Note that, due to the nature of webcal, the whole collection needs to be retrieved. If some
196    /// items need to be read as well, it is generally best to use
197    /// [`WebCalStorage::get_all_items`] instead.
198    async fn list_items(&self, _collection: &str) -> Result<Vec<ItemVersion>> {
199        let raw = self.fetch_raw(&self.url).await?;
200
201        // TODO: it would be best if the parser could operate on a stream, although that might
202        //       complicate copying VTIMEZONEs inline if they are at the end of the stream.
203        let refs = Component::parse(&raw)
204            .map_err(|e| ErrorKind::InvalidData.error(e))?
205            .into_split_collection()
206            .map_err(|e| ErrorKind::InvalidData.error(e))?
207            .iter()
208            .map(|c| {
209                let item = Item::from(c.to_string());
210                let hash = item.hash();
211
212                ItemVersion::new(item.ident(), Etag::from(hash.to_string()))
213            })
214            .collect();
215
216        Ok(refs)
217    }
218
219    /// Returns a single item from the collection.
220    ///
221    /// Note that, due to the nature of webcal, the whole collection needs to be retrieved. It is
222    /// strongly recommended to use [`WebCalStorage::get_all_items`] instead.
223    async fn get_item(&self, href: &str) -> Result<(Item, Etag)> {
224        let raw = self.fetch_raw(&self.url).await?;
225
226        // TODO: it would be best if the parser could operate on a stream, although that might
227        //       complicate inlining VTIMEZONEs that are at the end.
228        let item = Component::parse(&raw)
229            .map_err(|e| ErrorKind::InvalidData.error(e))?
230            .into_split_collection()
231            .map_err(|e| ErrorKind::InvalidData.error(e))?
232            .iter()
233            .find_map(|c| {
234                let item = Item::from(c.to_string());
235                if item.ident() == href {
236                    Some(item)
237                } else {
238                    None
239                }
240            })
241            .ok_or_else(|| Error::from(ErrorKind::DoesNotExist))?;
242
243        let hash = item.hash();
244        Ok((item, hash.to_string().into()))
245    }
246
247    /// Returns multiple items from the collection.
248    ///
249    /// Note that, due to the nature of webcal, the whole collection needs to be retrieved. It is
250    /// generally best to use [`WebCalStorage::get_all_items`] instead.
251    async fn get_many_items(&self, hrefs: &[&str]) -> Result<Vec<FetchedItem>> {
252        let raw = self.fetch_raw(&self.url).await?;
253
254        // TODO: it would be best if the parser could operate on a stream, although that might
255        //       complicate inlining VTIMEZONEs that are at the end.
256
257        Component::parse(&raw)
258            .map_err(|e| ErrorKind::InvalidData.error(e))?
259            .into_split_collection()
260            .map_err(|e| ErrorKind::InvalidData.error(e))?
261            .iter()
262            .filter_map(|c| {
263                let item = Item::from(c.to_string());
264                if hrefs.contains(&(item.ident().as_ref())) {
265                    Some(Ok(FetchedItem {
266                        href: item.ident(),
267                        etag: item.hash().to_string().into(),
268                        item,
269                    }))
270                } else {
271                    None
272                }
273            })
274            .collect()
275    }
276
277    /// Fetch all items in the collection.
278    ///
279    /// Performs a single HTTP(s) request to fetch all items.
280    async fn get_all_items(&self, _collection: &str) -> Result<Vec<FetchedItem>> {
281        let raw = self.fetch_raw(&self.url).await?;
282
283        // TODO: it would be best if the parser could operate on a stream, although that might
284        //       complicate inlining VTIMEZONEs that are at the end.
285        let components = Component::parse(&raw)
286            .map_err(|e| ErrorKind::InvalidData.error(e))?
287            .into_split_collection()
288            .map_err(|e| ErrorKind::InvalidData.error(e))?;
289
290        components
291            .iter()
292            .map(|c| {
293                let item = Item::from(c.to_string());
294                Ok(FetchedItem {
295                    href: item.ident(),
296                    etag: item.hash().to_string().into(),
297                    item,
298                })
299            })
300            .collect()
301    }
302
303    /// Unsupported for this storage type.
304    async fn create_item(
305        &self,
306        _collection: &str,
307        _: &Item,
308        _: CreateItemOptions,
309    ) -> Result<ItemVersion> {
310        Err(ErrorKind::Unsupported.error("adding items via webcal is not supported"))
311    }
312
313    /// Unsupported for this storage type.
314    async fn update_item(&self, _: &str, _: &Etag, _: &Item) -> Result<Etag> {
315        Err(ErrorKind::Unsupported.error("updating items via webcal is not supported"))
316    }
317
318    /// Unsupported for this storage type.
319    async fn set_property(&self, _: &str, _: Property, _: &str) -> Result<()> {
320        Err(ErrorKind::Unsupported.error("setting metadata via webcal is not supported"))
321    }
322
323    /// Unsupported for this storage type.
324    async fn unset_property(&self, _: &str, _: Property) -> Result<()> {
325        Err(ErrorKind::Unsupported.error("unsetting metadata via webcal is not supported"))
326    }
327
328    /// Unsupported for this storage type.
329    async fn get_property(&self, _: &str, _: Property) -> Result<Option<String>> {
330        // TODO: return None?
331        Err(ErrorKind::Unsupported.error("getting metadata via webcal is not supported"))
332    }
333
334    async fn delete_item(&self, _: &str, _: &Etag) -> Result<()> {
335        Err(ErrorKind::Unsupported.error("deleting items via webcal is not supported"))
336    }
337
338    fn href_for_collection_id(&self, id: &CollectionId) -> Result<Href> {
339        if id == &self.collection_id {
340            Ok(self.url.path().to_string())
341        } else {
342            Err(ErrorKind::Unsupported.error("discovery of arbitrary collections is not supported"))
343        }
344    }
345
346    async fn list_properties(&self, _: &str) -> Result<Vec<FetchedProperty>> {
347        Err(ErrorKind::Unsupported.error("webcal does not support properties"))
348    }
349}
350
#[cfg(test)]
#[cfg(feature = "webcal_test")]
mod test {
    use http::Uri;
    use hyper_rustls::HttpsConnectorBuilder;
    use hyper_util::{client::legacy::Client, rt::TokioExecutor};

    use crate::{base::Storage, webcal::WebCalStorage};

    // FIXME: use a webcal link hosted by me.
    // TODO: these are just validation tests and not suitable as a keeper.
    /// Smoke test against a public webcal resource; requires network access.
    #[tokio::test]
    async fn test_dummy() {
        let connector = HttpsConnectorBuilder::new()
            .with_native_roots()
            .unwrap()
            .https_or_http()
            .enable_http1()
            .build();
        let client = Client::builder(TokioExecutor::new()).build(connector);
        let storage = WebCalStorage::builder(
            client,
            Uri::try_from("https://www.officeholidays.com/ics/netherlands").unwrap(),
            "holidays".parse().unwrap(),
        )
        .build();
        storage.check().await.unwrap();

        // Discovery reports the path component of the URL as the collection's href (not the
        // collection id).
        let collection = "/ics/netherlands";
        let discovery = &storage.discover_collections().await.unwrap();

        assert_eq!(
            &collection,
            &discovery.collections().first().unwrap().href()
        );

        let item_vers = storage.list_items(collection).await.unwrap();

        for item_ver in &item_vers {
            let (_item, etag) = storage.get_item(&item_ver.href).await.unwrap();
            // Might fail if upstream file mutates between requests.
            assert_eq!(etag, item_ver.etag);
        }

        let hrefs: Vec<&str> = item_vers.iter().map(|r| r.href.as_ref()).collect();
        let many = storage.get_many_items(&hrefs).await.unwrap();

        assert_eq!(many.len(), hrefs.len());
        assert_eq!(many.len(), item_vers.len());
        // TODO: compare their contents and etags, though these should all match.
    }
}