Skip to main content

omry_archiving/
document.rs

1//! Types for documents to be stored in the flora archive.
2
3#[cfg(feature = "serde")]
4use serde::{Deserialize, Serialize};
5
6/// Types of documents that can be stored in the flora archive.
7#[non_exhaustive]
8#[derive(Debug, Clone, PartialEq)]
9#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
10pub enum Document {
11    /// A web page (typically HTML).
12    WebPage(ArchivedPage),
13}
14
15impl Document {
16    /// Creates a [`Document`] variant for a web page (HTML).
17    ///
18    /// Does not check that the given data is actually HTML.
19    #[must_use]
20    pub fn new_web_page_unchecked(data: Vec<u8>) -> Self {
21        Self::WebPage(ArchivedPage { data })
22    }
23
24    /// Returns the HTML data if the document is an HTML or text web page.
25    ///
26    /// If it's not a web page, returns `None`.
27    #[must_use]
28    pub fn into_web_page(self) -> Option<Vec<u8>> {
29        match self {
30            Self::WebPage(page) => Some(page.into_data()),
31        }
32    }
33}
34
/// Type for web pages to be stored within flora's archive.
///
/// This is a thin wrapper around the page's raw bytes. Equality is plain byte
/// equality, which is total, so the type implements [`Eq`] in addition to
/// [`PartialEq`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ArchivedPage {
    /// The raw bytes of this web page.
    data: Vec<u8>,
}
42
43impl ArchivedPage {
44    /// Returns a reference to the data for this page.
45    #[must_use]
46    pub fn data(&self) -> &[u8] {
47        &self.data
48    }
49
50    /// Returns the data for this page, consuming the wrapper.
51    #[must_use]
52    pub fn into_data(self) -> Vec<u8> {
53        self.data
54    }
55}
56
/// Unit tests for the document types, plus fixture helpers for loading the
/// example web page.
#[cfg(test)]
pub(crate) mod tests {
    use super::*;

    use std::env;
    use std::fs;
    use std::path::PathBuf;

    use anyhow::Result;

    #[cfg(feature = "marshal")]
    use crate::Marshal;

    /// Environment variable expected to hold the workspace root directory.
    const WORKSPACE_DIR_KEY: &str = "CARGO_WORKSPACE_DIR";
    /// Location of the example page, relative to the workspace root.
    const TEST_WEB_PAGE_PATH: &str = "test_data/example.html";

    /// Builds the path of the example web page used by the tests.
    ///
    /// # Errors
    ///
    /// Fails when `CARGO_WORKSPACE_DIR` is not set, or when the directory it
    /// names cannot be canonicalized.
    pub fn get_test_page_path() -> Result<PathBuf> {
        // Look the key up directly with `var_os`: iterating `env::vars()`
        // panics as soon as *any* variable in the environment is not valid
        // Unicode, even one unrelated to this lookup.
        let workspace_dir = env::var_os(WORKSPACE_DIR_KEY)
            .ok_or_else(|| anyhow::anyhow!("{WORKSPACE_DIR_KEY} is not set"))?;
        let workspace_dir = fs::canonicalize(workspace_dir)?;
        Ok(workspace_dir.join(TEST_WEB_PAGE_PATH))
    }

    /// Reads the example web page and wraps its bytes in a [`Document`].
    ///
    /// # Errors
    ///
    /// Fails when the page path cannot be resolved or the file cannot be read.
    pub(crate) fn load_test_web_doc() -> Result<Document> {
        let page_data = fs::read(get_test_page_path()?)?;
        Ok(Document::new_web_page_unchecked(page_data))
    }

    #[test]
    #[cfg(feature = "marshal")]
    fn can_marshal_document() -> Result<()> {
        let archive_holder = load_test_web_doc()?;
        let serialized = archive_holder.to_byte_vec()?;
        let deserialized = Document::from_bytes(&serialized)?;
        insta::assert_debug_snapshot!(&deserialized);
        Ok(())
    }

    #[test]
    fn can_take_owned_data() -> Result<()> {
        let archive_holder = load_test_web_doc()?;
        insta::assert_debug_snapshot!(archive_holder.into_web_page());
        Ok(())
    }
}