Skip to main content

microformats/
lib.rs

1pub mod jf2;
2
3#[cfg(feature = "cleaner")]
4pub mod extensions;
5
6#[cfg(feature = "cleaner")]
7pub use extensions::CleanedDocument;
8
9pub mod parse;
10
11#[cfg(feature = "picture")]
12pub use parse::{Picture, PictureSource, SrcSetEntry};
13
14pub use microformats_types as types;
15
16#[derive(thiserror::Error, Debug)]
17pub enum Error {
18    #[error(transparent)]
19    Parse(#[from] parse::Error),
20
21    #[error(transparent)]
22    Types(#[from] types::Error),
23
24    #[error(transparent)]
25    IO(#[from] std::io::Error),
26
27    #[error("Failed to generate HTML: {0}")]
28    HtmlCodegen(String),
29
30    #[error(transparent)]
31    FromUtf8(#[from] std::string::FromUtf8Error),
32
33    #[error("The required property {name:} was not of the type {kind:?}")]
34    InvalidRequiredProperty { name: String, kind: String },
35
36    #[error(transparent)]
37    Json(#[from] serde_json::Error),
38
39    #[error(transparent)]
40    Jf2Profile(#[from] jf2::profiles::Error),
41}
42
43impl PartialEq for Error {
44    fn eq(&self, other: &Self) -> bool {
45        self.to_string().eq(&other.to_string())
46    }
47}
48
49impl From<url::ParseError> for Error {
50    fn from(value: url::ParseError) -> Self {
51        Self::Parse(parse::Error::from(value))
52    }
53}
54
55impl From<microformats_types::temporal::Error> for Error {
56    fn from(value: microformats_types::temporal::Error) -> Self {
57        Self::Types(types::Error::from(value))
58    }
59}
60
61/// Parses the provided HTML into a `types::Document` resolved with the proviedd URL.
62///
63/// ```
64/// use microformats::from_html;
65///
66/// let base_url: url::Url = "https://example.com".parse().unwrap();
67/// let document = from_html(r#"
68/// <html>
69///     <head>
70///         <link rel="author me" href="/author">
71///     </head>
72///     <body>
73///     </body>
74/// </html>
75/// "#, &base_url);
76///
77/// assert!(document.is_ok());
78/// ```
79pub fn from_html(html: &str, url: &url::Url) -> Result<types::Document, Error> {
80    parse::Parser::from_html(html.to_string())
81        .and_then(|mut parser| parser.into_document(Some(url.clone())))
82}
83
84/// Parses the HTML stored in the provided reader into a `types::Document` resolved with the provided URL.
85pub fn from_reader<R>(mut reader: R, url: &url::Url) -> Result<types::Document, Error>
86where
87    R: std::io::BufRead,
88{
89    let mut html = String::with_capacity(16384);
90    reader
91        .read_to_string(&mut html)
92        .map_err(Error::IO)
93        .and_then(|_| from_html(&html, url))
94}
95
96/// Parses the provided JSON string into a `types::Document`.
97///
98/// ```
99/// use microformats::from_json_string;
100/// use url::Url;
101///
102/// let json = r#"
103/// {
104///   "items": [{
105///     "type": ["h-card"],
106///     "properties": {
107///       "name": ["John Doe"]
108///     }
109///   }]
110/// }
111/// "#;
112///
113/// let url: Url = "https://example.com".parse().unwrap();
114/// let document = from_json_string(json, &url);
115///
116/// assert!(document.is_ok());
117/// ```
118pub fn from_json_string(json: &str, url: &url::Url) -> Result<types::Document, Error> {
119    let mut document: types::Document = serde_json::from_str(json).map_err(Error::Json)?;
120
121    // Set the URL if not already present
122    if document.url.is_none() {
123        document.url = Some(url.clone());
124    }
125
126    Ok(document)
127}
128
129/// Parses the JSON stored in the provided reader into a `types::Document` resolved with the provided URL.
130pub fn from_json_reader<R>(mut reader: R, url: &url::Url) -> Result<types::Document, Error>
131where
132    R: std::io::BufRead,
133{
134    let mut json = String::with_capacity(16384);
135    reader
136        .read_to_string(&mut json)
137        .map_err(Error::IO)
138        .and_then(|_| from_json_string(&json, url))
139}
140
141pub mod http;