indieweb/algorithms/
mod.rs

1use crate::mf2;
2
3use mf2::types::PropertyValue;
4use mf2::types::{Fragment, Item};
5use serde::{Deserialize, Serialize};
6
7/// Logic around finding the representative h-card of a URL.
8///
9/// This kind of parsing allows for deeper introspection and fuller discovery
10/// of how and where entities represent themselves. Learn more at
11/// <https://microformats.org/wiki/representative-h-card-parsing>.
12pub mod representative_hcard;
13
14/// Logic for running post type discovery.
15///
16/// This module provides a means of detecting the known and experimental
17/// post types provided by the IndieWeb community.
18pub mod ptd;
19
20/// Logic around discerning relationships between two URLs.
21///
22/// This module provides an implementation for resolving and
/// discovering the advertised link relationships. This is one
/// of the more popular methods of resource discovery used in the
/// IndieWeb by providers of the [standards][crate::standards].
26pub mod link_rel;
27
28/// A normalized representation of properties from Microformats2 JSON.
29///
30/// This represents a "middle" type for converting Microformats2 JSON into
31/// something more structured like an [item][microformats::types::Item].
32#[derive(Clone, Debug, Serialize, Deserialize, Default, PartialEq, Eq)]
33pub struct Properties(pub serde_json::Map<String, serde_json::Value>);
34
35impl std::ops::Deref for Properties {
36    type Target = serde_json::Map<String, serde_json::Value>;
37
38    fn deref(&self) -> &Self::Target {
39        &self.0
40    }
41}
42
43impl std::ops::DerefMut for Properties {
44    fn deref_mut(&mut self) -> &mut Self::Target {
45        &mut self.0
46    }
47}
48
49impl From<Properties> for serde_json::Value {
50    fn from(val: Properties) -> Self {
51        serde_json::Value::Object(val.0)
52    }
53}
54
55impl TryFrom<serde_json::Value> for Properties {
56    type Error = serde_json::Error;
57
58    fn try_from(val: serde_json::Value) -> Result<Self, Self::Error> {
59        serde_json::from_value::<Self>(val)
60    }
61}
62
63impl Properties {
64    /// Creates a copy of this set of properties that's been normalized.
65    ///
66    /// # Examples
67    /// ```
68    /// # use indieweb::algorithms::Properties;
69    /// # use serde_json::json;
70    /// #
71    /// assert_eq!(
72    ///     Properties::try_from(json!({"properties": {"actual": "value"}})).unwrap().normalize(),
73    ///     Properties::try_from(json!({"actual": "value"})).unwrap(),
74    ///     "use 'properties' as the value"
75    /// );
76    /// #
77    /// assert_eq!(
78    ///     Properties::try_from(json!({"actual": "value"})).unwrap().normalize(),
79    ///     Properties::try_from(json!({"actual": "value"})).unwrap(),
80    ///     "returns the keys and values as they are"
81    /// );
82    /// #
83    /// ```
84    pub fn normalize(&self) -> Properties {
85        if self.contains_key("properties") {
86            Properties(
87                self.get("properties")
88                    .and_then(|p| p.as_object().cloned())
89                    .unwrap_or_default(),
90            )
91        } else {
92            self.clone()
93        }
94    }
95}
96
97/// Pulls all of the URLs from this item.
98///
99/// This extracts all of the discoverable URLs from an item. This will
100/// pull from:
101///
102/// * [item.value][microformats::types::Item::value] if it's a [URL value][microformats::types::ValueKind::Url]
103/// * any property whose values are hinted as a [URL][microformats::types::ValueKind::Url]
104/// * the declared [links of a HTML fragment][microformats::types::Fragment]
105pub fn extract_urls(item: &Item) -> Vec<url::Url> {
106    let mut all_urls = vec![];
107
108    all_urls.extend(
109        item.children
110            .iter()
111            .filter_map(|child| child.value.to_owned())
112            .filter_map(|v| match v {
113                microformats::types::ValueKind::Url(u) => Some(u),
114                microformats::types::ValueKind::Plain(_) => None,
115            }),
116    );
117    all_urls.extend(
118        item.properties
119            .values()
120            .flatten()
121            .filter_map(|v| match v {
122                PropertyValue::Url(u) => Some(vec![u.clone()]),
123                PropertyValue::Item(ref i) => Some(extract_urls(i)),
124                PropertyValue::Fragment(Fragment { links, .. }) => Some(
125                    links
126                        .iter()
127                        .filter_map(|v| v.parse().ok())
128                        .collect::<Vec<_>>(),
129                ),
130                _ => None,
131            })
132            .flatten(),
133    );
134    all_urls
135}
136
#[test]
fn extract_urls_test() {
    let parsed = Item::try_from(serde_json::json!({
        "type": ["h-entry"],
        "properties": {
            "content": [{"html": "Well this is a link <a href='http://example.com/3'>fooo</a>", "value": "Well this is a link fooo"}],
            "like-of": ["http://example.com/", "http://example.com/2"]
        }
    }));

    // Parsing the fixture must succeed before anything can be extracted.
    assert_eq!(parsed.as_ref().err(), None);

    // Ideally this would be three, but only two are found because the Microformats
    // library doesn't yet do extra processing when deserializing values.
    let found = extract_urls(&parsed.unwrap());
    assert_eq!(found.len(), 2);
}