1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
use crate::mf2::types::Item;

use microformats::types::PropertyValue;
use scraper::Selector;
use serde::{Deserialize, Serialize};

/// Logic around finding the representative h-card of a URL.
///
/// This kind of parsing allows for deeper introspection and fuller discovery
/// of how and where entities represent themselves. Learn more at
/// <https://microformats.org/wiki/representative-h-card-parsing>.
pub mod representative_hcard;

/// Logic for running post type discovery.
///
/// This module provides a means of detecting the known and experimental
/// post types provided by the IndieWeb community.
pub mod ptd;

/// Logic around discerning relationships between two URLs.
///
/// This module provides an implementation for resolving and
/// discovering advertised link relationships. This is one of the
/// more popular methods of resource discovery in the IndieWeb, used
/// to find the providers of the [standards][crate::standards].
pub mod link_rel;

/// The property map of a Microformats2 JSON object, kept as raw JSON.
///
/// This is an intermediate ("middle") type: it holds the properties as a
/// plain JSON object of string keys to JSON values, so they can later be
/// converted into something more structured, such as an
/// [item][microformats::types::Item].
///
/// The wrapped map is public and can be accessed directly through field `0`.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Properties(pub serde_json::Map<String, serde_json::Value>);

impl std::ops::Deref for Properties {
    type Target = serde_json::Map<String, serde_json::Value>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl std::ops::DerefMut for Properties {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl From<Properties> for serde_json::Value {
    fn from(val: Properties) -> Self {
        serde_json::Value::Object(val.0)
    }
}

/// Attempts to read a JSON value as a set of properties.
impl TryFrom<serde_json::Value> for Properties {
    type Error = serde_json::Error;

    /// Deserializes `val` into a [`Properties`].
    ///
    /// # Errors
    ///
    /// Returns the [`serde_json::Error`] reported by deserialization when
    /// `val` does not have the shape `Properties` expects.
    fn try_from(val: serde_json::Value) -> Result<Self, Self::Error> {
        // The target type is fixed by the return type, so no turbofish is needed.
        serde_json::from_value(val)
    }
}

impl Properties {
    /// Returns a normalized copy of these properties.
    ///
    /// If the map has a top-level `"properties"` key, the result is the JSON
    /// object stored under that key; every sibling key is discarded. When
    /// that entry is present but is not a JSON object, the result is empty.
    /// Without a `"properties"` key, the result is a clone of `self`.
    ///
    /// # Examples
    /// ```
    /// # use indieweb::algorithms::Properties;
    /// # use serde_json::json;
    /// #
    /// assert_eq!(
    ///     Properties::try_from(json!({"properties": {"actual": "value"}})).unwrap().normalize(),
    ///     Properties::try_from(json!({"actual": "value"})).unwrap(),
    ///     "use 'properties' as the value"
    /// );
    /// #
    /// assert_eq!(
    ///     Properties::try_from(json!({"actual": "value"})).unwrap().normalize(),
    ///     Properties::try_from(json!({"actual": "value"})).unwrap(),
    ///     "returns the keys and values as they are"
    /// );
    /// #
    /// ```
    pub fn normalize(&self) -> Properties {
        match self.get("properties") {
            // Already flat: hand back an independent copy.
            None => self.clone(),
            // Unwrap the nested object; a non-object entry yields an empty map.
            Some(nested) => Properties(nested.as_object().cloned().unwrap_or_default()),
        }
    }
}

/// Pulls all of the discoverable URLs from this item.
///
/// URLs are collected, in this order, from:
///
/// * the value of each direct child of `item`, when that value is a
///   [URL value][microformats::types::ValueKind::Url]
/// * each property value of `item`, in the order its properties yield them:
///   * a URL value is taken as-is
///   * a nested item is searched recursively with this same function
///   * an HTML fragment contributes the `href` of every `a[href]` element
///     whose value parses as a [`url::Url`]; `href`s that fail to parse are
///     silently dropped
///
/// Every other kind of property value is ignored. The result is not
/// de-duplicated, so a URL that appears several times is returned several
/// times.
pub fn extract_urls(item: &Item) -> Vec<url::Url> {
    // The selector text is a constant, so failing to parse it is a programming error.
    let link_selector = Selector::parse("a[href]").expect("`a[href]` is a valid CSS selector");

    // NOTE(review): only the children's values are read here; the item's own
    // value is never consulted. Confirm that this is the intended behaviour.
    let mut all_urls: Vec<url::Url> = item
        .children()
        .iter()
        .filter_map(|child| child.value())
        .filter_map(|value| match value {
            microformats::types::ValueKind::Url(u) => Some(u),
            microformats::types::ValueKind::Plain(_) => None,
        })
        .collect();

    // Push straight into the result instead of building a temporary `Vec`
    // for every property value and flattening afterwards.
    for values in item.properties().values() {
        for value in values {
            match value {
                PropertyValue::Url(u) => all_urls.push(u.clone()),
                PropertyValue::Item(nested) => all_urls.extend(extract_urls(nested)),
                PropertyValue::Fragment(f) => {
                    let fragment = scraper::Html::parse_fragment(&f.html);
                    all_urls.extend(
                        fragment
                            .select(&link_selector)
                            .filter_map(|elem| elem.value().attr("href"))
                            .filter_map(|href| href.parse::<url::Url>().ok()),
                    );
                }
                _ => {}
            }
        }
    }

    all_urls
}

// Expects three URLs: the two `like-of` values plus the single link inside
// the HTML of `content`.
#[test]
fn extract_urls_test() {
    let source = serde_json::json!({
        "type": ["h-entry"],
        "properties": {
            "content": [{"html": "Well this is a link <a href='http://example.com/3'>fooo</a>", "value": "Well this is a link fooo"}],
            "like-of": ["http://example.com/", "http://example.com/2"]
        }
    });

    // Fail with the conversion error (if any) before unwrapping.
    let parsed = Item::try_from(source);
    assert_eq!(parsed.as_ref().err(), None);

    let found = extract_urls(&parsed.unwrap());
    assert_eq!(found.len(), 3);
}