1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
use crate::mf2::types::Item;
use microformats::types::PropertyValue;
use scraper::Selector;
use serde::{Deserialize, Serialize};
/// Logic around finding the representative h-card of a URL.
///
/// This kind of parsing allows for deeper introspection and fuller discovery
/// of how and where entities represent themselves. Learn more at
/// <https://microformats.org/wiki/representative-h-card-parsing>.
pub mod representative_hcard;
/// Logic for running post type discovery.
///
/// This module provides a means of detecting the known and experimental
/// post types provided by the IndieWeb community.
pub mod ptd;
/// Logic around discerning relationships between two URLs.
///
/// This module provides an implementation for resolving and
/// discovering the advertised link relationships. This is one
/// of the more popular methods of resource discovery in the IndieWeb,
/// used to find providers of the [standards][crate::standards].
pub mod link_rel;
/// A normalized representation of properties from Microformats2 JSON.
///
/// This represents a "middle" type for converting Microformats2 JSON into
/// something more structured like an [item][microformats::types::Item].
///
/// It is a thin newtype over a JSON object map. The wrapped map is public
/// and the type dereferences to it, so the map's own methods (`get`,
/// `contains_key`, `insert`, ...) can be called directly on a `Properties`
/// value.
#[derive(Clone, Debug, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct Properties(pub serde_json::Map<String, serde_json::Value>);
impl std::ops::Deref for Properties {
type Target = serde_json::Map<String, serde_json::Value>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl std::ops::DerefMut for Properties {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl From<Properties> for serde_json::Value {
fn from(val: Properties) -> Self {
serde_json::Value::Object(val.0)
}
}
/// Attempts to read a set of properties out of an arbitrary JSON value.
///
/// The value is run through the `Deserialize` implementation derived on
/// [`Properties`]; any failure is reported as a [`serde_json::Error`].
// NOTE(review): presumably only JSON objects are accepted, since the derive
// on a newtype forwards to the wrapped map — confirm against serde's rules.
impl TryFrom<serde_json::Value> for Properties {
    type Error = serde_json::Error;

    fn try_from(val: serde_json::Value) -> Result<Self, Self::Error> {
        // The target type is inferred from the return type (`Self`).
        serde_json::from_value(val)
    }
}
impl Properties {
    /// Returns a normalized copy of this set of properties.
    ///
    /// When the map has a top-level `"properties"` key, the result holds a
    /// copy of the object stored under that key. If that key holds anything
    /// other than a JSON object, the result is empty. When the key is
    /// absent, the result is a plain clone of `self`.
    ///
    /// # Examples
    /// ```
    /// # use indieweb::algorithms::Properties;
    /// # use serde_json::json;
    /// #
    /// assert_eq!(
    ///     Properties::try_from(json!({"properties": {"actual": "value"}})).unwrap().normalize(),
    ///     Properties::try_from(json!({"actual": "value"})).unwrap(),
    ///     "use 'properties' as the value"
    /// );
    /// #
    /// assert_eq!(
    ///     Properties::try_from(json!({"actual": "value"})).unwrap().normalize(),
    ///     Properties::try_from(json!({"actual": "value"})).unwrap(),
    ///     "returns the keys and values as they are"
    /// );
    /// #
    /// ```
    pub fn normalize(&self) -> Properties {
        match self.get("properties") {
            // A nested value that is not an object collapses to an empty map.
            Some(nested) => Properties(nested.as_object().cloned().unwrap_or_default()),
            None => self.clone(),
        }
    }
}
/// Pulls all of the URLs from this item.
///
/// This extracts all of the discoverable URLs from an item. This will
/// pull from:
///
/// * [item.value][microformats::types::Item::value] if it's a [URL value][microformats::types::ValueKind::Url]
/// * any property whose value is a string that can be determined to be a URL
/// * the values of links from the HTML of a fragment
pub fn extract_urls(item: &Item) -> Vec<url::Url> {
let mut all_urls = vec![];
let link_selector = Selector::parse("a[href]").unwrap();
all_urls.extend(
item.children()
.iter()
.filter_map(|child| child.value())
.filter_map(|v| match v {
microformats::types::ValueKind::Url(u) => Some(u),
microformats::types::ValueKind::Plain(_) => None,
}),
);
all_urls.extend(
item.properties()
.values()
.into_iter()
.flatten()
.filter_map(|v| match v {
PropertyValue::Url(u) => Some(vec![u.clone()]),
PropertyValue::Item(i) => Some(extract_urls(i)),
PropertyValue::Fragment(f) => {
let fragment = scraper::Html::parse_fragment(&f.html);
let urls = fragment
.select(&link_selector)
.into_iter()
.filter_map(|elem| elem.value().attr("href"))
.filter_map(|u| u.parse().ok())
.collect::<Vec<_>>();
Some(urls)
}
_ => None,
})
.flatten(),
);
all_urls
}
/// Checks that URLs are collected both from plain URL properties and from
/// links embedded in the HTML of a content fragment.
#[test]
fn extract_urls_test() {
    let parsed = Item::try_from(serde_json::json!({
        "type": ["h-entry"],
        "properties": {
            "content": [{"html": "Well this is a link <a href='http://example.com/3'>fooo</a>", "value": "Well this is a link fooo"}],
            "like-of": ["http://example.com/", "http://example.com/2"]
        }
    }));
    assert_eq!(parsed.as_ref().err(), None);

    // Two `like-of` URLs plus the one link inside the content HTML.
    let urls = extract_urls(&parsed.unwrap());
    assert_eq!(urls.len(), 3);
}