use crate::{algorithms::link_rel, http, mf2};
use ::http::Request;
use microformats::types::{Class, KnownClass, PropertyValue};
use microformats::types::{Document, Item};
use url::Url;
/// Author information resolved for an h-entry.
#[derive(Debug, Clone, PartialEq)]
pub struct Author {
/// Name of the author.
pub name: String,
/// URL associated with the author, when one was found.
pub url: Option<Url>,
/// URL of the author's photo, when one was found.
pub photo: Option<Url>,
/// The h-card item the author data was read from; `None` when the author
/// was given only as a plain name.
pub h_card: Option<Item>,
}
/// Errors produced while resolving the author of an h-entry.
#[derive(Debug, thiserror::Error)]
pub enum AuthorshipError {
/// No author could be determined; also returned when a candidate h-card
/// has no usable name property.
#[error("No deterministic author could be found")]
NoAuthorFound,
/// A request made during resolution failed; wraps the crate-level error.
#[error("Network error: {0}")]
NetworkError(#[from] crate::Error),
/// A URL failed to parse.
#[error("Invalid URL: {0}")]
InvalidUrl(#[from] url::ParseError),
/// The item handed to `resolve_author` is not typed as an h-entry.
#[error("No h-entry provided")]
NoEntry,
/// A fetched page could not be converted into a microformats2 document.
#[error("Failed to parse microformats2")]
ParseError,
}
/// The shape an author property value took on an h-entry or h-feed.
#[derive(Debug, Clone, PartialEq)]
enum AuthorProperty {
/// The author was given as an embedded h-card item.
HCard(Item),
/// The author was given as a URL; resolving it requires fetching that page.
Url(Url),
/// The author was given as non-empty plain text.
Name(String),
}
/// Reports whether `entry` declares `page_url` as its own URL.
///
/// The `u-url` and `url` properties are consulted in that order, and only the
/// first value of each is inspected. A URL value is compared directly; a plain
/// text value is parsed as a URL first and ignored when it does not parse.
/// Comparison is done on the serialized URL strings.
fn is_permalink_page(entry: &Item, page_url: &Url) -> bool {
    let target = page_url.as_str();
    ["u-url", "url"].into_iter().any(|key| {
        match entry.properties.get(key).and_then(|values| values.first()) {
            Some(PropertyValue::Url(candidate)) => candidate.as_str() == target,
            Some(PropertyValue::Plain(raw)) => {
                Url::parse(raw).is_ok_and(|parsed| parsed.as_str() == target)
            }
            _ => false,
        }
    })
}
/// Determines the author of `entry`, which was found in `document` at `page_url`.
///
/// Sources are tried in this order, and the first one that yields an author
/// property decides the outcome:
/// 1. an author property on the entry itself,
/// 2. an author property on an h-feed in `document` that contains the entry,
/// 3. a `rel="author"` link for `page_url`, consulted only when the entry's
///    own URL matches `page_url`.
///
/// # Errors
///
/// * [`AuthorshipError::NoEntry`] when `entry` is not typed as an h-entry.
/// * [`AuthorshipError::NoAuthorFound`] when none of the sources produce an author.
/// * Any error raised while looking up rel links or resolving an author page.
#[tracing::instrument(skip(client, entry, document))]
pub async fn resolve_author(
    client: &impl http::Client,
    entry: &Item,
    document: &Document,
    page_url: &Url,
) -> Result<Author, AuthorshipError> {
    if !entry.r#type.contains(&Class::Known(KnownClass::Entry)) {
        return Err(AuthorshipError::NoEntry);
    }

    if let Some(own_author) = extract_author_from_entry(entry) {
        return process_author_property(client, own_author, page_url).await;
    }

    if let Some(inherited_author) = extract_author_from_parent_feed(entry, document) {
        tracing::trace!("Found author from parent h-feed");
        return process_author_property(client, inherited_author, page_url).await;
    }

    // rel-author is only consulted for the entry's own permalink page.
    if !is_permalink_page(entry, page_url) {
        tracing::trace!("Skipping rel-author check - not a permalink page");
        return Err(AuthorshipError::NoAuthorFound);
    }

    let rel_map = link_rel::for_url(client, page_url, &["author"], "GET").await?;
    if let Some(author_urls) = rel_map.get("author")
        && let Some(author_page_url) = author_urls.first()
    {
        tracing::trace!("Found rel-author link: {}", author_page_url);
        return resolve_author_from_page(client, author_page_url, page_url).await;
    }

    Err(AuthorshipError::NoAuthorFound)
}
/// Reads the author property declared directly on `entry`.
///
/// The property names `author`, `p-author` and `u-author` are tried in order;
/// the first one that is present with at least one value decides the result,
/// and only its first value is examined. An embedded h-card, a URL, or
/// non-empty plain text map to the matching [`AuthorProperty`] variant; any
/// other value yields `None` without falling back to the remaining names.
fn extract_author_from_entry(entry: &Item) -> Option<AuthorProperty> {
    let declared = ["author", "p-author", "u-author"]
        .into_iter()
        .find_map(|key| entry.properties.get(key).and_then(|values| values.first()))?;

    match declared {
        PropertyValue::Item(item) if item.is_h_card() => {
            tracing::trace!("Found embedded h-card author");
            Some(AuthorProperty::HCard(item.clone()))
        }
        PropertyValue::Url(url) => {
            tracing::trace!("Found URL author: {}", url);
            Some(AuthorProperty::Url((**url).clone()))
        }
        PropertyValue::Plain(text) if !text.is_empty() => {
            tracing::trace!("Found text author: {}", text);
            Some(AuthorProperty::Name(text.to_string()))
        }
        _ => None,
    }
}
/// Looks for an author property on the h-feed that contains `entry`.
///
/// Only top-level items of `document` typed as h-feed are considered. A feed
/// counts as the parent when one of its children is the very same `entry`
/// reference or has properties equal to `entry`'s. The first such feed decides
/// the result: its `author`, `p-author` and `u-author` properties are tried in
/// order and the first value of the first populated one is converted.
///
/// Returns `None` when no feed contains the entry, when the containing feed
/// has no author property, or when the author value is of an unsupported kind.
fn extract_author_from_parent_feed(entry: &Item, document: &Document) -> Option<AuthorProperty> {
    tracing::trace!("Checking for parent h-feed with author property");

    let feeds: Vec<_> = document
        .items
        .iter()
        .filter(|item| item.r#type.contains(&Class::Known(KnownClass::Feed)))
        .collect();
    tracing::trace!("Found {} h-feed(s) in document", feeds.len());

    let parent_feed = feeds.into_iter().find(|feed| {
        feed.children
            .iter()
            .any(|child| std::ptr::eq(child, entry) || child.properties == entry.properties)
    });
    let Some(parent_feed) = parent_feed else {
        tracing::trace!("Entry not found in any h-feed's children");
        return None;
    };
    tracing::trace!("Entry found in h-feed children");

    let feed_author = ["author", "p-author", "u-author"].into_iter().find_map(|key| {
        parent_feed
            .properties
            .get(key)
            .and_then(|values| values.first())
    });
    let Some(feed_author) = feed_author else {
        tracing::trace!("h-feed has no author property");
        return None;
    };

    match feed_author {
        PropertyValue::Item(item) if item.is_h_card() => {
            tracing::trace!("Found h-feed author as h-card");
            Some(AuthorProperty::HCard(item.clone()))
        }
        PropertyValue::Url(url) => {
            tracing::trace!("Found h-feed author as URL: {}", url);
            Some(AuthorProperty::Url((**url).clone()))
        }
        PropertyValue::Plain(text) if !text.is_empty() => {
            tracing::trace!("Found h-feed author as text: {}", text);
            Some(AuthorProperty::Name(text.to_string()))
        }
        _ => None,
    }
}
/// Turns an extracted [`AuthorProperty`] into a full [`Author`].
///
/// * A plain name becomes an author with only `name` set.
/// * A URL is resolved by fetching the author page (falling back to
///   `page_url`, the page the entry came from).
/// * An h-card supplies name, URL and photo directly and is kept in `h_card`.
///
/// # Errors
///
/// Returns [`AuthorshipError::NoAuthorFound`] when an h-card has no usable
/// name, or whatever error resolving the author page produces.
async fn process_author_property(
    client: &impl http::Client,
    author_property: AuthorProperty,
    page_url: &Url,
) -> Result<Author, AuthorshipError> {
    match author_property {
        AuthorProperty::Name(name) => Ok(Author {
            name,
            url: None,
            photo: None,
            h_card: None,
        }),
        AuthorProperty::Url(author_page_url) => {
            resolve_author_from_page(client, &author_page_url, page_url).await
        }
        AuthorProperty::HCard(card) => Ok(Author {
            name: extract_name_from_h_card(&card)?,
            url: extract_url_from_h_card(&card),
            photo: extract_photo_from_h_card(&card),
            h_card: Some(card),
        }),
    }
}
/// Resolves an author from the page at `author_page_url`.
///
/// The author page is searched for a representative h-card first. When it has
/// none, the page at `original_page_url` is searched for an h-card whose URL
/// equals `author_page_url`.
///
/// # Errors
///
/// Propagates any error from fetching or parsing either page, and returns
/// [`AuthorshipError::NoAuthorFound`] when neither page yields an author.
async fn resolve_author_from_page(
    client: &impl http::Client,
    author_page_url: &Url,
    original_page_url: &Url,
) -> Result<Author, AuthorshipError> {
    tracing::trace!("Fetching author page: {}", author_page_url);
    // Match on the option directly instead of `is_none()` + `unwrap()`, so no
    // panic path remains.
    match extract_representative_h_card(client, author_page_url).await? {
        Some(author) => Ok(author),
        None => {
            tracing::trace!(
                "No representative h-card found on author page, checking original page"
            );
            find_author_on_original_page(client, author_page_url, original_page_url).await
        }
    }
}
async fn extract_representative_h_card(
client: &impl http::Client,
author_page_url: &Url,
) -> Result<Option<Author>, AuthorshipError> {
let req = Request::builder()
.uri(author_page_url.as_str())
.header("Accept", "text/html, application/mf2+json")
.body(Default::default())
.map_err(crate::Error::Http)?;
let resp = client.send_request(req).await?;
let document = mf2::http::to_mf2_document(
resp.map(|body| body.as_bytes().to_vec()),
author_page_url.as_str(),
)
.map_err(|_| AuthorshipError::ParseError)?;
for item in &document.items {
if item.is_h_card() {
let url = extract_url_from_h_card(item);
let uid = extract_uid_from_h_card(item);
if let (Some(url), Some(uid)) = (url, uid)
&& url == *author_page_url
&& uid == *author_page_url
{
let name = extract_name_from_h_card(item)?;
let photo = extract_photo_from_h_card(item);
return Ok(Some(Author {
name,
url: Some(url),
photo,
h_card: Some(item.clone()),
}));
}
}
}
let rel_map = link_rel::for_url(client, author_page_url, &["me"], "GET").await?;
if let Some(me_urls) = rel_map.get("me") {
for item in &document.items {
if item.is_h_card()
&& let Some(url) = extract_url_from_h_card(item)
&& me_urls.contains(&url)
{
let name = extract_name_from_h_card(item)?;
let photo = extract_photo_from_h_card(item);
return Ok(Some(Author {
name,
url: Some(url),
photo,
h_card: Some(item.clone()),
}));
}
}
}
Ok(None)
}
/// Fetches `original_page_url` and looks for an h-card describing the author.
///
/// The first top-level h-card whose URL equals `author_page_url` is used.
///
/// # Errors
///
/// * Request construction or sending failing.
/// * [`AuthorshipError::ParseError`] when the response cannot be converted to
///   a microformats2 document.
/// * [`AuthorshipError::NoAuthorFound`] when no h-card matches, or the matching
///   one has no usable name.
async fn find_author_on_original_page(
    client: &impl http::Client,
    author_page_url: &Url,
    original_page_url: &Url,
) -> Result<Author, AuthorshipError> {
    let request = Request::builder()
        .uri(original_page_url.as_str())
        .header("Accept", "text/html, application/mf2+json")
        .body(Default::default())
        .map_err(crate::Error::Http)?;
    let response = client.send_request(request).await?;
    let document = mf2::http::to_mf2_document(
        response.map(|body| body.as_bytes().to_vec()),
        original_page_url.as_str(),
    )
    .map_err(|_| AuthorshipError::ParseError)?;

    let matching_card = document
        .items
        .iter()
        .filter(|item| item.is_h_card())
        .find_map(|item| {
            extract_url_from_h_card(item)
                .filter(|url| url == author_page_url)
                .map(|url| (item, url))
        });
    let Some((card, url)) = matching_card else {
        return Err(AuthorshipError::NoAuthorFound);
    };

    let name = extract_name_from_h_card(card)?;
    let photo = extract_photo_from_h_card(card);
    Ok(Author {
        name,
        url: Some(url),
        photo,
        h_card: Some(card.clone()),
    })
}
/// Private extension trait adding an h-card type check to items.
trait HCardExt {
/// Returns `true` when the value is typed as an h-card.
fn is_h_card(&self) -> bool;
}
impl HCardExt for Item {
    /// An item is an h-card when its type list includes the known `Card` class.
    fn is_h_card(&self) -> bool {
        self.r#type.contains(&Class::Known(KnownClass::Card))
    }
}
/// Picks a display name from an h-card.
///
/// The properties `name`, `p-name`, `given-name` and `family-name` are tried
/// in order; the first one whose first value is non-empty plain text wins.
///
/// # Errors
///
/// Returns [`AuthorshipError::NoAuthorFound`] when none of them qualifies.
fn extract_name_from_h_card(h_card: &Item) -> Result<String, AuthorshipError> {
    ["name", "p-name", "given-name", "family-name"]
        .into_iter()
        .find_map(|key| match h_card.properties.get(key)?.first()? {
            PropertyValue::Plain(text) if !text.is_empty() => Some(text.to_string()),
            _ => None,
        })
        .ok_or(AuthorshipError::NoAuthorFound)
}
/// Returns the h-card's URL.
///
/// `url` is tried before `u-url`; for each, only the first value is examined
/// and it must be a URL value to count.
fn extract_url_from_h_card(h_card: &Item) -> Option<Url> {
    ["url", "u-url"]
        .into_iter()
        .find_map(|key| match h_card.properties.get(key)?.first()? {
            PropertyValue::Url(found) => Some((**found).clone()),
            _ => None,
        })
}
/// Returns the h-card's `uid` when its first value is a URL value.
fn extract_uid_from_h_card(h_card: &Item) -> Option<Url> {
    match h_card.properties.get("uid")?.first()? {
        PropertyValue::Url(uid) => Some((**uid).clone()),
        _ => None,
    }
}
/// Returns the h-card's photo URL.
///
/// `photo` is tried before `u-photo`; for each, only the first value is
/// examined and it must be a URL value to count.
fn extract_photo_from_h_card(h_card: &Item) -> Option<Url> {
    for key in ["photo", "u-photo"] {
        let first_value = h_card
            .properties
            .get(key)
            .and_then(|values| values.first());
        if let Some(PropertyValue::Url(photo)) = first_value {
            return Some((**photo).clone());
        }
    }
    None
}
#[cfg(test)]
mod tests {
    use super::*;
    use microformats::types::{Class, Document};
    use std::collections::BTreeMap;

    /// Wraps `text` as a plain-text property value.
    fn plain_value(text: &str) -> PropertyValue {
        PropertyValue::Plain(text.to_string().into())
    }

    /// Parses `raw` as a URL and wraps it as a URL property value.
    fn url_value(raw: &str) -> PropertyValue {
        PropertyValue::Url(
            raw.parse::<Url>()
                .expect("test URL must be valid")
                .into(),
        )
    }

    /// Builds a childless item of the given known class with `properties`.
    fn create_test_item(
        class: KnownClass,
        properties: BTreeMap<String, Vec<PropertyValue>>,
    ) -> Item {
        Item {
            r#type: vec![Class::Known(class)],
            properties,
            value: None,
            children: Vec::new().into(),
            id: None,
            lang: None,
        }
    }

    /// Builds an item of the given class holding at most one single-valued property.
    fn create_test_item_with(class: KnownClass, property: Option<(&str, PropertyValue)>) -> Item {
        let properties: BTreeMap<String, Vec<PropertyValue>> = property
            .into_iter()
            .map(|(name, value)| (name.to_string(), vec![value]))
            .collect();
        create_test_item(class, properties)
    }

    /// Builds an h-card with a name, a URL and optionally a photo.
    fn create_test_h_card(name: &str, url: &str, photo: Option<&str>) -> Item {
        let mut properties = BTreeMap::new();
        properties.insert("name".to_string(), vec![plain_value(name)]);
        properties.insert("url".to_string(), vec![url_value(url)]);
        if let Some(photo_url) = photo {
            properties.insert("photo".to_string(), vec![url_value(photo_url)]);
        }
        create_test_item(KnownClass::Card, properties)
    }

    /// Builds an h-entry with at most one property.
    fn create_test_h_entry(author_property: Option<(&str, PropertyValue)>) -> Item {
        create_test_item_with(KnownClass::Entry, author_property)
    }

    /// Builds an h-feed with at most one property and no children.
    fn create_test_h_feed(author_property: Option<(&str, PropertyValue)>) -> Item {
        create_test_item_with(KnownClass::Feed, author_property)
    }

    /// Builds a document containing only the given top-level items.
    fn create_test_document(items: Vec<Item>) -> Document {
        Document {
            rels: Default::default(),
            items,
            url: None,
            lang: None,
        }
    }

    #[test]
    fn test_step4_hfeed_with_author_hcard() {
        let feed_author_hcard = create_test_h_card(
            "Feed Author",
            "https://feed-author.com/",
            Some("https://feed-author.com/photo.jpg"),
        );
        let h_entry = create_test_h_entry(None);
        let mut h_feed =
            create_test_h_feed(Some(("author", PropertyValue::Item(feed_author_hcard))));
        h_feed.children = vec![h_entry.clone()].into();
        let document = create_test_document(vec![h_feed]);

        let author_property = extract_author_from_parent_feed(&h_entry, &document);
        assert!(
            author_property.is_some(),
            "Should find author from parent h-feed"
        );
        assert!(
            matches!(author_property, Some(AuthorProperty::HCard(_))),
            "Expected AuthorProperty::HCard from h-feed author"
        );
    }

    #[test]
    fn test_step4_hfeed_with_author_url() {
        let h_entry = create_test_h_entry(None);
        let mut h_feed =
            create_test_h_feed(Some(("author", url_value("https://feed-author.com/"))));
        h_feed.children = vec![h_entry.clone()].into();
        let document = create_test_document(vec![h_feed]);

        let author_property = extract_author_from_parent_feed(&h_entry, &document);
        assert!(
            author_property.is_some(),
            "Should find author from parent h-feed"
        );
        let Some(AuthorProperty::Url(url)) = author_property else {
            panic!("Expected AuthorProperty::Url from h-feed author");
        };
        assert_eq!(url.as_str(), "https://feed-author.com/");
    }

    #[test]
    fn test_step4_hfeed_no_author() {
        let h_entry = create_test_h_entry(None);
        let mut h_feed = create_test_h_feed(None);
        h_feed.children = vec![h_entry.clone()].into();
        let document = create_test_document(vec![h_feed]);

        let author_property = extract_author_from_parent_feed(&h_entry, &document);
        assert!(
            author_property.is_none(),
            "Should return None when h-feed has no author"
        );
    }

    #[test]
    fn test_step4_entry_not_in_feed() {
        // The feed has an author but no children, so the entry is not part of it.
        let h_feed = create_test_h_feed(Some(("author", url_value("https://feed-author.com/"))));
        let h_entry = create_test_h_entry(None);
        let document = create_test_document(vec![h_feed]);

        let author_property = extract_author_from_parent_feed(&h_entry, &document);
        assert!(
            author_property.is_none(),
            "Should return None when entry is not in h-feed's children"
        );
    }

    #[test]
    fn test_extract_author_from_h_card() {
        let h_card = create_test_h_card(
            "John Doe",
            "https://example.com/john",
            Some("https://example.com/john.jpg"),
        );
        let entry = create_test_h_entry(Some(("author", PropertyValue::Item(h_card))));

        let author_property = extract_author_from_entry(&entry);
        assert!(author_property.is_some());
        assert!(
            matches!(author_property, Some(AuthorProperty::HCard(_))),
            "Expected AuthorProperty::HCard"
        );
    }

    #[test]
    fn test_extract_author_from_url() {
        let entry = create_test_h_entry(Some(("author", url_value("https://example.com/author"))));

        let author_property = extract_author_from_entry(&entry);
        assert!(author_property.is_some());
        let Some(AuthorProperty::Url(url)) = author_property else {
            panic!("Expected AuthorProperty::Url");
        };
        assert_eq!(url.as_str(), "https://example.com/author");
    }

    #[test]
    fn test_extract_author_from_name() {
        let entry = create_test_h_entry(Some(("author", plain_value("Jane Smith"))));

        let author_property = extract_author_from_entry(&entry);
        assert!(author_property.is_some());
        let Some(AuthorProperty::Name(name)) = author_property else {
            panic!("Expected AuthorProperty::Name");
        };
        assert_eq!(name, "Jane Smith");
    }

    #[test]
    fn test_extract_name_from_h_card() {
        let h_card = create_test_h_card("Test Author", "https://example.com", None);
        let name = extract_name_from_h_card(&h_card).unwrap();
        assert_eq!(name, "Test Author");
    }

    #[test]
    fn test_extract_url_from_h_card() {
        let h_card = create_test_h_card("Test Author", "https://example.com/author", None);
        let url = extract_url_from_h_card(&h_card).unwrap();
        assert_eq!(url.as_str(), "https://example.com/author");
    }

    #[test]
    fn test_extract_photo_from_h_card() {
        let h_card = create_test_h_card(
            "Test Author",
            "https://example.com/author",
            Some("https://example.com/photo.jpg"),
        );
        let photo = extract_photo_from_h_card(&h_card).unwrap();
        assert_eq!(photo.as_str(), "https://example.com/photo.jpg");
    }

    #[test]
    fn test_no_author_property() {
        let entry = create_test_h_entry(None);
        let author_property = extract_author_from_entry(&entry);
        assert!(author_property.is_none());
    }

    #[test]
    fn test_empty_h_entry() {
        // The item is typed as an event and has no properties at all;
        // `extract_author_from_entry` only reads properties, so the type is irrelevant here.
        let entry = create_test_item(KnownClass::Event, BTreeMap::new());
        let author_property = extract_author_from_entry(&entry);
        assert!(author_property.is_none());
    }

    #[test]
    fn test_is_permalink_page_with_matching_u_url() {
        let page_url = Url::parse("https://example.com/post/123").unwrap();
        let entry =
            create_test_h_entry(Some(("u-url", PropertyValue::Url(page_url.clone().into()))));
        assert!(
            is_permalink_page(&entry, &page_url),
            "Should return true when u-url matches page URL"
        );
    }

    #[test]
    fn test_is_permalink_page_with_non_matching_u_url() {
        let page_url = Url::parse("https://example.com/post/123").unwrap();
        let different_url = Url::parse("https://example.com/post/456").unwrap();
        let entry = create_test_h_entry(Some(("u-url", PropertyValue::Url(different_url.into()))));
        assert!(
            !is_permalink_page(&entry, &page_url),
            "Should return false when u-url does not match page URL"
        );
    }

    #[test]
    fn test_is_permalink_page_with_matching_url() {
        let page_url = Url::parse("https://example.com/post/123").unwrap();
        let entry =
            create_test_h_entry(Some(("url", PropertyValue::Url(page_url.clone().into()))));
        assert!(
            is_permalink_page(&entry, &page_url),
            "Should return true when url matches page URL"
        );
    }

    #[test]
    fn test_is_permalink_page_without_url() {
        let page_url = Url::parse("https://example.com/post/123").unwrap();
        let entry = create_test_h_entry(Some(("content", plain_value("Some content"))));
        assert!(
            !is_permalink_page(&entry, &page_url),
            "Should return false when entry has no u-url or url"
        );
    }

    #[test]
    fn test_is_permalink_page_with_plain_url() {
        let page_url = Url::parse("https://example.com/post/123").unwrap();
        let entry = create_test_h_entry(Some(("url", plain_value("https://example.com/post/123"))));
        assert!(
            is_permalink_page(&entry, &page_url),
            "Should return true when url as plain text matches page URL"
        );
    }

    #[test]
    fn test_is_permalink_page_with_plain_url_non_matching() {
        let page_url = Url::parse("https://example.com/post/123").unwrap();
        let entry = create_test_h_entry(Some(("url", plain_value("https://example.com/post/456"))));
        assert!(
            !is_permalink_page(&entry, &page_url),
            "Should return false when url as plain text does not match page URL"
        );
    }

    #[test]
    fn test_is_permalink_page_with_invalid_plain_url() {
        let page_url = Url::parse("https://example.com/post/123").unwrap();
        let entry = create_test_h_entry(Some(("url", plain_value("not a valid url"))));
        assert!(
            !is_permalink_page(&entry, &page_url),
            "Should return false when url is invalid plain text"
        );
    }
}