unobtanium 3.0.0

Opinionated Web search engine library with crawler and viewer companion.
Documentation
use criterium::number::AsInteger;
use serde::{Serialize, Deserialize};
use url::Url;

use std::str::FromStr;

use crate::content::LinkLocality;
use crate::content::LocationSignature;

/// Summary of a single link: its target URL, optional text, `rel` flags
/// and information about the element and location it was taken from.
///
/// NOTE(review): the field descriptions below are derived from the field
/// names and types only; confirm them against the code that fills this in.
#[derive(Debug,Clone,Serialize,Deserialize)]
pub struct LinkSummary {
	/// Text of the link, if there is any.
	/// `is_relevant_navigation_link` requires this to be `Some`.
	pub text: Option<String>,
	/// The URL of the link.
	pub url: Url,
	/// Presumably set when the link carries `rel="nofollow"` — TODO confirm.
	pub rel_nofollow: bool,
	/// Presumably set when the link carries `rel="me"` — TODO confirm.
	pub rel_me: bool,
	/// Presumably set when the link carries `rel="tag"` — TODO confirm.
	pub rel_tag: bool,
	/// The kind of content expected at the destination, when known.
	pub link_destination_type: Option<LinkType>,
	/// Location flags for the link (`in_header`, `in_footer`, `in_main`,
	/// `in_article`, `in_nav`, `in_aside` are read by
	/// `is_relevant_navigation_link`).
	pub location_signature: LocationSignature,
	/// Locality of the link; see `LinkLocality` for the possible values.
	pub link_locality: LinkLocality,
	/// Name of the HTML tag the link was taken from, if known.
	/// `is_relevant_navigation_link` only accepts the exact value `"a"`.
	pub html_tag_name: Option<String>,
	/// Presumably set when the link element contains a headline — TODO confirm.
	pub contains_headline: bool,
}

impl LinkSummary {
	/// Returns whether this summary represents a visible navigation link
	/// that is located in the main content of a site.
	///
	/// All of the following have to hold:
	/// * `in_main` or `in_article` is set on the location signature,
	///   or neither `in_header` nor `in_footer` is set,
	/// * neither `in_nav` nor `in_aside` is set,
	/// * the link has a text,
	/// * the HTML tag name is exactly `a`.
	pub fn is_relevant_navigation_link(&self) -> bool {
		let place = &self.location_signature;

		// The main/article flags outweigh an enclosing header or footer.
		let in_content_area = place.in_main
			|| place.in_article
			|| !(place.in_header || place.in_footer);

		if !in_content_area || place.in_nav || place.in_aside {
			return false;
		}

		self.text.is_some() && self.html_tag_name.as_deref() == Some("a")
	}
}

/// What to expect when following a link (across redirects)
///
/// This is a newtype around a `u16` code. The named codes are available as
/// associated constants on this type; any other `u16` value can be stored
/// as well (see the `From<u16>` implementation).
#[derive(Debug,Clone,PartialEq,Eq,Serialize,Deserialize)]
pub struct LinkType(u16);

impl LinkType {
	/// The content is unknown.
	pub const FILE: LinkType = LinkType(0);

	/// Mailto and tel links (and friends).
	pub const COMMUNICATION: LinkType = LinkType(1000);

	/// Documents are for browsing.
	pub const DOCUMENT: LinkType = LinkType(1);
	
	/// Link was marked as being for a feed
	/// or the destinations primary purpose is to be a feed.
	pub const FEED: LinkType = LinkType(2);
	
	/// Link points to a Stylesheet.
	pub const STYLESHEET: LinkType = LinkType(3);
	
	/// Link points to a Script for execution in a Browser.
	pub const SCRIPT: LinkType = LinkType(4);
	
	/// Link points to an image File.
	pub const MEDIA_IMAGE: LinkType = LinkType(101);
	
	/// Link points to an audio File.
	pub const MEDIA_AUDIO: LinkType = LinkType(102);
	
	/// Link points to some kind of video File.
	pub const MEDIA_VIDEO: LinkType = LinkType(103);
	
	/// Link points to a media or media related (playlist, subtitle) file
	///  of unknown nature, it is planned to make those their
	///  own types when the need arises.
	pub const MEDIA: LinkType = LinkType(100);
}

/// Formats a `LinkType` as its lower_snake_case name, or as the decimal
/// code when the code has no name.
///
/// `Display` is implemented instead of `ToString` directly: `to_string()`
/// stays available through the standard library's blanket implementation
/// and returns the same strings as before, while the type additionally
/// becomes usable with `format!`, `write!` and friends.
impl std::fmt::Display for LinkType {
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		let name = match *self {
			Self::FILE => "file",
			Self::COMMUNICATION => "communication",
			Self::DOCUMENT => "document",
			Self::FEED => "feed",
			Self::STYLESHEET => "stylesheet",
			Self::SCRIPT => "script",
			Self::MEDIA_IMAGE => "media_image",
			Self::MEDIA_AUDIO => "media_audio",
			Self::MEDIA_VIDEO => "media_video",
			Self::MEDIA => "media",
			// Codes without a name are written as their number; delegating
			// to the integer keeps width/alignment flags working.
			_ => return std::fmt::Display::fmt(&self.0, f),
		};
		// `pad` honours width/alignment flags; without flags it just writes the name.
		f.pad(name)
	}
}

impl FromStr for LinkType {
	type Err = &'static str;
	
	fn from_str(s: &str) -> Result<Self, Self::Err> {
		match s {
			"file" => Ok(LinkType::FILE),
			"communication" => Ok(LinkType::COMMUNICATION),
			"document" => Ok(LinkType::DOCUMENT),
			"feed" => Ok(LinkType::FEED),
			"stylesheet" => Ok(LinkType::STYLESHEET),
			"script" => Ok(LinkType::SCRIPT),
			"media_image" => Ok(LinkType::MEDIA_IMAGE),
			"media_audio" => Ok(LinkType::MEDIA_AUDIO),
			"media_video" => Ok(LinkType::MEDIA_VIDEO),
			"media" => Ok(LinkType::MEDIA),
			_ => {
				if let Ok(code) = u16::from_str(s) {
					Ok(Self(code))
				} else {
					Err("Not a recognized scheduling reason. Make sure it is in lower_snake_case or number in the u16 range.")
				}
			},
		}
	}
}

impl From<LinkType> for u16 {
	fn from(link_type: LinkType) -> u16 {
		link_type.0
	}
}

impl From<u16> for LinkType {
	fn from(n: u16) -> Self {
		Self(n)
	}
}

impl LinkType {
	/// Creates a `LinkType` from its numeric code.
	///
	/// Convenience wrapper around the `From<u16>` implementation.
	pub fn from_number(n: u16) -> Self {
		Self::from(n)
	}

	/// Consumes the `LinkType` and returns its numeric code.
	///
	/// Convenience wrapper around the `From<LinkType> for u16` implementation.
	pub fn to_number(self) -> u16 {
		u16::from(self)
	}
}

/// Exposes the numeric code of the `LinkType` as an `i64` for criterium.
impl AsInteger for LinkType {
	fn as_criterium_i64(&self) -> i64 {
		// A `u16` always fits into an `i64`, so the conversion is lossless.
		i64::from(self.0)
	}
}