unobtanium 3.0.0

Opinionated Web search engine library with crawler and viewer companion.
Documentation
use criterium::boolean::BooleanCriterium;
use criterium::DirectMatch;
use criterium::number::NumberCriterium;
use criterium::rusqlite::assembler::*;
use criterium::string::StringCriterium;
use serde::{Serialize,Deserialize};

use crate::crawling::CrawlType;
use crate::crawling::ExitCode;
use crate::criterium::UuidCriterium;
use crate::database::fields::*;
use crate::database::SummarySchema;
	
use crate::summary::CrawlSummary;
use crate::time::UtcTimestamp;

/// Criterium for selecting crawl summaries by one of their properties.
///
/// Each variant wraps a more basic criterium that is applied to one
/// property of a crawl summary. Variant names are serialized and
/// deserialized in `snake_case`.
#[derive(Clone,Debug,Serialize,Deserialize)]
#[serde(rename_all="snake_case")]
pub enum CrawlSummaryCriterium {

	/// Match the numeric representation of a crawl type.
	CrawlType(NumberCriterium<CrawlType>),

	/// Match the Uuid of a specific crawl and by extension crawl summary.
	CrawlUuid(UuidCriterium),

	/// Match on when the crawl happened.
	CrawlTime(NumberCriterium<UtcTimestamp>),

	/// Match the Uuid of the crawling agent.
	AgentUuid(UuidCriterium),

	/// Match against the numeric crawl exit code.
	///
	/// This may change to a dedicated exit code criterium in the future to allow for
	/// queries for is_contentful or is_redirect.
	ExitCode(NumberCriterium<ExitCode>),

	/// Match on when the file was last modified according to the server.
	ServerLastModified(NumberCriterium<UtcTimestamp>),

	/// Match how long the request took in milliseconds.
	RequestDurationMilliseconds(NumberCriterium<i64>),

	/// Match on whether the request was approved by the robots.txt.
	WasRobotsTxtApproved(BooleanCriterium),

	/// Match on the status code of the HttpSummary extension.
	HttpStatusCode(NumberCriterium<u16>),

	/// Match on the returned ETag in the HttpSummary extension.
	HttpETag(StringCriterium),
}

impl AssembleRusqliteQuery<SummarySchema, ()> for CrawlSummaryCriterium {
	/// Assemble the SQL query for this criterium.
	///
	/// Every variant hands its inner criterium the database field it is
	/// matched against. The variants that match on a `HttpSummaryField`
	/// additionally get an inner join between
	/// `HttpSummaryField::CrawlSummaryId` and
	/// `CrawlSummaryField::CrawlSummaryId` attached to the resulting query.
	fn assemble_rusqlite_query(
		&self,
		assembly_context: &AssemblyContext,
		_context: &(),
	) -> InvertableRusqliteQuery<SummarySchema> {
		// Each arm yields the assembled query together with a flag telling
		// whether the query references a `HttpSummaryField` and therefore
		// needs the join below.
		let (mut query, needs_http_summary) = match self {
			Self::CrawlType(c) => (
				c.assemble_query(assembly_context, &CrawlSummaryField::CrawlType.into()),
				false,
			),
			Self::CrawlUuid(c) => (
				c.assemble_query(assembly_context, &CrawlSummaryField::CrawlUuid.into()),
				false,
			),
			Self::CrawlTime(c) => (
				c.assemble_query(assembly_context, &CrawlSummaryField::TimeStartedUnixUtc.into()),
				false,
			),
			Self::AgentUuid(c) => (
				c.assemble_query(assembly_context, &CrawlSummaryField::AgentUuid.into()),
				false,
			),
			Self::ExitCode(c) => (
				c.assemble_query(assembly_context, &CrawlSummaryField::ExitCode.into()),
				false,
			),
			Self::ServerLastModified(c) => (
				c.assemble_query(assembly_context, &CrawlSummaryField::TimeLastModifiedUnixUtc.into()),
				false,
			),
			Self::RequestDurationMilliseconds(c) => (
				c.assemble_query(assembly_context, &CrawlSummaryField::RequestDurationMs.into()),
				false,
			),
			Self::WasRobotsTxtApproved(c) => (
				c.assemble_query(assembly_context, &CrawlSummaryField::WasRobotstxtApproved.into()),
				false,
			),
			Self::HttpStatusCode(c) => (
				c.assemble_query(assembly_context, &HttpSummaryField::StatusCode.into()),
				true,
			),
			Self::HttpETag(c) => (
				c.assemble_query(assembly_context, &HttpSummaryField::Etag.into()),
				true,
			),
		};

		if needs_http_summary {
			query = query.inner_join(
				None,
				HttpSummaryField::CrawlSummaryId.into(),
				None,
				CrawlSummaryField::CrawlSummaryId.into()
			);
		}

		query
	}
}

impl DirectMatch<CrawlSummary> for CrawlSummaryCriterium {

	type Output =  bool;

	/// Test whether the given crawl summary matches this criterium.
	///
	/// Each variant forwards the corresponding field of `data` to its inner
	/// criterium. For the HTTP variants the value is read from the optional
	/// `data.http` part, so the inner criterium receives an `Option` that is
	/// `None` when no HTTP information is present.
	fn criterium_match(&self, data: &CrawlSummary) -> bool {
		match self {
			Self::CrawlType(c) => c.criterium_match(&data.crawl_type),
			Self::CrawlUuid(c) => c.criterium_match(&data.crawl_uuid),
			Self::CrawlTime(c) => c.criterium_match(&data.crawl_time),
			Self::AgentUuid(c) => c.criterium_match(&data.agent_uuid),
			Self::ExitCode(c) => c.criterium_match(&data.exit_code),
			Self::ServerLastModified(c) =>
				c.criterium_match(&data.server_last_modified),
			Self::RequestDurationMilliseconds(c) =>
				c.criterium_match(&data.request_duration_ms),
			Self::WasRobotsTxtApproved(c) =>
				c.criterium_match(&data.was_robotstxt_approved),
			// Borrow the HTTP part instead of cloning it: only a single
			// field is read, so copying the whole structure is wasted work.
			Self::HttpStatusCode(c) =>
				c.criterium_match(&data.http.as_ref().map(|h| h.status_code)),
			Self::HttpETag(c) =>
				c.criterium_match(&data.http.as_ref().and_then(|h| h.etag.as_ref())),
		}
	}
}