unobtanium 3.0.0

Opinionated web search engine library with crawler and viewer companion.
Documentation
use criterium::DirectMatch;
use criterium::number::NumberCriterium;
use criterium::rusqlite::assembler::*;
use criterium::string::StringCriterium;
use serde::{Serialize,Deserialize};

use crate::crawling::CrawlType;
use crate::crawling::ExitCode;
use crate::criterium::UuidCriterium;
use crate::database::fields::*;
use crate::database::CrawlerSchema;
	
use crate::crawling::CrawlLogEntry;
use crate::time::UtcTimestamp;

use super::UrlCriterium;

/// Criterium for selecting crawl log entries.
///
/// Each variant wraps a more specific criterium that is applied to one
/// property of a crawl log entry. Variant names are (de)serialized in
/// `snake_case`.
#[derive(Clone,Debug,Serialize,Deserialize)]
#[serde(rename_all="snake_case")]
pub enum CrawlLogEntryCriterium {

	/// Match the crawled URL.
	Url(UrlCriterium),
	
	/// Match the numeric representation of a crawl type.
	CrawlType(NumberCriterium<CrawlType>),

	/// Match the Uuid of a specific crawl and by extension crawl summary.
	CrawlUuid(UuidCriterium),

	/// Match when the crawl happened (compared against the time the crawl started).
	CrawlTime(NumberCriterium<UtcTimestamp>),

	/// Match the Uuid of the crawling agent.
	AgentUuid(UuidCriterium),

	/// Match against the numeric crawl exit code.
	///
	/// This may change to a dedicated exit code criterium in the future to allow for
	/// queries for is_contentful or is_redirect.
	ExitCode(NumberCriterium<ExitCode>),

	/// Match how long the request took in milliseconds.
	RequestDurationMilliseconds(NumberCriterium<i64>),

	/// Match the message of the crawl log entry (only intended for human use).
	Message(StringCriterium),
}

/// Assembles a rusqlite query from a `CrawlLogEntryCriterium`.
///
/// Criteria on fields of the crawl log itself are assembled directly against
/// the matching `CrawlLogField`. The URL and agent criteria are assembled
/// under their own prefix and then inner-joined back to the crawl log via the
/// corresponding id field.
impl AssembleRusqliteQuery<CrawlerSchema, ()> for CrawlLogEntryCriterium {
	fn assemble_rusqlite_query(
		&self,
		assembly_context: &AssemblyContext,
		_user_context: &(),
	) -> InvertableRusqliteQuery<CrawlerSchema> {
		match self {
			// Criteria that apply to a field of the crawl log directly.
			Self::CrawlType(crawl_type) => {
				crawl_type.assemble_query(assembly_context, &CrawlLogField::CrawlType.into())
			}
			Self::CrawlUuid(crawl_uuid) => {
				crawl_uuid.assemble_query(assembly_context, &CrawlLogField::CrawlUuid.into())
			}
			Self::CrawlTime(crawl_time) => {
				crawl_time.assemble_query(assembly_context, &CrawlLogField::TimeStartedUnixUtc.into())
			}
			Self::ExitCode(exit_code) => {
				exit_code.assemble_query(assembly_context, &CrawlLogField::ExitCode.into())
			}
			Self::RequestDurationMilliseconds(duration) => {
				duration.assemble_query(assembly_context, &CrawlLogField::TimeTakenMs.into())
			}
			Self::Message(message) => {
				message.assemble_query(assembly_context, &CrawlLogField::Message.into())
			}

			// Criteria that are assembled under a separate prefix and then
			// joined to the crawl log on a shared id.
			Self::Url(url) => {
				let url_context = assembly_context.prefix_with("crawl_log_entry_url_");
				let url_query = url.assemble_rusqlite_query(&url_context, &());
				url_query.inner_join(
					None,
					UrlField::UrlId.into(),
					Some(assembly_context.prefix()),
					CrawlLogField::UrlId.into(),
				)
			}
			Self::AgentUuid(agent_uuid) => {
				let agent_context = assembly_context.prefix_with("crawl_log_agent_");
				let agent_query = agent_uuid.assemble_query(
					&agent_context,
					&AgentField::AgentUuid.into(),
				);
				agent_query.inner_join(
					None,
					AgentField::AgentId.into(),
					Some(assembly_context.prefix()),
					CrawlLogField::AgentId.into(),
				)
			}
		}
	}
}

/// Matches a `CrawlLogEntryCriterium` directly against an in-memory
/// `CrawlLogEntry`.
///
/// The result is `Option<bool>`: every variant except `AgentUuid` forwards to
/// the wrapped criterium and converts its result, while `AgentUuid` always
/// yields `None`.
impl DirectMatch<CrawlLogEntry> for CrawlLogEntryCriterium {
	type Output = Option<bool>;

	fn criterium_match(&self, data: &CrawlLogEntry) -> Self::Output {
		match self {
			Self::Url(url) => {
				url.criterium_match(&data.url).into()
			}
			Self::CrawlType(crawl_type) => {
				crawl_type.criterium_match(&data.crawl_type).into()
			}
			Self::CrawlUuid(crawl_uuid) => {
				crawl_uuid.criterium_match(&data.crawl_uuid).into()
			}
			Self::CrawlTime(crawl_time) => {
				crawl_time.criterium_match(&data.time_started).into()
			}
			Self::ExitCode(exit_code) => {
				exit_code.criterium_match(&data.exit_code).into()
			}
			Self::RequestDurationMilliseconds(duration) => {
				duration.criterium_match(&data.time_taken_ms).into()
			}
			Self::Message(message) => {
				message.criterium_match(&data.message.as_ref()).into()
			}
			// No decision is made for the agent Uuid here.
			// NOTE(review): presumably the entry does not carry the agent
			// Uuid, so this cannot be evaluated without the database - confirm.
			Self::AgentUuid(_) => None,
		}
	}
}