unobtanium 3.0.0

Opinionated Web search engine library with crawler and viewer companion.
Documentation

use criterium::NumberCriterium;
use criterium::rusqlite::assembler::*;

use crate::criterium::OriginCriterium;
use crate::database::id::AgentId;
use crate::database::id::NumericDatabseId;
use crate::database::id::OriginId;
use crate::database::CrawlerSchema;
use crate::database::fields::*;

/// Criterium for matching against the list of crawl candidates
/// (the "todo-items" of the crawler).
#[derive(Clone, Debug)]
pub enum CrawlCandidateCriterium {
	/// Matches todo-items whose url passes the wrapped `OriginCriterium`.
	OriginMatches(OriginCriterium),

	/// Matches only todo-items whose url belongs to the origin
	/// with the given id.
	OriginId(OriginId),

	/// Matches only todo-items for which the origin of the url
	/// is marked as ignored by the given agent.
	///
	/// This is usually combined with an inversion, so that only
	/// items that are *not* ignored are matched.
	OriginIgnoredByAgent(AgentId),

	/// Matches only todo-items for which the url itself
	/// is marked as ignored by the given agent.
	///
	/// This is usually combined with an inversion, so that only
	/// items that are *not* ignored are matched.
	UrlIgnoredByAgent(AgentId),

	/// Compares against the last_crawl timestamp,
	/// which is expected as a Unix-UTC timestamp.
	///
	/// TODO: Introduce a timestamp criterium and use it here.
	LastCrawlTime(NumberCriterium<i64>),
}

/// Assembles the rusqlite query for each [`CrawlCandidateCriterium`] variant.
///
/// Variants that test columns outside of the crawl candidate itself are
/// assembled in a context derived via `prefix_with("crawl_candidate_")`
/// and are then joined back ("docked") onto the calling context through
/// `CrawlCandidateField::UrlId`.
impl AssembleRusqliteQuery<CrawlerSchema, ()> for CrawlCandidateCriterium {
	/// Builds the query for this criterium.
	///
	/// * `assembly_context` - the context the resulting query docks onto;
	///   its prefix is used as the join target for the candidate's url id.
	/// * `_context` - unused, this criterium needs no extra context.
	fn assemble_rusqlite_query(
		&self,
		assembly_context: &AssemblyContext,
		_context: &()
	) -> InvertableRusqliteQuery<CrawlerSchema> {
		match self {
			// Delegate to the origin criterium, then join
			// origin -> url -> crawl candidate.
			Self::OriginMatches(c) =>
				c.assemble_rusqlite_query(&assembly_context.prefix_with("crawl_candidate_"), &())
					.inner_join(
						None,
						OriginField::OriginId.into(),
						None,
						UrlField::OriginId.into(),
					).inner_join(
						None,
						UrlField::UrlId.into(),
						Some(assembly_context.prefix()), // dock to current context
						CrawlCandidateField::UrlId.into()
					),
			// Compare the origin id stored with the url, then join
			// url -> crawl candidate.
			Self::OriginId(id) =>
				NumberCriterium::Equals(id.number())
					.assemble_query(
						&assembly_context.prefix_with("crawl_candidate_"),
						&UrlField::OriginId.into()
					)
					.inner_join(
						None,
						UrlField::UrlId.into(),
						Some(assembly_context.prefix()), // dock to current context
						CrawlCandidateField::UrlId.into()
					),
			Self::OriginIgnoredByAgent(id) => {
				let inner_context = assembly_context.prefix_with("crawl_candidate_");
				// The given id is an agent id, so it has to be compared
				// against the agent column of the ignore table (same shape
				// as the `UrlIgnoredByAgent` branch). The origin id column
				// is only used for the null test and as the join key.
				NumberCriterium::Equals(id.number())
					.assemble_query(
						&inner_context.in_and_block(),
						&TempIgnoreOriginField::AgentId.into()
					)
					.get_corrected_query()
					.and(RusqliteQuery::test_if_null(
						inner_context.prefix(), &TempIgnoreOriginField::OriginId.into(), true
					))
					.left_join(
						Some("ignore_origin"),
						TempIgnoreOriginField::OriginId.into(),
						None,
						UrlField::OriginId.into(),
					).inner_join(
						None,
						UrlField::UrlId.into(),
						Some(assembly_context.prefix()), // dock to current context
						CrawlCandidateField::UrlId.into(),
					).as_invertable()
			},
			Self::UrlIgnoredByAgent(id) => {
				let inner_context = assembly_context.prefix_with("crawl_candidate_");
				// Agent id equality combined with a null test on the url id
				// of the ignore table, which is left-joined directly onto
				// the candidate's url id.
				NumberCriterium::Equals(id.number())
					.assemble_query(
						&inner_context.in_and_block(),
						&TempIgnoreUrlField::AgentId.into()
					)
					.get_corrected_query()
					.and(RusqliteQuery::test_if_null(
						inner_context.prefix(), &TempIgnoreUrlField::UrlId.into(), true))
					.left_join(
						Some("ignore_url"),
						TempIgnoreUrlField::UrlId.into(),
						Some(assembly_context.prefix()), // dock to current context
						CrawlCandidateField::UrlId.into()
					).as_invertable()
			},
			// The timestamp is a column of the crawl candidate itself,
			// so no prefixing or joining is needed.
			Self::LastCrawlTime(number_criterium) => {
				number_criterium.assemble_query(
					assembly_context,
					&CrawlCandidateField::LastCrawlTimeUnixUtc.into()
				)
			}
		}
	}
}

impl From<OriginCriterium> for CrawlCandidateCriterium {
	fn from(c: OriginCriterium) -> Self {
		Self::OriginMatches(c)
	}
}