unobtanium 3.0.0

Opinionated Web search engine library with crawler and viewer companion.
Documentation
use log::trace;

use std::collections::HashMap;

use crate::crawling::ExitCode;
use crate::crawling::Request;
use crate::database::crawler::structs::CrawlerDatabase;
use crate::database::error::SmuggleDatabaseErrorExtension;
use crate::database::id::CrawlLogEntryId;
use crate::database::id::NumericDatabseId;
use crate::database::id::RequestId;
use crate::database::DatabaseError;
use crate::database::sqlite_helper::*;


impl CrawlerDatabase {
	pub fn store_request(
		&mut self,
		request: &Request
	) -> Result<RequestId, DatabaseError> {

		self.base().assert_writable("store_request")?;
		
		trace!("crawler_db.store_request()");
		let url_id = self.base_mut().get_or_add_url_id(&request.url)?;
		let mut statement = self.connection().prepare_cached(
			"INSERT INTO request (
				crawl_log_id,
				url_id,
				time_sent_unix_utc,
				robotstxt_approved,
				request_duration_ms,
				exit_code,
				server_last_modified_unix_utc,
				http_status_code,
				http_etag
			) Values (?,?,?,?,?, ?,?,?,?)
		")?;
		statement.execute((
			request.crawl_log_entry,
			url_id,
			request.time_sent.timestamp(),
			request.robotstxt_approved,
			request.request_duration_ms,
			request.exit_code.to_number(),
			to_unix_timestamp_opt(request.server_last_modified),
			request.http.as_ref().map(|h| h.status_code),
			request.http.as_ref().map(|h| h.etag.clone()),
		))?;
		return Ok(RequestId::new(self.connection().last_insert_rowid()));
	}

	pub fn get_request(&self, request_id: RequestId) -> Result<Request, DatabaseError> {
		trace!("crawler_db.get_request()");
		self.connection().query_row("
			SELECT
				crawl_log_id,
				url_id,
				time_sent_unix_utc,
				request_duration_ms,
				robotstxt_approved,
				exit_code,
				server_last_modified_unix_utc,
				http_status_code,
				http_etag
			FROM request
			WHERE request_id = ?
		", (request_id,), |row| {
			Ok(Request{
				crawl_log_entry: CrawlLogEntryId::new(row.get(0)?),
				url: self.base().get_url_by_id(row.get(1)?)
					.smuggle_through_rusqlite()?,
				time_sent: from_unix_timestamp_or_epoch(row.get(2)?),
				request_duration_ms: row.get(3)?,
				robotstxt_approved: row.get(4)?,
				exit_code: ExitCode::from_number(row.get(5)?),
				server_last_modified: from_unix_timestamp_opt(row.get(6)?),
				http: http_summary_from_row(row,7,8)?,
			})
		}).map_err(Into::into)
	}

	pub fn get_request_bulk(
		&self, request_ids: &[RequestId]
	) -> Result<HashMap<RequestId,Request>, DatabaseError> {
		trace!("crawler_db.get_request_bulk()");
		let mut get_request_statement = self.connection().prepare("
			SELECT
				crawl_log_id,
				url_id,
				time_sent_unix_utc,
				request_duration_ms,
				robotstxt_approved,
				exit_code,
				server_last_modified_unix_utc,
				http_status_code,
				http_etag
			FROM request
			WHERE request_id = ?
		")?;
		let mut results = HashMap::with_capacity(request_ids.len());
		for request_id in request_ids {
			results.insert(
				*request_id,
				get_request_statement.query_row((request_id,), |row| {
				Ok(Request{
					crawl_log_entry: CrawlLogEntryId::new(row.get(0)?),
					url: self.base().get_url_by_id(row.get(1)?)
						.smuggle_through_rusqlite()?,
					time_sent: from_unix_timestamp_or_epoch(row.get(2)?),
					request_duration_ms: row.get(3)?,
					robotstxt_approved: row.get(4)?,
					exit_code: ExitCode::from_number(row.get(5)?),
					server_last_modified: from_unix_timestamp_opt(row.get(6)?),
					http: http_summary_from_row(row,7,8)?,
				})
			})?);
		}
		return Ok(results);
	}

}