//! unobtanium 3.0.0
//!
//! Opinionated Web search engine library with crawler and viewer companion.
//! Documentation
use log::trace;

use crate::crawling::File;
use crate::database::crawler::structs::CrawlerDatabase;
use crate::database::crawler::structs::CrawlerDatabaseTransaction;
use crate::database::id::FileInfoId;
use crate::database::id::NumericDatabseId;
use crate::database::DatabaseError;
use crate::database::error::SmuggleDatabaseErrorExtension;
use crate::database::id::CrawlLogEntryId;
use crate::database::Page;
use crate::database::sqlite_helper::*;
use crate::database::WithNumericId;
use crate::url::UrlWithoutFragment;


impl CrawlerDatabase {

	/// Loads the metadata of a single `file` row identified by `id`.
	///
	/// The row-mapping closure always answers `Ok(..)`, so the inner
	/// `Result<File, DatabaseError>` rides through rusqlite untouched;
	/// the trailing `?` then strips the rusqlite layer and the inner
	/// result is handed back to the caller as-is.
	pub fn get_file_info(&self, id: FileInfoId) -> Result<File, DatabaseError> {
		trace!("crawler_db.get_file_info()");
		let smuggled = self.connection().query_row(
			"SELECT
				crawl_log_id,
				request_id,
				url_id,
				last_modified_unix_utc,
				file_size,
				mimetype_id,
				canonical_url_id,
				file_id
			FROM file
			WHERE file_id = ?
			", (id,),
			// Wrap in Ok so a DatabaseError can pass through rusqlite.
			|row| Ok(file_row_to_struct(self, row))
		)?;
		smuggled
	}

	/// Returns the file info most recently crawled for `url`,
	/// together with its numeric row id.
	///
	/// "Most recent" is decided by the start time of the crawl run
	/// that produced the row.
	pub fn get_latest_file_info_by_url(
		&self,
		url: &UrlWithoutFragment
	) -> Result<WithNumericId<File, FileInfoId>, DatabaseError> {
		trace!("crawler_db.get_latest_file_info_by_url()");
		let url_id = self.base().read_url_id(url)?;
		let smuggled = self.connection().query_row(
			"SELECT
				file.crawl_log_id,
				file.request_id,
				file.url_id,
				file.last_modified_unix_utc,
				file.file_size,
				file.mimetype_id,
				file.canonical_url_id,
				file.file_id
			FROM file
			INNER JOIN crawl_log ON crawl_log.crawl_log_id = file.crawl_log_id
			WHERE file.url_id = ?
			ORDER BY crawl_log.time_started_unix_utc DESC
			LIMIT 1
			", (url_id,),
			|row| {
				// Both outcomes are wrapped in Ok so a DatabaseError
				// survives the trip through rusqlite; the outer `?`
				// below only strips the rusqlite layer.
				match file_row_to_struct(self, row) {
					Ok(data) => Ok(Ok(WithNumericId {
						id: row.get(7)?,
						data,
					})),
					Err(e) => Ok(Err(e)),
				}
			}
		)?;
		smuggled
	}


	/// Lists file infos (with their numeric ids) for one `page`
	/// of the `file` table.
	pub fn get_file_infos(
		&self,
		page: &Page
	) -> Result<Vec<WithNumericId<File, FileInfoId>>, DatabaseError> {
		trace!("crawler_db.get_file_infos()");
		let mut statement = self.connection().prepare(
			"SELECT
				crawl_log_id,
				request_id,
				url_id,
				last_modified_unix_utc,
				file_size,
				mimetype_id,
				canonical_url_id,
				file_id
			FROM file
			LIMIT ?
			OFFSET ?
		")?;
		let rows = statement.query_map(
			(page.limit(), page.offset()),
			|row| {
				let id = row.get(7)?;
				// DatabaseError is tunneled through rusqlite's error type
				// here and recovered by the map_err below.
				let data = file_row_to_struct(self, row).smuggle_through_rusqlite()?;
				Ok(WithNumericId { id, data })
			}
		)?;
		rows.map(|r| r.map_err(Into::into)).collect()
	}

}

impl CrawlerDatabaseTransaction<'_> {

	/// Inserts a new row into the `file` table and returns its id.
	///
	/// Url and mimetype lookup rows are created on demand; a canonical
	/// url, when present, is additionally recorded as a crawler-found
	/// link. Requires a writable transaction.
	pub fn store_file_info(
		&mut self,
		file: &File
	) -> Result<FileInfoId, DatabaseError> {

		self.base_transaction.assert_writable("store_file_info")?;

		trace!("crawler_db_transaction.store_file_info()");
		// These two would open two transactions when called from outside the transaction.
		let url_id = self.base_transaction.get_url_id(&file.url, true)?;
		let canonical_url_id = match &file.canonical_url {
			Some(url) => {
				let id = self.base_transaction.get_url_id(url, true)?;
				self.store_crawler_found_link_by_id(id)?;
				Some(id)
			}
			None => None,
		};
		let mimetype_id = self.base_transaction.get_mimetype_id(&file.mime, true)?;
		self.connection().execute(
			"INSERT INTO file (
				crawl_log_id,
				request_id,
				url_id,
				last_modified_unix_utc,
				file_size,
				mimetype_id,
				canonical_url_id
			) Values (?,?,?,?, ?,?,?)
			",(
				file.crawl_log_entry,
				file.request_id,
				url_id,
				to_unix_timestamp_opt(file.last_modified),
				file.size,
				mimetype_id,
				canonical_url_id,
		))?;
		// SQLite hands out the rowid of the insert we just performed.
		Ok(FileInfoId::new(self.connection().last_insert_rowid()))
	}


}

/// Converts one row of the `file` table into a [`File`], resolving the
/// url and mimetype ids through the base database.
///
/// The row must have been selected in this column order:
/// 0 crawl_log_id, 1 request_id, 2 url_id, 3 last_modified_unix_utc,
/// 4 file_size, 5 mimetype_id, 6 canonical_url_id, 7 file_id
fn file_row_to_struct(
	db: &CrawlerDatabase,
	row: &rusqlite::Row
) -> Result<File, DatabaseError> {
	let mime = db.base().get_mimetype_by_id(row.get(5)?)?;
	// canonical_url_id is nullable; only resolve it when present.
	let canonical_url = match row.get(6)? {
		Some(id) => Some(db.base().get_url_by_id(id)?),
		None => None,
	};
	Ok(File {
		crawl_log_entry: CrawlLogEntryId::new(row.get(0)?),
		request_id: row.get(1)?,
		url: db.base().get_url_by_id(row.get(2)?)?,
		last_modified: from_unix_timestamp_opt(row.get(3)?),
		size: row.get(4)?,
		mime,
		canonical_url,
	})
}