//! unobtanium 3.0.0
//!
//! Opinionated Web search engine library with crawler and viewer companion.
//! Documentation
use log::trace;
use rusqlite::OptionalExtension;

use crate::database::id::NumericDatabseId;
use crate::database::DatabaseError;
use crate::database::id::TokenId;
use crate::database::summary::structs::SummaryDatabaseTransaction;
use crate::token::Token;
use crate::token::TokenStatistics;
use crate::types::Blake2b512Digest;

impl SummaryDatabaseTransaction<'_> {

	/// Replaces the stored token statistics for every text pile in
	/// `token_statistics_list`.
	///
	/// For each `(digest, statistics)` pair this resolves the text pile id
	/// from its BLAKE2b-512 digest, deletes that pile's old
	/// `token_statistics` rows, and inserts one row per
	/// `(token, occurance count)` entry. Token ids are resolved (or created)
	/// in bulk up front via [`Self::query_or_add_token_ids`].
	///
	/// # Errors
	///
	/// Returns a [`DatabaseError`] if statement preparation or execution
	/// fails, or if a digest has no matching `text_pile` row.
	pub fn set_token_statistics_bulk(
		&mut self,
		token_statistics_list: &[(Blake2b512Digest, TokenStatistics)],
	) -> Result<(), DatabaseError> {
		trace!("summary_db_transaction.set_token_statistics_bulk()");

		// Flatten all (token, occurance) pairs across every pile into two
		// parallel vectors; per-pile slices are recovered below via offsets.
		let mut tokens: Vec<Token> = Vec::new();
		let mut occurances_list: Vec<usize> = Vec::new();

		for (_, token_statistics) in token_statistics_list {
			for (token, occurances) in &token_statistics.token_occurances {
				tokens.push(token.clone());
				occurances_list.push(*occurances);
			}
		}

		trace!("Fetching token ids ...");
		let token_ids = self.query_or_add_token_ids(&tokens)?;

		let mut delete_token_stat_statement = self.connection().prepare_cached("
			DELETE FROM token_statistics
			WHERE text_pile_id = ?
		")?;

		let mut insert_token_stat_statement = self.connection().prepare_cached("
			INSERT OR REPLACE INTO token_statistics (
				token_id,
				text_pile_id,
				occurances
			) VALUES (
				?,?,?
			)
		")?;

		let mut get_text_pile_id_from_hash_statement = self.connection().prepare_cached("
			SELECT text_pile_id
			FROM text_pile
			WHERE blake2b512_digest = ?
		")?;

		trace!("Deriving text pile ids ...");

		let mut text_pile_ids = Vec::with_capacity(token_statistics_list.len());

		for (text_pile_blake2b512_digest, _) in token_statistics_list {
			let text_pile_id: i64 = get_text_pile_id_from_hash_statement.query_row(
				(text_pile_blake2b512_digest,),
				|row| row.get(0)
			)?;
			text_pile_ids.push(text_pile_id);
		}

		trace!("Removing old tokens ...");

		for text_pile_id in &text_pile_ids {
			delete_token_stat_statement.execute((text_pile_id,))?;
		}

		trace!("Integrating tokens ...");

		// Walk the flattened token/occurance vectors pile by pile.
		let mut accumulated_token_id_offset: usize = 0;

		for (pile_index, (_, token_statistics)) in token_statistics_list.iter().enumerate() {

			let text_pile_id = text_pile_ids[pile_index];

			let len = token_statistics.token_occurances.len();
			let range = accumulated_token_id_offset..accumulated_token_id_offset + len;

			// BUG FIX: the occurance count must come from the same flattened
			// offset as the token id. The previous code indexed
			// `occurances_list` with the slice-local enumerate index, so
			// every pile after the first was stored with the FIRST pile's
			// counts. Zipping the two parallel slices keeps them in lockstep.
			for (token_id, occurances) in
				token_ids[range.clone()].iter().zip(&occurances_list[range])
			{
				insert_token_stat_statement.execute((
					token_id,
					text_pile_id,
					*occurances
				))?;
			}

			accumulated_token_id_offset += len;
		}
		Ok(())
	}

	/// Returns the database id for every token in `tokens`, inserting any
	/// token that does not exist yet in the `token` table.
	///
	/// The returned vector is parallel to `tokens`: same order, same length.
	///
	/// # Errors
	///
	/// Returns a [`DatabaseError`] if a lookup or insert fails.
	pub fn query_or_add_token_ids(
		&mut self,
		tokens: &[Token],
	) -> Result<Vec<TokenId>, DatabaseError> {
		trace!("summary_db_transaction.query_or_add_token_ids()");

		let mut fetch_token_statement = self.connection().prepare_cached(
			"SELECT token_id
			FROM token
			WHERE token_text = ?
			"
		)?;
		let mut insert_token_statement = self.connection().prepare_cached(
			"INSERT INTO token (
				token_text
			) VALUES (
				?
			)"
		)?;

		let mut token_ids = Vec::with_capacity(tokens.len());

		for token in tokens {
			// Borrow the token text instead of cloning it per iteration;
			// rusqlite provides `impl ToSql for &T where T: ToSql`, so no
			// allocation is needed for either the lookup or the insert.
			if let Some(id) = fetch_token_statement.query_row(
				(&token.token_text,), |row| row.get(0)
			).optional()? {
				token_ids.push(id);
			} else {
				insert_token_statement.execute((&token.token_text,))?;
				// last_insert_rowid() is the rowid of the INSERT just above,
				// valid because this runs on the same connection.
				token_ids.push(TokenId::new(self.connection().last_insert_rowid()));
			}
		}

		Ok(token_ids)
	}

}