use log::trace;
use rusqlite::OptionalExtension;
use crate::database::id::NumericDatabseId;
use crate::database::DatabaseError;
use crate::database::id::TokenId;
use crate::database::summary::structs::SummaryDatabaseTransaction;
use crate::token::Token;
use crate::token::TokenStatistics;
use crate::types::Blake2b512Digest;
impl SummaryDatabaseTransaction<'_> {
    /// Replaces the stored token statistics of every text pile named in
    /// `token_statistics_list` in one bulk operation.
    ///
    /// For each `(digest, statistics)` pair this method:
    /// 1. resolves (or creates) the ids of all referenced tokens,
    /// 2. looks up the `text_pile_id` belonging to the digest,
    /// 3. deletes the pile's previous `token_statistics` rows, and
    /// 4. inserts one row per `(token, occurance_count)` entry.
    ///
    /// # Errors
    ///
    /// Returns a `DatabaseError` if a digest has no matching `text_pile`
    /// row or if any SQL statement fails.
    pub fn set_token_statistics_bulk(
        &mut self,
        token_statistics_list: &[(Blake2b512Digest, TokenStatistics)],
    ) -> Result<(), DatabaseError> {
        trace!("summary_db_transaction.set_token_statistics_bulk()");
        // Flatten the (token, occurances) pairs of ALL piles into two
        // index-aligned vectors so token ids can be resolved in one bulk
        // call. Positions in `tokens`, `occurances_list`, and the returned
        // `token_ids` correspond one-to-one.
        let mut tokens: Vec<Token> = Vec::new();
        let mut occurances_list: Vec<usize> = Vec::new();
        for (_, token_statistics) in token_statistics_list {
            for (token, occurances) in &token_statistics.token_occurances {
                tokens.push(token.clone());
                occurances_list.push(*occurances);
            }
        }
        trace!("Fetching token ids ...");
        let token_ids = self.query_or_add_token_ids(&tokens)?;
        let mut delete_token_stat_statement = self.connection().prepare_cached("
            DELETE FROM token_statistics
            WHERE text_pile_id = ?
        ")?;
        let mut insert_token_stat_statement = self.connection().prepare_cached("
            INSERT OR REPLACE INTO token_statistics (
                token_id,
                text_pile_id,
                occurances
            ) VALUES (
                ?,?,?
            )
        ")?;
        let mut get_text_pile_id_from_hash_statement = self.connection().prepare_cached("
            SELECT text_pile_id
            FROM text_pile
            WHERE blake2b512_digest = ?
        ")?;
        trace!("Deriving text pile ids ...");
        let mut text_pile_ids = Vec::with_capacity(token_statistics_list.len());
        for (text_pile_blake2b512_digest, _) in token_statistics_list {
            let text_pile_id: i64 = get_text_pile_id_from_hash_statement.query_row(
                (text_pile_blake2b512_digest,),
                |row| row.get(0)
            )?;
            text_pile_ids.push(text_pile_id);
        }
        trace!("Removing old tokens ...");
        for text_pile_id in &text_pile_ids {
            delete_token_stat_statement.execute((text_pile_id,))?;
        }
        trace!("Integrating tokens ...");
        let mut accumulated_token_id_offset: usize = 0;
        for (pile_index, (_, token_statistics)) in token_statistics_list.iter().enumerate() {
            let text_pile_id = text_pile_ids[pile_index];
            let len = token_statistics.token_occurances.len();
            let range = accumulated_token_id_offset..accumulated_token_id_offset + len;
            // BUG FIX: the occurance count must be read at the same GLOBAL
            // offset as its token id. The previous code indexed
            // `occurances_list` with the pile-local loop index, so every
            // pile after the first was written with the first pile's
            // counts. Zipping the two aligned slices makes the pairing
            // explicit and removes the index arithmetic from the loop.
            for (token_id, occurances) in
                token_ids[range.clone()].iter().zip(&occurances_list[range])
            {
                insert_token_stat_statement.execute((
                    token_id,
                    text_pile_id,
                    *occurances,
                ))?;
            }
            accumulated_token_id_offset += len;
        }
        Ok(())
    }
    /// Looks up the id of every token in `tokens`, inserting any token
    /// that is not yet present in the `token` table.
    ///
    /// The returned vector is index-aligned with `tokens`.
    ///
    /// # Errors
    ///
    /// Returns a `DatabaseError` if any SQL statement fails.
    pub fn query_or_add_token_ids(
        &mut self,
        tokens: &[Token],
    ) -> Result<Vec<TokenId>, DatabaseError> {
        trace!("summary_db_transaction.query_or_add_token_ids()");
        let mut fetch_token_statement = self.connection().prepare_cached(
            "SELECT token_id
            FROM token
            WHERE token_text = ?
            "
        )?;
        let mut insert_token_statement = self.connection().prepare_cached(
            "INSERT INTO token (
                token_text
            ) VALUES (
                ?
            )"
        )?;
        let mut token_ids = Vec::with_capacity(tokens.len());
        for token in tokens {
            // Bind the token text by reference (`&String: ToSql`) instead
            // of cloning it for every statement execution.
            let existing: Option<TokenId> = fetch_token_statement.query_row(
                (&token.token_text,), |row| row.get(0)
            ).optional()?;
            if let Some(id) = existing {
                token_ids.push(id);
            } else {
                insert_token_statement.execute((&token.token_text,))?;
                // last_insert_rowid() reflects the INSERT just executed on
                // this same connection within the transaction.
                token_ids.push(TokenId::new(self.connection().last_insert_rowid()));
            }
        }
        Ok(token_ids)
    }
}