use std::collections::{HashMap, HashSet};
use std::ffi::OsString;
use std::path::{Path, PathBuf};
use lean_semantic_search_contract::DeclarationFeatureRow;
use lean_semantic_search_retrieval::RETRIEVAL_POLICY_VERSION;
use rusqlite::{Connection, params};
use crate::StoreError;
use crate::schema::{
self, META_CORPUS_TOKEN, META_POLICY_VERSION, META_SCHEMA_VERSION, META_TOTAL_DOCUMENTS, STORE_SCHEMA_VERSION,
};
/// One event fed to [`StoreBuilder::accept`].
///
/// The builder pairs the two variants by declaration id; a row is only
/// persisted once both halves for the same id have been seen.
pub enum Ingest {
    /// Announces a declaration by its id.
    Declaration(String),
    /// Delivers the feature row belonging to a declaration.
    Feature(DeclarationFeatureRow),
}
/// Incrementally writes a store into a sibling `.building` file and moves it
/// over the final path when [`StoreBuilder::publish`] is called.
///
/// Dropping the builder closes the connection (if still held) and removes the
/// `.building` file on a best-effort basis.
pub struct StoreBuilder {
    /// Open SQLite connection; `None` once `publish` or `drop` has taken it.
    connection: Option<Connection>,
    /// Path of the in-progress database file.
    temp_path: PathBuf,
    /// Destination the finished database is renamed to.
    final_path: PathBuf,
    /// Value recorded under `META_CORPUS_TOKEN` at publish time.
    corpus_token: String,
    /// Declaration ids that were announced but whose feature row has not arrived yet.
    announced: HashSet<String>,
    /// Feature rows that arrived before their declaration id was announced.
    awaiting: HashMap<String, DeclarationFeatureRow>,
    /// Number of rows written so far; recorded under `META_TOTAL_DOCUMENTS` at publish time.
    total_documents: usize,
}
impl StoreBuilder {
    /// Starts building a store that will be published at `path`.
    ///
    /// Creates the parent directory if needed, discards any leftover
    /// `.building` file next to `path`, opens a fresh SQLite database there,
    /// initializes the schema and opens the transaction that stays active
    /// until [`StoreBuilder::publish`].
    ///
    /// # Errors
    ///
    /// Returns an error if the filesystem preparation, opening the database,
    /// schema initialization, or `BEGIN` fails. In the latter two cases the
    /// partially created `.building` file is removed again.
    pub fn create(path: impl AsRef<Path>, corpus_token: impl Into<String>) -> Result<Self, StoreError> {
        let final_path = path.as_ref().to_path_buf();
        if let Some(parent) = final_path.parent()
            && !parent.as_os_str().is_empty()
        {
            std::fs::create_dir_all(parent)?;
        }
        let temp_path = building_path(&final_path);
        // Remove unconditionally instead of probing with `exists()` first: the
        // probe is racy and reports `false` for errors other than "not found".
        match std::fs::remove_file(&temp_path) {
            Ok(()) => {}
            Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
            Err(error) => return Err(error.into()),
        }
        // Hand the connection to the builder *before* the fallible setup below,
        // so that an early return drops the builder and `Drop` cleans up the
        // half-initialized `.building` file.
        let builder = Self {
            connection: Some(Connection::open(&temp_path)?),
            temp_path,
            final_path,
            corpus_token: corpus_token.into(),
            announced: HashSet::new(),
            awaiting: HashMap::new(),
            total_documents: 0,
        };
        let connection = builder.connection.as_ref().ok_or(StoreError::Closed)?;
        schema::initialize(connection)?;
        connection.execute_batch("BEGIN")?;
        Ok(builder)
    }

    /// Feeds one ingest event into the builder.
    ///
    /// A feature row is written as soon as both its declaration id has been
    /// announced and the row itself has arrived, in either order. Until the
    /// counterpart shows up, the id is parked in `announced` or the row in
    /// `awaiting`; a newer unmatched row for the same id replaces the parked one.
    ///
    /// # Errors
    ///
    /// Propagates any failure from writing the matched row.
    pub fn accept(&mut self, item: Ingest) -> Result<(), StoreError> {
        match item {
            Ingest::Declaration(declaration_id) => {
                if let Some(row) = self.awaiting.remove(&declaration_id) {
                    self.write_row(&row)?;
                } else {
                    self.announced.insert(declaration_id);
                }
            }
            Ingest::Feature(row) => {
                if self.announced.remove(&row.declaration_id) {
                    self.write_row(&row)?;
                } else {
                    self.awaiting.insert(row.declaration_id.clone(), row);
                }
            }
        }
        Ok(())
    }

    /// Finalizes the store and moves it to its final path.
    ///
    /// Writes the schema version, policy version, corpus token and document
    /// count into the metadata table, commits the transaction, closes the
    /// connection and renames the `.building` file over the final path.
    /// Ids and rows that never found their counterpart are not written.
    ///
    /// # Errors
    ///
    /// Returns [`StoreError::Closed`] if the connection is no longer held, or
    /// the underlying error if a metadata write, the commit, the close or the
    /// rename fails. On failure the builder is dropped, which removes the
    /// `.building` file.
    pub fn publish(mut self) -> Result<PathBuf, StoreError> {
        let connection = self.connection.take().ok_or(StoreError::Closed)?;
        write_metadata(&connection, META_SCHEMA_VERSION, STORE_SCHEMA_VERSION)?;
        write_metadata(&connection, META_POLICY_VERSION, RETRIEVAL_POLICY_VERSION)?;
        write_metadata(&connection, META_CORPUS_TOKEN, &self.corpus_token)?;
        write_metadata(&connection, META_TOTAL_DOCUMENTS, &self.total_documents.to_string())?;
        connection.execute_batch("COMMIT")?;
        // Close explicitly so a close failure is reported instead of being
        // swallowed by an implicit drop.
        connection.close().map_err(|(_, error)| StoreError::Sqlite(error))?;
        std::fs::rename(&self.temp_path, &self.final_path)?;
        // `self` is consumed and `Drop` only looks at `temp_path`, so the path
        // can be moved out rather than cloned.
        Ok(std::mem::take(&mut self.final_path))
    }

    /// Persists one matched feature row and its posting entries.
    ///
    /// The row is serialized to JSON and upserted into `feature_rows`; then one
    /// `postings` entry is inserted per distinct key from [`posting_keys`].
    /// Finally the document counter is incremented.
    ///
    /// # Errors
    ///
    /// Returns [`StoreError::Closed`] if the connection is no longer held, or
    /// the serialization / SQLite error that occurred.
    fn write_row(&mut self, row: &DeclarationFeatureRow) -> Result<(), StoreError> {
        let connection = self.connection.as_ref().ok_or(StoreError::Closed)?;
        let row_json = serde_json::to_string(row)?;
        connection
            .prepare_cached(
                "INSERT OR REPLACE INTO feature_rows(declaration_id, feature_version, row_json) VALUES (?1, ?2, ?3)",
            )?
            .execute(params![row.declaration_id, row.feature_version, row_json])?;
        let mut posting =
            connection.prepare_cached("INSERT OR IGNORE INTO postings(key, declaration_id) VALUES (?1, ?2)")?;
        for key in posting_keys(row) {
            posting.execute(params![key, row.declaration_id])?;
        }
        // NOTE(review): the counter grows on every call, including when
        // `INSERT OR REPLACE` overwrote an existing row, and postings of a
        // replaced row are not removed. If the same declaration can be matched
        // twice, the recorded total may exceed the stored row count — confirm
        // against the schema's uniqueness constraints and the ingest contract.
        self.total_documents = self.total_documents.saturating_add(1);
        Ok(())
    }
}
impl Drop for StoreBuilder {
    /// Best-effort cleanup: closes the connection if the builder still holds
    /// it, then deletes the `.building` file. Failures of either step are
    /// ignored.
    fn drop(&mut self) {
        // Close before unlinking so the file is no longer held open.
        if let Some(open) = self.connection.take() {
            let _ = open.close();
        }
        let _ = std::fs::remove_file(&self.temp_path);
    }
}
/// Collects the distinct posting keys of `row`.
///
/// The result holds the four fingerprint strings (statement, safe binder
/// permutation, connective shape, conclusion shape) plus the key of every
/// role feature; keys that coincide appear only once.
fn posting_keys(row: &DeclarationFeatureRow) -> HashSet<&str> {
    let fingerprints = &row.fingerprints;
    // Four fingerprints plus at most one key per role feature.
    let mut distinct: HashSet<&str> = HashSet::with_capacity(row.role_features.len().saturating_add(4));
    distinct.extend([
        fingerprints.statement.as_str(),
        fingerprints.safe_binder_permutation.as_str(),
        fingerprints.connective_shape.as_str(),
        fingerprints.conclusion_shape.as_str(),
    ]);
    distinct.extend(row.role_features.iter().map(|feature| feature.key.as_str()));
    distinct
}
/// Stores `value` under `key` in the `metadata` table, overwriting any
/// existing entry for that key.
///
/// # Errors
///
/// Returns the SQLite error if preparing or executing the statement fails.
fn write_metadata(connection: &Connection, key: &str, value: &str) -> Result<(), StoreError> {
    let mut upsert = connection.prepare_cached("INSERT OR REPLACE INTO metadata(key, value) VALUES (?1, ?2)")?;
    upsert.execute(params![key, value])?;
    Ok(())
}
/// Derives the in-progress file path for `final_path` by appending
/// `.building` to its file name, keeping it in the same directory.
///
/// When `final_path` has no file name component, the bare name `.building`
/// is used instead.
fn building_path(final_path: &Path) -> PathBuf {
    let mut staged_name = match final_path.file_name() {
        Some(name) => name.to_os_string(),
        None => OsString::new(),
    };
    staged_name.push(".building");
    final_path.with_file_name(staged_name)
}