use std::collections::HashSet;
use std::path::PathBuf;
use std::time::Duration;
use crate::memory::config::MemoryConfig;
mod schema;
#[path = "connection.rs"]
mod connection;
#[path = "recovery.rs"]
mod recovery;
#[path = "embeddings.rs"]
mod embeddings;
#[path = "migrations.rs"]
mod migrations;
#[path = "produce.rs"]
mod produce;
#[path = "produce_split.rs"]
mod produce_split;
#[path = "raw_refs.rs"]
mod raw_refs;
#[path = "semantic.rs"]
mod semantic;
#[path = "store.rs"]
mod store;
#[path = "store_delete.rs"]
mod store_delete;
#[path = "store_sources.rs"]
mod store_sources;
#[path = "types.rs"]
mod types;
#[cfg(test)]
#[path = "store_conn_tests.rs"]
mod store_conn_tests;
#[cfg(test)]
#[path = "store_embed_tests.rs"]
mod store_embed_tests;
#[cfg(test)]
#[path = "store_tests.rs"]
mod store_tests;
pub use produce::{chunk_markdown, ChunkerInput, ChunkerOptions, DEFAULT_CHUNK_MAX_TOKENS};
pub use semantic::{chunk_markdown as chunk_semantic, Chunk as SemanticChunk};
pub use types::{
approx_token_count, chunk_id, conservative_token_estimate, truncate_to_conservative_tokens,
Chunk, DataSource, Metadata, SourceKind, SourceRef, StagedChunk,
};
pub use connection::with_connection;
pub use embeddings::{
clear_chunk_reembed_skipped, clear_reembed_skipped_for_signature, get_chunk_embedding,
get_chunk_embedding_for_signature, get_chunk_embeddings_batch,
get_chunk_embeddings_for_signature_batch, mark_chunk_reembed_skipped, set_chunk_embedding,
set_chunk_embedding_for_signature, tree_active_signature,
};
pub use raw_refs::{
get_chunk_content_path, get_chunk_content_pointers, get_chunk_raw_refs,
get_summary_content_pointers, list_chunk_raw_ref_paths_with_prefix,
list_summaries_with_content_path, set_chunk_raw_refs, set_chunk_raw_refs_tx, RawRef,
};
pub use store::{
claim_source_ingest_tx, count_chunks, count_chunks_by_lifecycle_status,
count_raw_paths_ingested_with_prefix, extraction_coverage, filter_raw_paths_not_ingested,
get_chunk, get_chunk_lifecycle_status, get_chunks_batch, is_source_ingested, list_chunks,
mark_raw_paths_ingested, set_chunk_lifecycle_status, upsert_chunks, ListChunksQuery,
CHUNK_STATUS_ADMITTED, CHUNK_STATUS_BUFFERED, CHUNK_STATUS_DROPPED,
CHUNK_STATUS_PENDING_EXTRACTION, CHUNK_STATUS_SEALED, RAW_FILE_GATE_KIND,
};
pub use store_delete::{
delete_chunks_by_owner, delete_chunks_by_source, delete_chunks_by_source_prefix,
};
/// Directory name joined onto the workspace path by `db_path_for` and `content_root`.
pub(crate) const DB_DIR: &str = "memory_tree";
/// File name joined under `DB_DIR` by `db_path_for` to form the database path.
pub(crate) const DB_FILE: &str = "chunks.db";
/// A 15-second duration.
/// NOTE(review): presumably applied as the SQLite busy timeout when connections are
/// opened — confirm against the `connection` module.
pub(crate) const SQLITE_BUSY_TIMEOUT: Duration = Duration::from_secs(15);
/// Version number `1`.
/// NOTE(review): presumably identifies the tree-embedding migration step — confirm
/// against the `migrations` module.
pub(crate) const TREE_EMBEDDING_MIGRATION_VERSION: i64 = 1;
/// Version number `2`.
/// NOTE(review): presumably identifies the global-topic purge migration step — confirm
/// against the `migrations` module.
pub(crate) const GLOBAL_TOPIC_PURGE_MIGRATION_VERSION: i64 = 2;
/// Builds the database file path for `config`:
/// `<config.workspace>/<DB_DIR>/<DB_FILE>`.
///
/// Only composes the path; nothing is created or checked on disk here.
pub(crate) fn db_path_for(config: &MemoryConfig) -> PathBuf {
    let tree_dir = config.workspace.join(DB_DIR);
    tree_dir.join(DB_FILE)
}
/// Builds the `content` directory path for `config`:
/// `<config.workspace>/<DB_DIR>/content`.
///
/// Only composes the path; nothing is created or checked on disk here.
pub(crate) fn content_root(config: &MemoryConfig) -> PathBuf {
    let tree_dir = config.workspace.join(DB_DIR);
    tree_dir.join("content")
}
// NOTE(review): no caller is visible in this part of the file, hence the lint
// suppression — confirm whether it is still needed.
#[allow(dead_code)]
/// Returns a short fingerprint of `s`: the first four bytes of its SHA-256
/// digest, rendered as eight zero-padded lowercase hex characters.
///
/// NOTE(review): presumably used to mention sensitive strings in logs without
/// revealing them — confirm against callers.
pub(crate) fn redact(s: &str) -> String {
    use sha2::{Digest, Sha256};

    // One-shot hash of the whole input.
    let digest = Sha256::digest(s.as_bytes());

    // Keep only the leading four bytes, interpreted big-endian so the hex
    // output reads in digest byte order.
    let mut prefix = [0u8; 4];
    prefix.copy_from_slice(&digest[..4]);
    format!("{:08x}", u32::from_be_bytes(prefix))
}
/// Tag value that `chunk_source_allowed_in` looks for in a chunk's tags; only
/// chunks carrying it are checked against the allowed-source set.
const MEMORY_SOURCE_TAG: &str = "memory_sources";
/// Pulls the source id out of a composite id shaped like
/// `mem_src:<source_id>:<remainder>`.
///
/// Returns `None` when the `mem_src:` prefix is missing, when no second `:`
/// follows the prefix, or when nothing follows that second `:`. The source id
/// itself is not validated, so `mem_src::x` yields `Some("")`.
fn extract_mem_src_id(composite_source_id: &str) -> Option<&str> {
    let (source_id, remainder) = composite_source_id
        .strip_prefix("mem_src:")?
        .split_once(':')?;
    // A trailing colon with nothing after it is not a valid composite id.
    (!remainder.is_empty()).then_some(source_id)
}
/// Decides whether a chunk passes the allowed-source filter `set`.
///
/// * Chunks whose `tags` do not include `MEMORY_SOURCE_TAG` always pass.
/// * Tagged chunks pass when `source_id` itself is in `set`, or when
///   `source_id` is a composite `mem_src:<id>:<remainder>` whose `<id>` part
///   is in `set`.
///
/// Returns `true` when the chunk is allowed, `false` otherwise.
pub(crate) fn chunk_source_allowed_in(
    set: &HashSet<String>,
    tags: &[String],
    source_id: &str,
) -> bool {
    // The filter only applies to chunks carrying the memory-source tag.
    let untagged = tags.iter().all(|tag| tag != MEMORY_SOURCE_TAG);
    if untagged || set.contains(source_id) {
        return true;
    }

    // Fall back to the bare source id embedded in a composite id.
    match extract_mem_src_id(source_id) {
        Some(bare_id) => set.contains(bare_id),
        None => false,
    }
}