#![allow(
clippy::unwrap_used,
clippy::expect_used,
reason = "test code — panics are acceptable failures"
)]
use std::collections::HashSet;
use std::sync::Arc;
use cognee_cognify::{CognifyConfig, cognify};
use cognee_database::{DatabaseConnection, DeleteDb, IngestDb, connect, initialize, ops};
use cognee_delete::{DeleteMode, DeleteRequest, DeleteScope, DeleteService};
use cognee_embedding::EmbeddingEngine;
use cognee_graph::{GraphDBTrait, LadybugAdapter};
use cognee_ingestion::AddPipeline;
use cognee_llm::Llm;
use cognee_models::DataInput;
use cognee_ontology::NoOpOntologyResolver;
use cognee_storage::{LocalStorage, StorageTrait};
use cognee_test_utils::MockVectorDB;
use cognee_vector::VectorDB;
use tempfile::TempDir;
use uuid::Uuid;
mod test_utils;
use test_utils::{create_deterministic_embedding_engine, create_llm_from_env};
/// Text ingested into the `ds_ai` dataset; embedded at compile time from
/// `test_data/artificial_intelligence.txt` (path relative to this source file).
const AI_TEXT: &str = include_str!("test_data/artificial_intelligence.txt");
/// Text ingested into the `ds_ml` dataset. It is a single line: each trailing `\`
/// joins the next source line without inserting a newline.
// NOTE(review): the text mentions "artificial intelligence", presumably so that both
// datasets produce overlapping entities — confirm against the contents of AI_TEXT.
const ML_TEXT: &str = "\
Machine learning is a core subfield of artificial intelligence that enables \
computers to learn from data without being explicitly programmed. \
Deep learning, which uses neural networks with many layers, has driven \
recent breakthroughs in natural language processing and computer vision. \
Large language models like GPT-4 from OpenAI and LLaMA from Meta \
demonstrate the power of transformer architectures trained on massive datasets. \
Reinforcement learning, another branch of machine learning, trains agents \
through trial and error in complex environments.";
/// Gathers the lowercased `"name"` property of the given graph nodes into a set.
///
/// A node contributes a name only when its properties contain a `"name"` entry
/// and that entry's `as_str()` yields a value; every other node is skipped.
/// The node id (first tuple element) is ignored.
fn extract_node_names(nodes: &[(String, cognee_graph::NodeData)]) -> HashSet<String> {
    let mut names = HashSet::new();
    for (_, properties) in nodes {
        let maybe_name = properties.get("name").and_then(|value| value.as_str());
        if let Some(name) = maybe_name {
            // Lowercase so later set comparisons are case-insensitive.
            names.insert(name.to_lowercase());
        }
    }
    names
}
/// End-to-end check that hard-deleting one dataset leaves another dataset's data intact
/// when both were cognified into the same graph and vector stores.
///
/// Flow:
/// 1. Ingest `AI_TEXT` into `ds_ai` and `ML_TEXT` into `ds_ml`.
/// 2. Run `cognify` on each dataset against the shared graph/vector stores.
/// 3. Snapshot the graph and compute the entity names common to both results.
/// 4. Hard-delete `ds_ai` through `DeleteService`.
/// 5. Assert that the graph is not empty, that the node count dropped, that `ds_ai`
///    no longer exists and that `ds_ml` still exists and still has data.
///
/// Survival of the shared entity names is only printed (with a warning on stderr when
/// none survive); it is never asserted.
///
/// If either `cognify` call returns an error, the test calls
/// `test_utils::fail_loudly_on_replay_miss` and then returns early, so it passes as
/// "skipped" unless that helper panics.
#[tokio::test]
async fn test_shared_entity_graph_delete() {
// --- Fixture setup: every store lives under one temporary directory. ---
let temp_dir = TempDir::new().expect("temp dir");
let embedding_engine: Arc<dyn EmbeddingEngine> = create_deterministic_embedding_engine();
let storage: Arc<dyn StorageTrait> =
Arc::new(LocalStorage::new(temp_dir.path().join("storage")));
storage.initialize().await.expect("storage.initialize");
let db_path = temp_dir.path().join("cognee.db");
// The database file is created up front, before connecting.
// NOTE(review): presumably `connect` does not create a missing SQLite file — confirm.
std::fs::File::create(&db_path).expect("create sqlite db file");
let db_url = format!("sqlite://{}", db_path.display());
let db = connect(&db_url).await.expect("connect");
initialize(&db).await.expect("initialize");
let database: Arc<DatabaseConnection> = Arc::new(db);
let graph_path = temp_dir.path().join("graph").to_string_lossy().to_string();
let graph_db: Arc<dyn GraphDBTrait> = Arc::new(
LadybugAdapter::new(&graph_path)
.await
.expect("LadybugAdapter::new"),
);
graph_db.initialize().await.expect("graph_db.initialize");
let vector_db: Arc<dyn VectorDB> = Arc::new(MockVectorDB::new());
let llm: Arc<dyn Llm> = create_llm_from_env("shared_entity_graph_delete");
// Both datasets are owned by the nil UUID.
let owner_id = Uuid::nil();
let ontology = Arc::new(NoOpOntologyResolver::new());
// --- Step 1: ingest one text document into each dataset. ---
let ingest = AddPipeline::new(Arc::clone(&storage), database.clone() as Arc<dyn IngestDb>)
.with_thread_pool(Arc::new(
cognee_core::RayonThreadPool::with_default_threads().unwrap(),
))
.with_graph_db(Arc::clone(&graph_db))
.with_vector_db(Arc::clone(&vector_db))
.with_database(Arc::clone(&database));
let data_ai = ingest
.add(
vec![DataInput::Text(AI_TEXT.to_string())],
"ds_ai",
owner_id,
None,
)
.await
.expect("ingest ds_ai");
assert_eq!(data_ai.len(), 1);
let data_ml = ingest
.add(
vec![DataInput::Text(ML_TEXT.to_string())],
"ds_ml",
owner_id,
None,
)
.await
.expect("ingest ds_ml");
assert_eq!(data_ml.len(), 1);
// Look the datasets up by name to obtain the ids that `cognify` needs.
let ds_ai = ops::datasets::get_dataset_by_name(&database, "ds_ai", owner_id, None)
.await
.expect("get ds_ai")
.expect("ds_ai should exist");
let ds_ml = ops::datasets::get_dataset_by_name(&database, "ds_ml", owner_id, None)
.await
.expect("get ds_ml")
.expect("ds_ml should exist");
println!("Step 1 OK: Ingested 2 documents into ds_ai and ds_ml");
// --- Step 2: cognify both datasets into the same graph/vector stores. ---
// Summarization and triplet embeddings are switched off for this test.
let config = CognifyConfig::default()
.with_summarization(false)
.with_triplet_embeddings(false);
let result_ai = match cognify(
data_ai,
ds_ai.id,
Some(owner_id),
None,
None,
llm.clone() as Arc<dyn Llm>,
storage.clone(),
graph_db.clone(),
vector_db.clone(),
embedding_engine.clone(),
Arc::clone(&database),
Arc::new(cognee_database::NoopPipelineRunRepository::new())
as Arc<dyn cognee_database::PipelineRunRepository>,
Arc::new(
cognee_core::RayonThreadPool::with_default_threads().expect("RayonThreadPool init"),
) as Arc<dyn cognee_core::CpuPool>,
ontology.clone(),
&config,
)
.await
{
Ok(r) => r,
Err(e) => {
// A cognify failure is not a test failure by itself: the helper gets a chance
// to react first, then the test is skipped by returning early.
// NOTE(review): presumably the helper panics on a replay miss — confirm in test_utils.
test_utils::fail_loudly_on_replay_miss("cognify ds_ai", &e);
eprintln!("Skipping: cognify ds_ai failed: {e}");
return;
}
};
// Same call for the second dataset; only the data, dataset id and labels differ.
let result_ml = match cognify(
data_ml,
ds_ml.id,
Some(owner_id),
None,
None,
llm.clone() as Arc<dyn Llm>,
storage.clone(),
graph_db.clone(),
vector_db.clone(),
embedding_engine.clone(),
Arc::clone(&database),
Arc::new(cognee_database::NoopPipelineRunRepository::new())
as Arc<dyn cognee_database::PipelineRunRepository>,
Arc::new(
cognee_core::RayonThreadPool::with_default_threads().expect("RayonThreadPool init"),
) as Arc<dyn cognee_core::CpuPool>,
ontology.clone(),
&config,
)
.await
{
Ok(r) => r,
Err(e) => {
// Same skip-on-error handling as for ds_ai above.
test_utils::fail_loudly_on_replay_miss("cognify ds_ml", &e);
eprintln!("Skipping: cognify ds_ml failed: {e}");
return;
}
};
println!(
"Step 2 OK: Cognified ds_ai ({} entities, {} edges), ds_ml ({} entities, {} edges)",
result_ai.entities.len(),
result_ai.edges.len(),
result_ml.entities.len(),
result_ml.edges.len(),
);
// --- Step 3: snapshot the graph before deletion. ---
let (pre_nodes, _pre_edges) = graph_db
.get_graph_data()
.await
.expect("get_graph_data pre-delete");
let pre_node_count = pre_nodes.len();
let pre_names = extract_node_names(&pre_nodes);
// Entity names are lowercased so the intersection below is case-insensitive
// and comparable with the output of `extract_node_names`.
let ai_entity_names: HashSet<String> = result_ai
.entities
.iter()
.map(|e| e.entity.name.to_lowercase())
.collect();
let ml_entity_names: HashSet<String> = result_ml
.entities
.iter()
.map(|e| e.entity.name.to_lowercase())
.collect();
// Names extracted from both datasets.
let shared_names: HashSet<String> = ai_entity_names
.intersection(&ml_entity_names)
.cloned()
.collect();
println!(
"Step 3: Pre-delete state: {} graph nodes, {} pre-delete names",
pre_node_count,
pre_names.len(),
);
println!(
" AI entities: {ai_entity_names:?}\n ML entities: {ml_entity_names:?}\n Shared: {shared_names:?}",
);
assert!(
pre_node_count > 0,
"Graph should have nodes after cognifying both datasets"
);
// --- Step 4: hard-delete the whole ds_ai dataset. ---
let delete_svc =
DeleteService::new(Arc::clone(&storage), database.clone() as Arc<dyn DeleteDb>)
.with_graph_db(graph_db.clone())
.with_vector_db(vector_db.clone());
let delete_result = delete_svc
.execute(&DeleteRequest {
scope: DeleteScope::Dataset {
owner_id,
dataset_name: "ds_ai".to_string(),
},
mode: DeleteMode::Hard,
memory_only: false,
})
.await
.expect("delete ds_ai");
assert!(
delete_result.deleted_datasets >= 1,
"Should have deleted at least 1 dataset"
);
println!(
"Step 4 OK: Deleted ds_ai ({} datasets, {} data, {} graph nodes)",
delete_result.deleted_datasets,
delete_result.deleted_data,
delete_result.deleted_graph_nodes,
);
// --- Step 5: verify the post-delete invariants. ---
// The graph must still hold something, since ds_ml was not deleted.
assert!(
!graph_db.is_empty().await.expect("is_empty"),
"Graph should NOT be empty — ds_ml data should survive"
);
let (post_nodes, _post_edges) = graph_db
.get_graph_data()
.await
.expect("get_graph_data post-delete");
let post_node_count = post_nodes.len();
let post_names = extract_node_names(&post_nodes);
println!("Step 5: Post-delete state: {post_node_count} graph nodes (was {pre_node_count})",);
// Strict decrease: at least one node must have been removed with ds_ai.
assert!(
post_node_count < pre_node_count,
"Node count should decrease after deleting ds_ai: post={post_node_count}, pre={pre_node_count}",
);
// Shared-entity survival is informational only: nothing in this branch asserts,
// so the test still passes when no shared name survives.
if !shared_names.is_empty() {
let surviving_shared: Vec<&String> = shared_names
.iter()
.filter(|name| post_names.contains(*name))
.collect();
println!(
" Shared entities surviving: {}/{} ({:?})",
surviving_shared.len(),
shared_names.len(),
surviving_shared,
);
if surviving_shared.is_empty() {
eprintln!(
"WARNING: No shared entities survived deletion. \
This may be due to LLM non-determinism in entity naming. \
Shared names were: {shared_names:?}"
);
}
}
// Relational side: ds_ai must be gone, ds_ml must still exist with its data.
let ds_ai_after = ops::datasets::get_dataset_by_name(&database, "ds_ai", owner_id, None)
.await
.expect("get ds_ai after delete");
assert!(
ds_ai_after.is_none(),
"ds_ai should not exist after deletion"
);
let ds_ml_after = ops::datasets::get_dataset_by_name(&database, "ds_ml", owner_id, None)
.await
.expect("get ds_ml after delete")
.expect("ds_ml should still exist");
let ml_data = ops::datasets::get_dataset_data(&database, ds_ml_after.id)
.await
.expect("get ds_ml data");
assert!(
!ml_data.is_empty(),
"ds_ml should still have data after deleting ds_ai"
);
// NOTE(review): the "Step 5" label is used both for the state dump above and for
// this final confirmation; the log has no separate step number for verification.
println!("Step 5 OK: All post-delete invariants verified");
println!("PASSED: test_shared_entity_graph_delete");
}