use crate::directories::RamDirectory;
use crate::dsl::{Document, Field, SchemaBuilder};
use crate::error::Error;
use crate::index::{IndexConfig, IndexWriter};
/// Builds the schema shared by the primary-key dedup tests.
///
/// Declares two text fields, `id` and `title`. The `id` field is additionally
/// flagged fast and registered as the schema's primary key. Returns the built
/// schema followed by the `id` and `title` field handles, in that order.
fn make_schema() -> (crate::dsl::Schema, Field, Field) {
    let mut schema_builder = SchemaBuilder::default();

    // NOTE(review): the two boolean flags are presumably indexed/stored —
    // confirm against `SchemaBuilder::add_text_field`.
    let id_field = schema_builder.add_text_field("id", true, true);
    schema_builder.set_fast(id_field, true);
    schema_builder.set_primary_key(id_field);

    let title_field = schema_builder.add_text_field("title", true, true);

    (schema_builder.build(), id_field, title_field)
}
/// Creates a document with `id` stored in the `pk` field and `title_val`
/// stored in the `title` field (added in that order).
fn make_doc(pk: Field, title: Field, id: &str, title_val: &str) -> Document {
    let mut document = Document::new();
    for (field, text) in [(pk, id), (title, title_val)] {
        document.add_text(field, text);
    }
    document
}
#[tokio::test]
async fn test_dedup_rejects_duplicate_uncommitted() {
let (schema, pk, title) = make_schema();
let dir = RamDirectory::new();
let config = IndexConfig::default();
let mut writer = IndexWriter::create(dir, schema, config).await.unwrap();
writer.init_primary_key_dedup().await.unwrap();
assert!(
writer
.add_document(make_doc(pk, title, "doc1", "Hello"))
.is_ok()
);
let err = writer
.add_document(make_doc(pk, title, "doc1", "Hello again"))
.unwrap_err();
match err {
Error::DuplicatePrimaryKey(k) => assert_eq!(k, "doc1"),
other => panic!("Expected DuplicatePrimaryKey, got {:?}", other),
}
assert!(
writer
.add_document(make_doc(pk, title, "doc2", "World"))
.is_ok()
);
}
#[tokio::test]
async fn test_dedup_across_commit() {
let (schema, pk, title) = make_schema();
let dir = RamDirectory::new();
let config = IndexConfig::default();
let mut writer = IndexWriter::create(dir.clone(), schema, config.clone())
.await
.unwrap();
writer.init_primary_key_dedup().await.unwrap();
writer
.add_document(make_doc(pk, title, "key1", "First"))
.unwrap();
writer
.add_document(make_doc(pk, title, "key2", "Second"))
.unwrap();
writer.commit().await.unwrap();
let err = writer
.add_document(make_doc(pk, title, "key1", "Duplicate"))
.unwrap_err();
match err {
Error::DuplicatePrimaryKey(k) => assert_eq!(k, "key1"),
other => panic!("Expected DuplicatePrimaryKey, got {:?}", other),
}
let err = writer
.add_document(make_doc(pk, title, "key2", "Duplicate"))
.unwrap_err();
assert!(matches!(err, Error::DuplicatePrimaryKey(_)));
writer
.add_document(make_doc(pk, title, "key3", "Third"))
.unwrap();
writer.commit().await.unwrap();
}
#[tokio::test]
async fn test_dedup_multiple_commits() {
let (schema, pk, title) = make_schema();
let dir = RamDirectory::new();
let config = IndexConfig::default();
let mut writer = IndexWriter::create(dir.clone(), schema, config.clone())
.await
.unwrap();
writer.init_primary_key_dedup().await.unwrap();
for i in 0..10 {
writer
.add_document(make_doc(pk, title, &format!("k{}", i), "val"))
.unwrap();
}
writer.commit().await.unwrap();
for i in 0..10 {
assert!(
writer
.add_document(make_doc(pk, title, &format!("k{}", i), "dup"))
.is_err()
);
}
for i in 10..20 {
writer
.add_document(make_doc(pk, title, &format!("k{}", i), "val"))
.unwrap();
}
writer.commit().await.unwrap();
for i in 0..20 {
assert!(
writer
.add_document(make_doc(pk, title, &format!("k{}", i), "dup"))
.is_err(),
"key k{} should be rejected as duplicate",
i
);
}
for i in 20..25 {
writer
.add_document(make_doc(pk, title, &format!("k{}", i), "val"))
.unwrap();
}
writer.commit().await.unwrap();
}
#[tokio::test]
async fn test_dedup_abort_clears_uncommitted() {
let (schema, pk, title) = make_schema();
let dir = RamDirectory::new();
let config = IndexConfig::default();
let mut writer = IndexWriter::create(dir.clone(), schema, config.clone())
.await
.unwrap();
writer.init_primary_key_dedup().await.unwrap();
writer
.add_document(make_doc(pk, title, "abort_key", "First"))
.unwrap();
assert!(
writer
.add_document(make_doc(pk, title, "abort_key", "Dup"))
.is_err()
);
let prepared = writer.prepare_commit().await.unwrap();
prepared.abort();
writer
.add_document(make_doc(pk, title, "abort_key", "Retry"))
.unwrap();
writer.commit().await.unwrap();
}
/// With a schema that declares no primary key, initialising dedup must
/// succeed and two documents with identical content must both be accepted.
#[tokio::test]
async fn test_no_primary_key_allows_duplicates() {
    // Schema with a single `title` field and no primary key.
    let mut builder = SchemaBuilder::default();
    let title = builder.add_text_field("title", true, true);
    let schema = builder.build();

    let mut writer = IndexWriter::create(RamDirectory::new(), schema, IndexConfig::default())
        .await
        .unwrap();
    writer.init_primary_key_dedup().await.unwrap();

    // Two identical documents: neither may be rejected.
    for _ in 0..2 {
        let mut doc = Document::new();
        doc.add_text(title, "same");
        assert!(writer.add_document(doc).is_ok());
    }
}
#[tokio::test]
async fn test_dedup_concurrent_inserts() {
let (schema, pk, title) = make_schema();
let dir = RamDirectory::new();
let config = IndexConfig::default();
let mut writer = IndexWriter::create(dir, schema, config).await.unwrap();
writer.init_primary_key_dedup().await.unwrap();
for i in 0..100 {
let key = format!("concurrent_{}", i);
assert!(
writer
.add_document(make_doc(pk, title, &key, "val"))
.is_ok()
);
}
for i in 0..100 {
let key = format!("concurrent_{}", i);
assert!(
writer
.add_document(make_doc(pk, title, &key, "dup"))
.is_err()
);
}
}
/// Keys committed by one writer must be rejected by a second writer that
/// opens the same directory and re-initialises primary-key dedup.
///
/// NOTE(review): relies on clones of `RamDirectory` referring to the same
/// underlying storage — confirm against `RamDirectory`'s `Clone` impl.
#[tokio::test]
async fn test_dedup_reopen_existing_index() {
    let (schema, pk, title) = make_schema();
    let dir = RamDirectory::new();
    let config = IndexConfig::default();

    // Populate and commit with a first writer, which is dropped at the end of
    // this scope before the index is reopened.
    {
        let mut writer = IndexWriter::create(dir.clone(), schema.clone(), config.clone())
            .await
            .unwrap();
        writer.init_primary_key_dedup().await.unwrap();
        for key in ["existing1", "existing2"] {
            writer
                .add_document(make_doc(pk, title, key, "val"))
                .unwrap();
        }
        writer.commit().await.unwrap();
    }

    // Reopen the index and rebuild the dedup state.
    let mut reopened = IndexWriter::open(dir.clone(), config.clone())
        .await
        .unwrap();
    reopened.init_primary_key_dedup().await.unwrap();

    // Both previously committed keys must be reported as duplicates.
    for key in ["existing1", "existing2"] {
        let err = reopened
            .add_document(make_doc(pk, title, key, "dup"))
            .unwrap_err();
        assert!(matches!(err, Error::DuplicatePrimaryKey(_)));
    }

    // A key the index has never seen is accepted and committable.
    reopened
        .add_document(make_doc(pk, title, "new_key", "val"))
        .unwrap();
    reopened.commit().await.unwrap();
}
#[tokio::test]
async fn test_dedup_partial_batch() {
let (schema, pk, title) = make_schema();
let dir = RamDirectory::new();
let config = IndexConfig::default();
let mut writer = IndexWriter::create(dir, schema, config).await.unwrap();
writer.init_primary_key_dedup().await.unwrap();
writer
.add_document(make_doc(pk, title, "pre", "existing"))
.unwrap();
let keys = ["new1", "pre", "new2", "new1", "new3"];
let mut ok_count = 0;
let mut dup_count = 0;
for key in keys {
match writer.add_document(make_doc(pk, title, key, "val")) {
Ok(()) => ok_count += 1,
Err(Error::DuplicatePrimaryKey(_)) => dup_count += 1,
Err(e) => panic!("Unexpected error: {:?}", e),
}
}
assert_eq!(ok_count, 3, "3 new keys should succeed");
assert_eq!(dup_count, 2, "2 duplicates should fail");
}
/// Commits 5000 keys, verifies that none of them is accepted a second time,
/// and then checks that 5000 further, previously unseen keys are all accepted.
///
/// NOTE(review): the name suggests this targets bloom-filter false positives
/// in the dedup path; the filter itself is not visible from this test.
#[tokio::test]
async fn test_dedup_large_batch_bloom_fps() {
    let (schema, pk, title) = make_schema();
    let dir = RamDirectory::new();
    let config = IndexConfig::default();
    let mut writer = IndexWriter::create(dir.clone(), schema, config.clone())
        .await
        .unwrap();
    writer.init_primary_key_dedup().await.unwrap();

    // Commit the initial 5000 keys.
    for i in 0..5000 {
        let key = format!("big_{}", i);
        writer
            .add_document(make_doc(pk, title, &key, "val"))
            .unwrap();
    }
    writer.commit().await.unwrap();

    // Count how many committed keys get (wrongly) accepted again.
    let false_accepts = (0..5000)
        .filter(|i| {
            writer
                .add_document(make_doc(pk, title, &format!("big_{}", i), "dup"))
                .is_ok()
        })
        .count();
    assert_eq!(
        false_accepts, 0,
        "No committed keys should be falsely accepted"
    );

    // Keys that were never inserted must all be accepted.
    for i in 5000..10000 {
        let key = format!("big_{}", i);
        writer
            .add_document(make_doc(pk, title, &key, "val"))
            .unwrap();
    }
}
/// `Schema::primary_field` must return the field registered as primary key,
/// and `None` for a schema that never registered one.
#[test]
fn test_schema_primary_field() {
    // Schema from the shared helper: `id` is the primary key.
    let (keyed_schema, pk, _) = make_schema();
    assert_eq!(keyed_schema.primary_field(), Some(pk));

    // Schema with only a `title` field and no primary key.
    let mut plain_builder = SchemaBuilder::default();
    plain_builder.add_text_field("title", true, true);
    let keyless_schema = plain_builder.build();
    assert_eq!(keyless_schema.primary_field(), None);
}