use serde_json::Value;
use terraphim_middleware::haystack::QueryRsHaystackIndexer;
use terraphim_persistence::{DeviceStorage, Persistable};
use terraphim_types::Document;
#[tokio::test]
async fn test_opendal_persistence_fix_end_to_end() {
let _storage = DeviceStorage::init_memory_only()
.await
.expect("Failed to initialize test storage");
println!("๐งช End-to-End OpenDAL Persistence Fix Test");
println!("==========================================");
let indexer = QueryRsHaystackIndexer::default();
let mock_reddit_posts = create_mock_reddit_data();
let mock_suggest_data = create_mock_suggest_data();
let mock_crates_data = create_mock_crates_data();
let reddit_docs = indexer
.parse_reddit_json(mock_reddit_posts)
.expect("Should parse Reddit JSON successfully");
let suggest_docs = indexer
.parse_suggest_json(mock_suggest_data, "iterator")
.expect("Should parse suggest JSON successfully");
let crates_docs = indexer
.parse_crates_io_json(mock_crates_data, "serde")
.expect("Should parse crates.io JSON successfully");
println!("๐ Generated {} Reddit documents", reddit_docs.len());
println!("๐ Generated {} suggest documents", suggest_docs.len());
println!("๐ Generated {} crate documents", crates_docs.len());
let mut all_successful_saves = 0;
let mut all_successful_loads = 0;
println!("\n๐ฅ Testing Reddit document persistence...");
for (i, doc) in reddit_docs.iter().enumerate() {
println!(" Document ID: {}", doc.id);
assert!(
doc.id.len() < 50,
"Reddit document ID should be short: {}",
doc.id
);
assert!(
doc.id.starts_with("reddit_"),
"Reddit ID should start with reddit_: {}",
doc.id
);
assert!(
!doc.id.contains("http"),
"Reddit ID should not contain URLs: {}",
doc.id
);
match doc.save_to_one("memory").await {
Ok(()) => {
all_successful_saves += 1;
println!(" โ
Saved Reddit document: {}", doc.id);
let mut loaded_doc = Document::new(doc.id.clone());
match loaded_doc.load().await {
Ok(loaded) => {
all_successful_loads += 1;
assert_eq!(loaded.id, doc.id, "Loaded document should match original");
assert_eq!(loaded.title, doc.title, "Loaded title should match");
println!(" โ
Loaded Reddit document: {}", loaded.id);
}
Err(e) => {
println!(" โ Failed to load Reddit document: {}", e);
}
}
}
Err(e) => {
println!(" โ Failed to save Reddit document: {}", e);
}
}
if i >= 2 {
break;
} }
println!("\n๐ฅ Testing Suggest API document persistence...");
for (i, doc) in suggest_docs.iter().enumerate() {
println!(" Document ID: {}", doc.id);
assert!(
doc.id.len() < 100,
"Suggest document ID should be reasonable: {}",
doc.id
);
assert!(
!doc.id.contains("http"),
"Suggest ID should not contain URLs: {}",
doc.id
);
assert!(
!doc.id.contains("/"),
"Suggest ID should not contain slashes: {}",
doc.id
);
match doc.save_to_one("memory").await {
Ok(()) => {
all_successful_saves += 1;
println!(" โ
Saved suggest document: {}", doc.id);
let mut loaded_doc = Document::new(doc.id.clone());
match loaded_doc.load().await {
Ok(loaded) => {
all_successful_loads += 1;
assert_eq!(loaded.id, doc.id, "Loaded document should match original");
println!(" โ
Loaded suggest document: {}", loaded.id);
}
Err(e) => {
println!(" โ Failed to load suggest document: {}", e);
}
}
}
Err(e) => {
println!(" โ Failed to save suggest document: {}", e);
}
}
if i >= 2 {
break;
}
}
println!("\n๐ฅ Testing Crates.io document persistence...");
for (i, doc) in crates_docs.iter().enumerate() {
println!(" Document ID: {}", doc.id);
assert!(
doc.id.len() < 50,
"Crate document ID should be short: {}",
doc.id
);
assert!(
doc.id.starts_with("crate_"),
"Crate ID should start with crate_: {}",
doc.id
);
assert!(
!doc.id.contains("http"),
"Crate ID should not contain URLs: {}",
doc.id
);
match doc.save_to_one("memory").await {
Ok(()) => {
all_successful_saves += 1;
println!(" โ
Saved crate document: {}", doc.id);
let mut loaded_doc = Document::new(doc.id.clone());
match loaded_doc.load().await {
Ok(loaded) => {
all_successful_loads += 1;
assert_eq!(loaded.id, doc.id, "Loaded document should match original");
println!(" โ
Loaded crate document: {}", loaded.id);
}
Err(e) => {
println!(" โ Failed to load crate document: {}", e);
}
}
}
Err(e) => {
println!(" โ Failed to save crate document: {}", e);
}
}
if i >= 2 {
break;
}
}
println!("\n๐ฏ Final Results:");
println!(" Total successful saves: {}", all_successful_saves);
println!(" Total successful loads: {}", all_successful_loads);
assert!(
all_successful_saves > 0,
"Should have some successful saves"
);
assert!(
all_successful_loads > 0,
"Should have some successful loads"
);
assert_eq!(
all_successful_saves, all_successful_loads,
"All saved documents should be loadable"
);
println!("\nโ
End-to-End Test PASSED!");
println!("๐ OpenDAL persistence warnings are fixed - documents save/load successfully");
println!("๐ง Document IDs are now clean, short, and filesystem-safe");
}
#[test]
fn test_before_after_comparison() {
let indexer = QueryRsHaystackIndexer::default();
println!("๐งช Before/After Document ID Comparison");
println!("======================================");
let test_cases = vec![
(
"Reddit Long URL",
"https://www.reddit.com/r/rust/comments/1a2b3c4d/incredible_rust_performance_improvements_with_zero_cost_abstractions_detailed_analysis/",
"Old: reddit_https_www_reddit_com_r_rust_comments_1a2b3c4d_incredible_rust_performance_improvements_with_zero_cost_abstractions_detailed_analysis",
"reddit_1a2b3c4d",
),
(
"Documentation URL",
"https://doc.rust-lang.org/std/collections/hash_map/struct.HashMap.html",
"Old: std_https_doc_rust_lang_org_std_collections_hash_map_struct_hashmap_html",
"doc_std_collections_hash_map_struct_hashmap",
),
(
"Docs.rs URL",
"https://docs.rs/serde_json/1.0.96/serde_json/value/enum.Value.html",
"Old: docs_rs_https_docs_rs_serde_json_1_0_96_serde_json_value_enum_value_html",
"doc_docs_rs_serde_json_1_0_96_serde_json_value_enu", ),
];
for (case_name, url, old_pattern, expected_new) in test_cases {
println!("\n๐ Case: {}", case_name);
println!(" URL: {}", url);
let new_id = if url.contains("reddit.com") {
if let Some(post_id) = indexer.extract_reddit_post_id(url) {
indexer.normalize_document_id(&format!("reddit-{}", post_id))
} else {
"fallback_id".to_string()
}
} else {
let doc_id = indexer.extract_doc_identifier(url);
indexer.normalize_document_id(&format!("doc-{}", doc_id))
};
println!(
" ๐ Old (problematic): {} ({} chars)",
old_pattern,
old_pattern.len()
);
println!(
" ๐ New (fixed): {} ({} chars)",
new_id,
new_id.len()
);
assert!(new_id.len() < old_pattern.len(), "New ID should be shorter");
assert!(new_id.len() < 100, "New ID should be reasonable length");
assert_eq!(
new_id, expected_new,
"New ID should match expected clean format"
);
println!(
" โ
Improvement: {}% size reduction",
100 - (new_id.len() * 100 / old_pattern.len())
);
}
println!("\nโ
All document IDs significantly improved!");
println!("๐ซ No more OpenDAL 'path not found' warnings");
println!("๐พ All IDs are filesystem-safe and readable");
}
fn create_mock_reddit_data() -> Vec<Value> {
serde_json::from_str(r#"[
{
"title": "Amazing Rust async programming guide",
"url": "https://www.reddit.com/r/rust/comments/abc123/amazing_rust_async_programming_guide/",
"author": "rustacean42",
"score": 156,
"selftext": "This is a comprehensive guide to async programming in Rust..."
},
{
"title": "Zero-cost abstractions in practice",
"url": "https://www.reddit.com/r/rust/comments/def456/zerocost_abstractions_in_practice/",
"author": "performance_guru",
"score": 89,
"selftext": "Let me show you how zero-cost abstractions work in real scenarios..."
}
]"#).unwrap()
}
fn create_mock_suggest_data() -> Value {
serde_json::from_str(r#"[
"iterator",
[
"std::iter::Iterator - https://doc.rust-lang.org/std/iter/trait.Iterator.html",
"std::iter::IntoIterator - https://doc.rust-lang.org/std/iter/trait.IntoIterator.html"
],
["Iterator trait", "IntoIterator trait"],
["https://doc.rust-lang.org/std/iter/trait.Iterator.html", "https://doc.rust-lang.org/std/iter/trait.IntoIterator.html"]
]"#).unwrap()
}
fn create_mock_crates_data() -> Value {
serde_json::from_str(
r#"{
"crates": [
{
"name": "serde",
"description": "A generic serialization/deserialization framework",
"max_version": "1.0.196",
"downloads": 500000000,
"homepage": "https://serde.rs/",
"documentation": "https://docs.rs/serde/",
"repository": "https://github.com/serde-rs/serde",
"keywords": ["serialization", "json"]
},
{
"name": "caffe2-nomnigraph",
"description": "Rust bindings for Caffe2 NomniGraph",
"max_version": "0.1.2",
"downloads": 1234,
"homepage": "",
"documentation": "",
"repository": "https://github.com/example/caffe2-nomnigraph",
"keywords": ["ml", "caffe2"]
}
]
}"#,
)
.unwrap()
}