use terraphim_middleware::haystack::QueryRsHaystackIndexer;
/// Runs `normalize_document_id` over a set of representative raw document IDs
/// and checks that every result satisfies the same invariants.
///
/// For each input the normalized ID must:
/// * not contain the `crategravitydb010md` or `crategqlite000` patterns,
/// * not end with `md`,
/// * be at most 50 bytes long and non-empty,
/// * not contain a double underscore.
///
/// The description attached to each case is only printed; it does not select
/// which assertions run.
///
/// The test is synchronous: nothing in the body awaits, and the sibling tests
/// in this file exercise the same calls without an async runtime.
#[test]
fn test_opendal_warning_fixes() {
    let indexer = QueryRsHaystackIndexer::default();
    println!("🧪 Testing OpenDAL Warning Fixes");
    println!("=================================");

    // (raw document ID, human-readable expectation used only in the log output)
    let test_cases = [
        (
            "crate-gravity-db-0.1.0",
            "should not contain crategravitydb010md",
        ),
        ("crate-gqlite", "should be properly normalized"),
        ("crate-gqlite-0.0.0", "should not contain crategqlite000"),
        ("crate-caffe2-nomnigraph", "should handle hyphenated names"),
        ("docs-gravity-db", "should handle documentation IDs"),
        ("reddit-abc123", "should handle Reddit IDs"),
        ("crate-gravity-db-0.1.0.md", "should remove .md extension"),
        ("crate/gqlite/0.0.0", "should handle path separators"),
    ];
    println!();

    for (original_id, description) in &test_cases {
        let normalized = indexer.normalize_document_id(original_id);
        // Built for the log output only; shows the key shape derived from the ID.
        let expected_key = format!("document_{}.json", normalized);
        println!("Testing: {} ({})", original_id, description);
        println!(" → Normalized: {}", normalized);
        println!(" → Key: {}", expected_key);

        assert!(
            !normalized.contains("crategravitydb010md"),
            "ID should not contain crategravitydb010md pattern: {}",
            normalized
        );
        assert!(
            !normalized.contains("crategqlite000"),
            "ID should not contain crategqlite000 pattern: {}",
            normalized
        );
        assert!(
            !normalized.ends_with("md"),
            "ID should not end with 'md': {}",
            normalized
        );
        assert!(
            normalized.len() <= 50,
            "ID should not be excessively long: {} ({})",
            normalized,
            normalized.len()
        );
        assert!(
            !normalized.is_empty(),
            "ID should not be empty: {}",
            normalized
        );
        assert!(
            !normalized.contains("__"),
            "ID should not contain double underscores: {}",
            normalized
        );
        println!(" ✅ All validations passed");
        println!();
    }
    println!("🎯 All OpenDAL warning fix tests passed!");
}
/// Feeds already-malformed IDs to `normalize_document_id` and checks that the
/// cleaned-up result is usable.
///
/// For every input, including the empty string, the normalized ID must be
/// non-empty, must not end with `md`, must be at most 50 bytes long, and must
/// not contain the `crategravitydb010md` pattern.
#[test]
fn test_malformed_id_detection() {
    let indexer = QueryRsHaystackIndexer::default();
    println!("🧪 Testing Malformed ID Detection");
    println!("=================================");

    let malformed_test_cases = [
        // Same patterns the normalization test asserts never appear in output.
        "crategravitydb010md",
        "crategqlite000",
        // Longer than the 50-byte limit asserted below.
        "some-very-long-document-id-that-exceeds-reasonable-limits-and-should-be-cleaned-up",
        "document.with.dots.md",
        // Empty input must still yield a non-empty ID.
        "",
    ];

    for malformed_id in &malformed_test_cases {
        println!("Testing malformed ID: '{}'", malformed_id);
        let normalized = indexer.normalize_document_id(malformed_id);

        // Diagnostics printed before asserting, so a failure shows the values.
        println!(" → Cleaned to: '{}'", normalized);
        println!(
            " → Original ends with md: {}",
            malformed_id.ends_with("md")
        );
        println!(
            " → Normalized ends with md: {}",
            normalized.ends_with("md")
        );
        println!(" → Length: {}", normalized.len());

        assert!(
            !normalized.is_empty(),
            "Should not produce empty ID from '{}'",
            malformed_id
        );
        assert!(
            !normalized.ends_with("md"),
            "Should not end with md: '{}' from '{}'",
            normalized,
            malformed_id
        );
        assert!(
            normalized.len() <= 50,
            "Should not be excessively long: {} ({})",
            normalized,
            normalized.len()
        );
        assert!(
            !normalized.contains("crategravitydb010md"),
            "Should not contain problematic patterns: '{}'",
            normalized
        );
        println!(" ✅ Cleanup successful");
    }
}
/// Regression check that ordinary IDs normalize to the exact expected value.
///
/// Each case pairs a hyphen-separated input with the underscore-separated ID
/// that `normalize_document_id` is expected to return for it.
#[test]
fn test_normal_ids_still_work() {
    let indexer = QueryRsHaystackIndexer::default();
    println!("🧪 Testing Normal IDs Still Work");
    println!("=================================");

    // (input ID, expected normalized ID)
    let normal_test_cases = [
        ("crate-serde", "crate_serde"),
        ("reddit-abc123", "reddit_abc123"),
        ("docs-tokio", "docs_tokio"),
        ("simple-document", "simple_document"),
    ];

    for (input, expected) in &normal_test_cases {
        let normalized = indexer.normalize_document_id(input);
        assert_eq!(
            normalized, *expected,
            "Normal ID normalization should work: {} → {}",
            input, expected
        );
        println!("✅ {} → {} (expected {})", input, normalized, expected);
    }
}