use crate::Result;
use crate::plugins::registry::get_post_processor_registry;
use once_cell::sync::OnceCell;
use std::sync::Arc;
// Public submodules of the keyword-extraction module.
pub mod config;
pub mod processor;
pub mod types;
// Algorithm backends are private and compiled only when the matching Cargo
// feature is enabled.
#[cfg(feature = "keywords-yake")]
mod yake;
#[cfg(feature = "keywords-rake")]
mod rake;
// Re-exports so callers can reach the main types from this module directly.
pub use config::KeywordConfig;
pub use processor::KeywordExtractor;
// Per-algorithm parameter types are re-exported only alongside their backend feature.
#[cfg(feature = "keywords-rake")]
pub use config::RakeParams;
#[cfg(feature = "keywords-yake")]
pub use config::YakeParams;
pub use types::{Keyword, KeywordAlgorithm};
/// Extracts keywords from `text` with the algorithm selected in `config`.
///
/// The call is forwarded to the backend matching `config.algorithm`; each
/// backend arm exists only when its Cargo feature (`keywords-yake` /
/// `keywords-rake`) is compiled in.
///
/// # Errors
///
/// Propagates whatever error the selected backend returns. When neither
/// backend feature is enabled, every call fails with
/// `KreuzbergError::Other`.
pub fn extract_keywords(text: &str, config: &KeywordConfig) -> Result<Vec<Keyword>> {
    let algorithm = &config.algorithm;
    match algorithm {
        #[cfg(feature = "keywords-yake")]
        KeywordAlgorithm::Yake => yake::extract_keywords_yake(text, config),

        #[cfg(feature = "keywords-rake")]
        KeywordAlgorithm::Rake => rake::extract_keywords_rake(text, config),

        // Fallback arm that only exists in builds without any backend feature.
        #[cfg(not(any(feature = "keywords-yake", feature = "keywords-rake")))]
        _ => Err(crate::KreuzbergError::Other(String::from(
            "No keyword extraction algorithm feature enabled",
        ))),
    }
}
/// Marker cell that is filled once [`register_keyword_processor`] has succeeded.
static PROCESSOR_INITIALIZED: OnceCell<()> = OnceCell::new();

/// Makes sure the keyword post-processor has been registered.
///
/// The first successful call runs [`register_keyword_processor`]; once the
/// cell is filled, later calls return `Ok(())` without registering again.
///
/// # Errors
///
/// Returns the registration error if [`register_keyword_processor`] fails.
/// In that case the cell is left empty, so a subsequent call will attempt
/// the registration again.
pub fn ensure_initialized() -> Result<()> {
    // Only success/failure matters; the `&()` handed back by the cell is discarded.
    PROCESSOR_INITIALIZED.get_or_try_init(register_keyword_processor)?;
    Ok(())
}
/// Registers a [`KeywordExtractor`] in the global post-processor registry.
///
/// The extractor is registered with the value `50`
/// (NOTE(review): presumably its priority/ordering within the registry —
/// confirm against the registry API).
///
/// Calling this directly performs a registration attempt every time; use
/// [`ensure_initialized`] to go through the one-time guard instead.
///
/// # Errors
///
/// Propagates any error returned by the registry's `register` call.
pub fn register_keyword_processor() -> Result<()> {
    let shared_registry = get_post_processor_registry();
    {
        // Write access is needed to add an entry; the guard is released at
        // the end of this scope.
        let mut write_guard = shared_registry.write();
        write_guard.register(Arc::new(KeywordExtractor), 50)?;
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration yields a non-empty result that respects
    /// the configured `max_keywords` cap.
    #[test]
    fn test_extract_keywords_default_algorithm() {
        let cfg = KeywordConfig::default();
        let sample = "Rust programming language provides memory safety and performance.";

        let found = extract_keywords(sample, &cfg).unwrap();

        assert!(!found.is_empty(), "Should extract keywords");
        assert!(found.len() <= cfg.max_keywords);
    }

    /// A YAKE configuration produces keywords tagged with the YAKE algorithm.
    #[cfg(feature = "keywords-yake")]
    #[test]
    fn test_extract_keywords_yake() {
        let cfg = KeywordConfig::yake();
        let sample = "Natural language processing using Rust is efficient and safe.";

        let found = extract_keywords(sample, &cfg).unwrap();

        assert!(!found.is_empty());
        assert_eq!(found[0].algorithm, KeywordAlgorithm::Yake);
    }

    /// A RAKE configuration produces keywords tagged with the RAKE algorithm.
    #[cfg(feature = "keywords-rake")]
    #[test]
    fn test_extract_keywords_rake() {
        let cfg = KeywordConfig::rake();
        let sample = "Natural language processing using Rust is efficient and safe.";

        let found = extract_keywords(sample, &cfg).unwrap();

        assert!(!found.is_empty());
        assert_eq!(found[0].algorithm, KeywordAlgorithm::Rake);
    }

    /// Runs both backends on the same input and checks that each result set
    /// is non-empty and consistently tagged with its own algorithm.
    #[cfg(all(feature = "keywords-yake", feature = "keywords-rake"))]
    #[test]
    fn test_compare_algorithms() {
        let sample = "Machine learning and artificial intelligence are transforming technology. \
                      Deep learning models require substantial computational resources.";

        let yake_cfg = KeywordConfig::yake().with_max_keywords(5);
        let from_yake = extract_keywords(sample, &yake_cfg).unwrap();

        let rake_cfg = KeywordConfig::rake().with_max_keywords(5);
        let from_rake = extract_keywords(sample, &rake_cfg).unwrap();

        assert!(!from_yake.is_empty());
        assert!(!from_rake.is_empty());

        assert!(from_yake.iter().all(|kw| kw.algorithm == KeywordAlgorithm::Yake));
        assert!(from_rake.iter().all(|kw| kw.algorithm == KeywordAlgorithm::Rake));

        // Printed for manual inspection (visible with `--nocapture`).
        let yake_texts: Vec<_> = from_yake.iter().map(|kw| &kw.text).collect();
        println!("YAKE keywords: {:?}", yake_texts);

        let rake_texts: Vec<_> = from_rake.iter().map(|kw| &kw.text).collect();
        println!("RAKE keywords: {:?}", rake_texts);
    }
}