#![allow(
clippy::print_stderr,
reason = "tests use stderr for diagnostic output"
)]
use std::sync::{Mutex, OnceLock};
/// Model name handed to `udpipe_rs::download_model` and echoed in the download log line.
const MODEL_LANGUAGE: &str = "english-ewt";
/// Shared test fixture, filled in on first use by `get_model_state`.
///
/// The tuple holds, in order: the temporary directory the model is downloaded into, the
/// model path returned by the download, and the loaded model behind a `Mutex`.
// NOTE(review): the `TempDir` is presumably stored so the directory stays on disk for tests
// that re-read the model file — confirm against `tempfile`'s drop behavior.
static MODEL: OnceLock<(tempfile::TempDir, String, Mutex<udpipe_rs::Model>)> = OnceLock::new();
/// Returns the shared `(temp dir, model path, model)` fixture, building it on first call.
///
/// # Panics
///
/// Panics if the temporary directory cannot be created, or if the model fails to
/// download or load.
fn get_model_state() -> &'static (tempfile::TempDir, String, Mutex<udpipe_rs::Model>) {
    /// Downloads the model into a fresh temporary directory and loads it.
    fn build_fixture() -> (tempfile::TempDir, String, Mutex<udpipe_rs::Model>) {
        let download_dir = tempfile::tempdir().expect("Failed to create temp directory");
        eprintln!("Downloading {MODEL_LANGUAGE} model for integration tests...");
        let downloaded_path = udpipe_rs::download_model(MODEL_LANGUAGE, download_dir.path())
            .expect("Failed to download model for integration tests");
        let loaded = udpipe_rs::Model::load(&downloaded_path).expect("Failed to load model");
        (download_dir, downloaded_path, Mutex::new(loaded))
    }

    MODEL.get_or_init(build_fixture)
}
/// Parses `text` with the shared model, holding the model's mutex for the duration of the call.
///
/// # Errors
///
/// Returns whatever error the model's `parse` reports.
///
/// # Panics
///
/// Panics if the model mutex is poisoned.
fn parse(text: &str) -> Result<Vec<udpipe_rs::Word>, udpipe_rs::UdpipeError> {
    let (_, _, shared_model) = get_model_state();
    let guard = shared_model.lock().expect("Model mutex poisoned");
    guard.parse(text)
}
/// A short greeting parses into a non-empty word list containing both word forms.
#[test]
fn test_parse_simple_sentence() {
    let tokens = parse("Hello world!").expect("Failed to parse");
    assert!(!tokens.is_empty());
    for expected_form in ["Hello", "world"] {
        assert!(tokens.iter().any(|token| token.form == expected_form));
    }
}
/// Two sentences yield at least six words, spread over sentence ids 0 and 1.
#[test]
fn test_parse_multiple_sentences() {
    let tokens = parse("The cat sat. The dog ran.").expect("Failed to parse");
    assert!(tokens.len() >= 6);
    for expected_sentence in [0, 1] {
        assert!(tokens
            .iter()
            .any(|token| token.sentence_id == expected_sentence));
    }
}
/// Word IDs are 1-based and increase by exactly one within a sentence.
///
/// The earlier version only checked `id >= 1`, which did not verify the sequencing the
/// test name promises.
#[test]
fn test_word_ids_are_sequential() {
    let words = parse("The quick brown fox.").expect("Failed to parse");
    assert!(!words.is_empty(), "Should have parsed words");

    for word in &words {
        assert!(word.id >= 1, "Word ID should be >= 1");
    }

    // NOTE(review): assumes `parse` returns words in document order — confirm.
    if let Some(first) = words.first() {
        assert_eq!(first.id, 1, "First word ID should be 1");
    }
    for pair in words.windows(2) {
        let (previous, current) = (&pair[0], &pair[1]);
        if previous.sentence_id == current.sentence_id {
            assert_eq!(
                current.id,
                previous.id + 1,
                "Word IDs should be consecutive within a sentence"
            );
        } else {
            // Numbering restarts whenever a new sentence begins.
            assert_eq!(current.id, 1, "Word IDs should restart at 1 in a new sentence");
        }
    }
}
/// A simple clause has exactly one dependency root, and that root is a verb.
#[test]
fn test_dependency_structure() {
    let parsed = parse("The cat sleeps.").expect("Failed to parse");

    let mut root_words = Vec::new();
    for candidate in &parsed {
        if candidate.is_root() {
            root_words.push(candidate);
        }
    }
    assert_eq!(root_words.len(), 1, "Should have exactly one root");

    let root = root_words[0];
    let root_is_verbal = root.is_verb() || root.upostag == "VERB";
    assert!(root_is_verbal, "Root should be a verb");
}
/// The word lemmatized as "run" is tagged as a verb and carries tense or verb-form features.
#[test]
fn test_morphological_features() {
    let words = parse("She runs quickly.").expect("Failed to parse");

    // `expect` replaces the former `is_some()` assert + `unwrap()` pair; the panic
    // message on a missing verb is unchanged.
    let verb = words
        .iter()
        .find(|w| w.lemma == "run")
        .expect("Should find verb 'run'");

    assert!(verb.is_verb());
    assert!(
        verb.has_feature("Tense", "Pres") || verb.has_feature("VerbForm", "Fin"),
        "Verb should have tense/form features"
    );
}
/// Parsing the empty string succeeds and yields no words.
#[test]
fn test_empty_input() {
    let parsed = parse("").expect("Should handle empty input");
    let produced_nothing = parsed.is_empty();
    assert!(produced_nothing, "Empty input should produce no words");
}
/// Text with accented Latin letters and CJK characters parses into at least one word.
#[test]
fn test_unicode_input() {
    let mixed_script = "Héllo wörld! 你好";
    let parsed = parse(mixed_script).expect("Should handle Unicode");
    assert!(!parsed.is_empty());
}
/// The parse of "Hello, world!" contains both words that report a following space and
/// words that do not.
#[test]
fn test_misc_field_space_after() {
    let parsed = parse("Hello, world!").expect("Failed to parse");

    // Split once on the predicate rather than counting each side separately.
    let (spaced, unspaced): (Vec<_>, Vec<_>) =
        parsed.iter().partition(|word| word.has_space_after());

    assert!(!spaced.is_empty(), "Should have words with space after");
    assert!(
        !unspaced.is_empty(),
        "Should have words without space after (punctuation)"
    );
}
/// Every parsed word's `xpostag` is either ASCII or empty.
#[test]
fn test_xpostag_field() {
    let parsed = parse("The cat sleeps.").expect("Failed to parse");
    assert!(!parsed.is_empty(), "Should have parsed words");

    parsed.iter().for_each(|word| {
        let tag = &word.xpostag;
        assert!(tag.is_ascii() || tag.is_empty());
    });
}
/// Input containing an interior NUL is rejected with an error whose message mentions
/// "null byte".
#[test]
fn test_parse_with_null_byte() {
    let outcome = parse("Hello\0world");
    assert!(outcome.is_err());

    let failure = outcome.unwrap_err();
    let mentions_null_byte = failure.message.contains("null byte");
    assert!(mentions_null_byte);
}
/// A model built from the raw bytes of the downloaded model file can parse text.
#[test]
fn test_load_from_memory() {
    let (_, model_path, _) = get_model_state();
    let model_bytes = std::fs::read(model_path).expect("Failed to read model file");

    let in_memory_model =
        udpipe_rs::Model::load_from_memory(&model_bytes).expect("Failed to load from memory");
    let parsed = in_memory_model
        .parse("Test sentence.")
        .expect("Failed to parse");

    assert!(!parsed.is_empty());
}
/// Loads a second, independent model from disk and drops it explicitly.
#[test]
fn test_model_drop() {
    let (_, model_path, _) = get_model_state();
    let throwaway = udpipe_rs::Model::load(model_path).expect("Failed to load model");
    // The test passes as long as loading and dropping complete without panicking.
    drop(throwaway);
}
/// The part-of-speech helper predicates find a noun, an adjective and punctuation in a
/// simple sentence.
#[test]
fn test_word_pos_helpers() {
    use udpipe_rs::Word;

    let parsed = parse("The quick brown fox jumps.").expect("Failed to parse");

    assert!(
        parsed.iter().any(Word::is_noun),
        "Should have at least one noun"
    );
    assert!(
        parsed.iter().any(Word::is_adjective),
        "Should have at least one adjective"
    );
    assert!(parsed.iter().any(Word::is_punct), "Should have punctuation");
}
/// Calls `get_feature` with two keys on the first word that has a non-empty `feats`.
///
/// The results are discarded; the test only checks that the calls complete without
/// panicking.
#[test]
fn test_word_get_feature() {
    let parsed = parse("She runs.").expect("Failed to parse");

    if let Some(word) = parsed.iter().find(|candidate| !candidate.feats.is_empty()) {
        for feature_key in ["Number", "NonExistent"] {
            let _ = word.get_feature(feature_key);
        }
    }
}
}