use inference::extraction::{build_provider, ExtractionOpts, ExtractorConfig, NoneExtractor};
use inference::{rule_based_extract, ExtractedEntity, ExtractionProvider};
/// A sentence containing one `YYYY-MM-DD` date must yield exactly one `date`
/// entity whose value is the literal date text and whose score is 1.0.
#[test]
fn rule_extract_iso_date_tier1a_standard() {
    let found = rule_based_extract("Meeting scheduled on 2023-06-15 at the office.");

    // Keep only the entities tagged as dates.
    let mut date_hits: Vec<&ExtractedEntity> = Vec::new();
    for entity in found.iter() {
        if entity.entity_type == "date" {
            date_hits.push(entity);
        }
    }

    assert_eq!(date_hits.len(), 1, "should extract exactly one ISO date");
    let only = date_hits[0];
    assert_eq!(only.value, "2023-06-15");
    assert_eq!(only.score, 1.0, "rule-based entities always score 1.0");
}
/// The first day of a year (`2024-01-01`) is extracted as a single `date` entity.
#[test]
fn rule_extract_iso_date_tier1a_start_of_year() {
    let found = rule_based_extract("Event on 2024-01-01.");
    let date_hits = found
        .iter()
        .filter(|hit| hit.entity_type == "date")
        .collect::<Vec<&ExtractedEntity>>();

    assert_eq!(date_hits.len(), 1);
    let first = date_hits[0];
    assert_eq!(first.value, "2024-01-01");
}
/// The last day of a year (`2022-12-31`) at the very end of the input is
/// extracted as a single `date` entity.
#[test]
fn rule_extract_iso_date_tier1a_end_of_year() {
    let extracted = rule_based_extract("Archive date: 2022-12-31");

    let mut only_dates: Vec<&ExtractedEntity> = Vec::new();
    for candidate in extracted.iter() {
        if candidate.entity_type == "date" {
            only_dates.push(candidate);
        }
    }

    assert_eq!(only_dates.len(), 1);
    assert_eq!(only_dates[0].value, "2022-12-31");
}
/// Two ISO dates in one sentence must both be extracted.
///
/// Besides the count, the test checks that each expected date text is present
/// (in any order), so extracting two wrong or duplicated dates cannot pass.
#[test]
fn rule_extract_multiple_iso_dates() {
    let entities = rule_based_extract("Contract starts 2024-03-01 and ends 2025-02-28.");
    let dates: Vec<&ExtractedEntity> = entities
        .iter()
        .filter(|e| e.entity_type == "date")
        .collect();
    assert_eq!(dates.len(), 2, "should extract both ISO dates");

    // Order-independent check: the test does not pin the extraction order.
    for expected in ["2024-03-01", "2025-02-28"] {
        assert!(
            dates.iter().any(|d| d.value == expected),
            "expected ISO date {expected} to be extracted, got: {:?}",
            dates.iter().map(|d| &d.value).collect::<Vec<_>>()
        );
    }
}
/// A "Month day, year" date written in natural language is extracted, and the
/// first extracted date value keeps the month name.
#[test]
fn rule_extract_natural_date_tier1b_full() {
    let found = rule_based_extract("The event was held on January 15, 2023.");
    let date_hits = found
        .iter()
        .filter(|hit| hit.entity_type == "date")
        .collect::<Vec<&ExtractedEntity>>();

    assert!(!date_hits.is_empty(), "should extract natural language date");

    let first_value = &date_hits[0].value;
    assert!(
        first_value.contains("January"),
        "date value should contain month name, got: {}",
        first_value
    );
}
/// A date with an abbreviated month name ("Dec 25, 2020") is extracted and the
/// first extracted date value keeps the abbreviation.
#[test]
fn rule_extract_natural_date_tier1b_abbreviated_month() {
    let extracted = rule_based_extract("Published Dec 25, 2020.");

    let mut date_hits: Vec<&ExtractedEntity> = Vec::new();
    for candidate in extracted.iter() {
        if candidate.entity_type == "date" {
            date_hits.push(candidate);
        }
    }

    assert!(!date_hits.is_empty(), "abbreviated month should be extracted");
    let first_value = &date_hits[0].value;
    assert!(first_value.contains("Dec"));
}
/// A month + day without a year ("March 5") still produces at least one
/// `date` entity.
#[test]
fn rule_extract_natural_date_tier1b_no_year() {
    let found = rule_based_extract("See you March 5 at the conference.");

    // Only presence matters here, so no intermediate collection is needed.
    let has_date = found.iter().any(|hit| hit.entity_type == "date");

    assert!(has_date, "month+day without year should be extracted");
}
/// The relative expression "last month" must not produce any `date` entity.
#[test]
fn rule_extract_relative_time_last_month_excluded() {
    let found = rule_based_extract("I visited last month and it was great.");

    // Values of anything that was (wrongly) classified as a date.
    let date_values: Vec<_> = found
        .iter()
        .filter(|hit| hit.entity_type == "date")
        .map(|hit| &hit.value)
        .collect();

    assert!(
        date_values.is_empty(),
        "relative time 'last month' must not be extracted, got: {:?}",
        date_values
    );
}
/// The relative expression "3 years ago" must not produce any `date` entity.
#[test]
fn rule_extract_relative_time_years_ago_excluded() {
    let extracted = rule_based_extract("That happened 3 years ago when we started.");

    let mut date_values = Vec::new();
    for candidate in extracted.iter() {
        if candidate.entity_type == "date" {
            date_values.push(&candidate.value);
        }
    }

    assert!(
        date_values.is_empty(),
        "'3 years ago' must not be extracted as a date, got: {:?}",
        date_values
    );
}
/// A bare four-digit year ("2023") on its own must not produce a `date`
/// entity; the failure message references CE-93.
#[test]
fn rule_extract_bare_year_not_extracted() {
    let found = rule_based_extract("John moved to Seattle in 2023 and started a new job.");

    let date_values: Vec<_> = found
        .iter()
        .filter(|hit| hit.entity_type == "date")
        .map(|hit| &hit.value)
        .collect();

    assert!(
        date_values.is_empty(),
        "bare year '2023' must not be extracted — CE-93: temporal proximity applies in scoring, \
         not extraction; got: {:?}",
        date_values
    );
}
/// The relative expression "next week" must not produce any `date` entity.
///
/// On failure the offending values are printed, matching the other
/// relative-time exclusion tests in this file.
#[test]
fn rule_extract_next_week_excluded() {
    let entities = rule_based_extract("The report is due next week.");
    let dates: Vec<&ExtractedEntity> = entities
        .iter()
        .filter(|e| e.entity_type == "date")
        .collect();
    assert!(
        dates.is_empty(),
        "'next week' must not be extracted as a date, got: {:?}",
        dates.iter().map(|e| &e.value).collect::<Vec<_>>()
    );
}
/// An e-mail address embedded in a sentence is extracted as exactly one
/// `email` entity carrying the full address.
#[test]
fn rule_extract_email_found() {
    let found = rule_based_extract("Contact us at support@dakera.ai for help.");

    let mut email_hits: Vec<&ExtractedEntity> = Vec::new();
    for candidate in found.iter() {
        if candidate.entity_type == "email" {
            email_hits.push(candidate);
        }
    }

    assert_eq!(email_hits.len(), 1, "should find one email");
    let only = email_hits[0];
    assert_eq!(only.value, "support@dakera.ai");
}
/// An e-mail address must be reported once as `email` and must not show up
/// inside any `url` entity as well.
#[test]
fn rule_extract_email_not_also_extracted_as_url() {
    let found = rule_based_extract("Email: info@example.com");

    let email_count = found
        .iter()
        .filter(|hit| hit.entity_type == "email")
        .count();
    assert_eq!(email_count, 1, "email should appear once");

    let leaked_as_url = found
        .iter()
        .filter(|hit| hit.entity_type == "url")
        .any(|url| url.value.contains("info@example.com"));
    assert!(!leaked_as_url, "email must not also be extracted as URL");
}
/// An `https://` link in a sentence is extracted as exactly one `url` entity
/// whose value keeps the scheme prefix.
#[test]
fn rule_extract_url_found() {
    let found = rule_based_extract("Visit https://dakera.ai/docs for documentation.");

    let mut url_hits: Vec<&ExtractedEntity> = Vec::new();
    for candidate in found.iter() {
        if candidate.entity_type == "url" {
            url_hits.push(candidate);
        }
    }

    assert_eq!(url_hits.len(), 1);
    let only = url_hits[0];
    assert!(only.value.starts_with("https://"));
}
/// A hyphenated UUID in a sentence is extracted as exactly one `uuid` entity
/// with the identifier text unchanged.
#[test]
fn rule_extract_uuid_found() {
    let found = rule_based_extract("Session ID: 550e8400-e29b-41d4-a716-446655440000 logged.");
    let uuid_hits = found
        .iter()
        .filter(|hit| hit.entity_type == "uuid")
        .collect::<Vec<&ExtractedEntity>>();

    assert_eq!(uuid_hits.len(), 1);
    let only = uuid_hits[0];
    assert_eq!(only.value, "550e8400-e29b-41d4-a716-446655440000");
}
/// A dotted-quad IPv4 address in a sentence is extracted as exactly one `ip`
/// entity with the address text unchanged.
#[test]
fn rule_extract_ipv4_found() {
    let found = rule_based_extract("Server at 192.168.1.100 is unreachable.");

    let mut ip_hits: Vec<&ExtractedEntity> = Vec::new();
    for candidate in found.iter() {
        if candidate.entity_type == "ip" {
            ip_hits.push(candidate);
        }
    }

    assert_eq!(ip_hits.len(), 1);
    assert_eq!(ip_hits[0].value, "192.168.1.100");
}
/// Empty input yields no entities at all.
#[test]
fn rule_extract_empty_text_returns_empty() {
    let nothing_found = rule_based_extract("").is_empty();
    assert!(nothing_found);
}
/// `to_tag` on a `person` entity with value "Alice" renders
/// `entity:person:alice`.
#[test]
fn extracted_entity_to_tag_format() {
    let person = ExtractedEntity {
        entity_type: String::from("person"),
        value: String::from("Alice"),
        score: 0.95,
        start: 0,
        end: 5,
    };

    let tag = person.to_tag();
    assert_eq!(tag, "entity:person:alice");
}
/// `to_tag` lowercases the entity value: "Anthropic" becomes `anthropic` in
/// the rendered tag.
#[test]
fn extracted_entity_to_tag_lowercases_value() {
    let organization = ExtractedEntity {
        entity_type: String::from("organization"),
        value: String::from("Anthropic"),
        score: 0.9,
        start: 0,
        end: 9,
    };

    assert_eq!(organization.to_tag(), "entity:organization:anthropic");
}
/// The tag is split into at most three `:`-separated segments; the third
/// (value) segment must not contain a colon even when the raw value does.
#[test]
fn extracted_entity_to_tag_sanitizes_colons_in_value() {
    let url_entity = ExtractedEntity {
        entity_type: String::from("url"),
        value: String::from("https://example.com"),
        score: 1.0,
        start: 0,
        end: 19,
    };
    let tag = url_entity.to_tag();

    // `splitn(3, ..)` leaves everything after the second colon in the last segment.
    let segments = tag.splitn(3, ':').collect::<Vec<&str>>();
    assert_eq!(segments[0], "entity");
    assert_eq!(segments[1], "url");

    let value_segment = segments[2];
    assert!(
        !value_segment.contains(':'),
        "value part of tag must not contain colons, got: {tag}"
    );
}
/// `ExtractorConfig::none()` names the `none` provider and leaves every
/// optional field unset.
#[test]
fn extractor_config_none_has_correct_provider() {
    let cfg = ExtractorConfig::none();
    let ExtractorConfig {
        provider,
        model,
        base_url,
        api_key,
    } = &cfg;

    assert_eq!(provider, "none");
    assert!(model.is_none());
    assert!(base_url.is_none());
    assert!(api_key.is_none());
}
/// `ExtractorConfig::gliner()` names the `gliner` provider.
#[test]
fn extractor_config_gliner_has_correct_provider() {
    let gliner_cfg = ExtractorConfig::gliner();
    let provider_name = &gliner_cfg.provider;
    assert_eq!(provider_name, "gliner");
}
/// The `Debug` rendering of an `ExtractorConfig` must never contain the raw
/// API key and must show a redaction marker instead.
#[test]
fn extractor_config_debug_redacts_api_key() {
    // Single source of truth for the secret, so the config and the leak check
    // cannot drift apart.
    let secret = "sk-supersecret-key-1234";
    let cfg = ExtractorConfig {
        provider: "openai".to_string(),
        model: None,
        base_url: None,
        api_key: Some(secret.to_string()),
    };
    let debug_output = format!("{cfg:?}");
    assert!(
        !debug_output.contains(secret),
        "API key must not appear in Debug output, got: {debug_output}"
    );
    // "[REDACTED]" itself contains "REDACTED", so one substring check accepts
    // both the bracketed and the bare marker.
    assert!(
        debug_output.contains("REDACTED"),
        "Debug output should show [REDACTED] for api_key, got: {debug_output}"
    );
}
/// Building a provider from the `none` config (with `None` as the second
/// argument) yields a provider that reports the name `none`.
#[test]
fn build_provider_none_returns_none_provider() {
    let none_cfg = ExtractorConfig::none();
    let built = build_provider(&none_cfg, None);

    let reported_name = built.provider_name();
    assert_eq!(reported_name, "none");
}
/// An unrecognised provider name must not fail: the built provider reports
/// the name `none`.
#[test]
fn build_provider_unknown_falls_back_to_none() {
    let unknown_cfg = ExtractorConfig {
        provider: String::from("nonexistent_provider"),
        model: None,
        base_url: None,
        api_key: None,
    };
    let built = build_provider(&unknown_cfg, None);

    let reported_name = built.provider_name();
    assert_eq!(
        reported_name, "none",
        "unknown provider must fall back to NoneExtractor"
    );
}
/// Building from the `gliner` config while passing `None` as the second
/// argument still yields a provider that reports the name `gliner`.
#[test]
fn build_provider_gliner_without_ner_engine_returns_gliner() {
    let gliner_cfg = ExtractorConfig::gliner();
    let built = build_provider(&gliner_cfg, None);

    let reported_name = built.provider_name();
    assert_eq!(reported_name, "gliner");
}
/// `NoneExtractor::extract` succeeds and returns a result attributed to the
/// `none` provider with no entities, topics, key phrases, or summary.
#[tokio::test]
async fn none_extractor_returns_empty_extraction_result() {
    use inference::ExtractionResult;
    let extractor = NoneExtractor;
    let opts = ExtractionOpts::default();
    // `expect` replaces the former `assert!(is_ok())` + `unwrap()` pair so that
    // a failure prints both the intent message and the underlying error value.
    let r: ExtractionResult = extractor
        .extract("Some text with entities.", &opts)
        .await
        .expect("NoneExtractor should never error");
    assert_eq!(r.provider, "none");
    assert!(
        r.entities.is_empty(),
        "NoneExtractor should return zero entities"
    );
    assert!(r.topics.is_empty());
    assert!(r.key_phrases.is_empty());
    assert!(r.summary.is_none());
}