use oxidize_pdf::text::invoice::{
FieldPattern, InvoiceExtractor, InvoiceFieldType, Language, PatternLibrary,
};
/// Builds a library with `PatternLibrary::new()`, registers a single custom
/// order-number pattern, and checks the captured text and confidence that
/// `match_text` reports for it.
#[test]
fn test_create_empty_pattern_library() {
    let mut library = PatternLibrary::new();

    let order_pattern = FieldPattern::new(
        InvoiceFieldType::InvoiceNumber,
        r"Order\s+#([0-9]+)",
        0.9,
        None,
    )
    .expect("Failed to create pattern");
    library.add_pattern(order_pattern);

    let found = library.match_text("Order #12345");
    assert_eq!(found.len(), 1, "Should find 1 match");

    // Each match is a tuple; index 1 is the captured text, index 2 the confidence.
    let hit = &found[0];
    assert_eq!(hit.1, "12345", "Should extract order number");
    assert_eq!(hit.2, 0.9, "Should have correct confidence");
}
/// Runs the default Spanish pattern set over a three-line invoice snippet and
/// expects at least three matches, one of which is an invoice number.
#[test]
fn test_default_spanish_patterns() {
    let library = PatternLibrary::default_spanish();
    let sample = "Factura Nº: 2025-001\nFecha: 20/01/2025\nTotal: 1.234,56€";

    let found = library.match_text(sample);
    assert!(found.len() >= 3, "Should match multiple Spanish patterns");

    // The first tuple element carries the field type of each match.
    let invoice_number_seen = found
        .iter()
        .any(|hit| matches!(hit.0, InvoiceFieldType::InvoiceNumber));
    assert!(invoice_number_seen, "Should extract Spanish invoice number");
}
/// Runs the default English pattern set over a three-line invoice snippet and
/// expects at least three matches, one of which is an invoice number.
#[test]
fn test_default_english_patterns() {
    let library = PatternLibrary::default_english();
    let sample = "Invoice Number: INV-2025-001\nDate: 01/20/2025\nTotal: £1,234.56";

    let found = library.match_text(sample);
    assert!(found.len() >= 3, "Should match multiple English patterns");

    // The first tuple element carries the field type of each match.
    let invoice_number_seen = found
        .iter()
        .any(|hit| matches!(hit.0, InvoiceFieldType::InvoiceNumber));
    assert!(invoice_number_seen, "Should extract English invoice number");
}
/// Adds a custom `Ref:` invoice-number pattern on top of the Spanish defaults
/// and checks that both a default-format line and a custom-format line yield
/// exactly one match with the expected captured value.
#[test]
fn test_extend_default_patterns() {
    let mut library = PatternLibrary::default_spanish();

    let reference_pattern = FieldPattern::new(
        InvoiceFieldType::InvoiceNumber,
        r"Ref:\s*([A-Z0-9\-]+)",
        0.85,
        Some(Language::Spanish),
    )
    .expect("Failed to create custom pattern");
    library.add_pattern(reference_pattern);

    // Run both lookups up front, then verify each result.
    let default_hits = library.match_text("Factura Nº: 2025-001");
    let custom_hits = library.match_text("Ref: CUSTOM-123");

    assert_eq!(default_hits.len(), 1, "Default pattern should work");
    assert_eq!(default_hits[0].1, "2025-001");

    assert_eq!(custom_hits.len(), 1, "Custom pattern should work");
    assert_eq!(custom_hits[0].1, "CUSTOM-123");
}
/// Merges a custom library (one `Order #…` pattern) into the Spanish defaults
/// and checks that both pattern sets still produce exactly one match each,
/// with the expected captured value.
///
/// The captured values are asserted in addition to the match counts so that a
/// merge which keeps the count but returns the wrong capture is detected.
#[test]
fn test_merge_pattern_libraries() {
    let mut spanish = PatternLibrary::default_spanish();

    let mut custom = PatternLibrary::new();
    custom.add_pattern(
        FieldPattern::new(
            InvoiceFieldType::InvoiceNumber,
            r"Order\s+#([0-9]+)",
            0.8,
            None,
        )
        .expect("Failed to create custom pattern"),
    );

    spanish.merge(custom);

    let spanish_hits = spanish.match_text("Factura Nº: 2025-001");
    let custom_hits = spanish.match_text("Order #9999");

    assert_eq!(spanish_hits.len(), 1, "Spanish pattern should still work");
    assert_eq!(
        spanish_hits[0].1, "2025-001",
        "Spanish pattern should still extract the invoice number"
    );

    assert_eq!(
        custom_hits.len(),
        1,
        "Custom pattern should work after merge"
    );
    assert_eq!(
        custom_hits[0].1, "9999",
        "Custom pattern should extract the order number after merge"
    );
}
/// Builds an extractor from a purely custom pattern library (order number and
/// dollar amount) and expects both fields to be extracted from a two-line text.
#[test]
fn test_extractor_with_custom_patterns() {
    let mut library = PatternLibrary::new();

    // Both custom patterns share the same confidence and have no language tag.
    let definitions = [
        (InvoiceFieldType::InvoiceNumber, r"Order\s+#([0-9]+)"),
        (
            InvoiceFieldType::TotalAmount,
            r"Amount:\s*\$([0-9,]+\.[0-9]{2})",
        ),
    ];
    for (field_type, regex) in definitions {
        library.add_pattern(FieldPattern::new(field_type, regex, 0.9, None).unwrap());
    }

    let extractor = InvoiceExtractor::builder()
        .with_custom_patterns(library)
        .confidence_threshold(0.8)
        .build();

    let extraction = extractor
        .extract_from_text("Order #12345\nAmount: $1,234.56")
        .expect("Extraction should succeed");

    assert_eq!(
        extraction.fields.len(),
        2,
        "Should extract 2 fields with custom patterns"
    );
}
/// Configures an extractor with language `"es"` and then a custom pattern
/// library. The test expects the custom library to take effect instead of the
/// Spanish defaults: a Spanish invoice line yields no fields, while an
/// `Order #…` line yields one.
#[test]
fn test_custom_patterns_override_language() {
    let order_pattern = FieldPattern::new(
        InvoiceFieldType::InvoiceNumber,
        r"Order\s+#([0-9]+)",
        0.9,
        None,
    )
    .unwrap();

    let mut library = PatternLibrary::new();
    library.add_pattern(order_pattern);

    let extractor = InvoiceExtractor::builder()
        .with_language("es")
        .with_custom_patterns(library)
        .build();

    let spanish_fields = extractor
        .extract_from_text("Factura Nº: 2025-001")
        .unwrap()
        .fields;
    assert_eq!(
        spanish_fields.len(),
        0,
        "Spanish pattern should NOT work (overridden by custom)"
    );

    let custom_fields = extractor.extract_from_text("Order #9999").unwrap().fields;
    assert_eq!(custom_fields.len(), 1, "Custom pattern should work");
}
/// Extends the German defaults with a custom `Bestellnummer:` pattern, feeds
/// the combined library to an extractor, and checks that both a default-format
/// line and a custom-format line produce extracted fields.
#[test]
fn test_combine_default_and_custom() {
    let mut library = PatternLibrary::default_german();

    let order_number_pattern = FieldPattern::new(
        InvoiceFieldType::InvoiceNumber,
        r"Bestellnummer:\s*([A-Z0-9\-]+)",
        0.85,
        Some(Language::German),
    )
    .unwrap();
    library.add_pattern(order_number_pattern);

    let extractor = InvoiceExtractor::builder()
        .with_custom_patterns(library)
        .confidence_threshold(0.7)
        .build();

    // A line in the default German invoice-number format.
    let default_extraction = extractor
        .extract_from_text("Rechnung Nr. 2025-001")
        .unwrap();
    assert!(
        !default_extraction.fields.is_empty(),
        "Default German pattern should work"
    );

    // A line that only the custom pattern is written for.
    let custom_extraction = extractor
        .extract_from_text("Bestellnummer: CUSTOM-999")
        .unwrap();
    assert_eq!(
        custom_extraction.fields.len(),
        1,
        "Custom pattern should work"
    );
}
/// Compile-time check that `PatternLibrary` implements both `Send` and `Sync`.
/// The body does no runtime work; the test fails to compile if either bound
/// is not satisfied.
#[test]
fn test_pattern_library_is_thread_safe() {
    fn require_send_and_sync<T: Send + Sync>() {}

    require_send_and_sync::<PatternLibrary>();
}