pub mod error;
pub mod extractors;
pub mod models;
pub mod parsers;
pub use error::{InvoiceParserError, Result};
pub use extractors::{PdfDocument, PdfExtractor, SheetData, XlsxExtractor};
pub use models::{
Address, Currency, DocumentFormat, Invoice, InvoiceType, InvoiceValidation, LineItem,
LineItemValidation, ParseResult, Party, PaymentInfo, TaxSummary,
};
pub use parsers::InvoiceParser;
use std::path::Path;
/// Parses the invoice document stored at `path`.
///
/// Crate-level convenience wrapper: the path is handed unchanged to
/// [`InvoiceParser::parse_file`] and its result is returned as-is.
/// The tests in this crate call it with PDF fixtures; how other file types
/// are handled is decided by `InvoiceParser` (not visible from this module).
///
/// # Errors
///
/// Propagates any error returned by [`InvoiceParser::parse_file`].
pub fn parse_file<P>(path: P) -> Result<ParseResult>
where
    P: AsRef<Path>,
{
    InvoiceParser::parse_file(path)
}
/// Parses the PDF invoice stored at `path`.
///
/// Crate-level convenience wrapper: the path is handed unchanged to
/// [`InvoiceParser::parse_pdf`] and its result is returned as-is.
///
/// # Errors
///
/// Propagates any error returned by [`InvoiceParser::parse_pdf`].
pub fn parse_pdf<P>(path: P) -> Result<ParseResult>
where
    P: AsRef<Path>,
{
    InvoiceParser::parse_pdf(path)
}
/// Parses a PDF invoice supplied as an in-memory byte slice.
///
/// Crate-level convenience wrapper: `bytes` is forwarded unchanged to
/// [`InvoiceParser::parse_pdf_bytes`] and its result is returned as-is.
///
/// # Errors
///
/// Propagates any error returned by [`InvoiceParser::parse_pdf_bytes`].
pub fn parse_pdf_bytes(bytes: &[u8]) -> Result<ParseResult> {
InvoiceParser::parse_pdf_bytes(bytes)
}
/// Parses the XLSX invoice workbook stored at `path`.
///
/// Crate-level convenience wrapper: the path is handed unchanged to
/// [`InvoiceParser::parse_xlsx`] and its result is returned as-is.
///
/// # Errors
///
/// Propagates any error returned by [`InvoiceParser::parse_xlsx`].
pub fn parse_xlsx<P>(path: P) -> Result<ParseResult>
where
    P: AsRef<Path>,
{
    InvoiceParser::parse_xlsx(path)
}
/// Parses invoice data out of plain text.
///
/// Crate-level convenience wrapper: `text` is forwarded unchanged to
/// [`InvoiceParser::parse_text`]. Unlike the file-based entry points, this
/// returns a single [`Invoice`] rather than a [`ParseResult`].
///
/// # Errors
///
/// Propagates any error returned by [`InvoiceParser::parse_text`]. The unit
/// tests in this file expect an `Err` for an empty string.
pub fn parse_text(text: &str) -> Result<Invoice> {
InvoiceParser::parse_text(text)
}
#[cfg(test)]
mod tests {
    //! Tests for the crate-level convenience functions.
    //!
    //! Most tests feed small text snippets to `parse_text`; a few read PDF
    //! fixtures under `examples/pdf/` via `parse_file` and therefore need to
    //! run with the crate root as the working directory.

    use super::*;
    use chrono::Datelike;

    // A labelled invoice yields its number, type and all three amounts.
    #[test]
    fn test_parse_simple_invoice_text() {
        // The raw string body is kept flush-left on purpose so that no
        // leading whitespace is added to the parser input.
        let text = r#"
INVOICE
Invoice Number: INV-2024-001
Invoice Date: 2024-01-15
Due Date: 2024-02-15
Subtotal: $1,000.00
Tax: $100.00
Total: $1,100.00
"#;
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.invoice_number, Some("INV-2024-001".to_string()));
        assert_eq!(invoice.invoice_type, InvoiceType::Standard);
        assert_eq!(invoice.total_amount, 1100.0);
        assert_eq!(invoice.subtotal, Some(1000.0));
        assert_eq!(invoice.total_tax, Some(100.0));
    }

    // A currency code in front of the total sets the invoice currency.
    #[test]
    fn test_parse_invoice_with_currency() {
        let text = "Invoice #12345\nTotal: EUR 500.00";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.invoice_number, Some("12345".to_string()));
        assert_eq!(invoice.currency, Currency::EUR);
    }

    // A "CREDIT NOTE" heading is classified as a credit note.
    #[test]
    fn test_detect_credit_note() {
        let text = "CREDIT NOTE\nReference: CN-001\nTotal: $200.00";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.invoice_type, InvoiceType::CreditNote);
    }

    // A "RECEIPT" heading is classified as a receipt.
    #[test]
    fn test_detect_receipt() {
        let text = "RECEIPT\nReceipt #R-001\nAmount: $50.00";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.invoice_type, InvoiceType::Receipt);
    }

    // `Currency::from` accepts ISO codes and symbols; unknown codes are kept
    // verbatim in `Currency::Other`.
    #[test]
    fn test_currency_from_str() {
        assert_eq!(Currency::from("USD"), Currency::USD);
        assert_eq!(Currency::from("$"), Currency::USD);
        assert_eq!(Currency::from("EUR"), Currency::EUR);
        assert_eq!(Currency::from("€"), Currency::EUR);
        assert_eq!(Currency::from("GBP"), Currency::GBP);
        assert_eq!(Currency::from("£"), Currency::GBP);
        assert_eq!(Currency::from("CNY"), Currency::CNY);
        assert_eq!(Currency::from("XYZ"), Currency::Other("XYZ".to_string()));
    }

    // The JSON rendering contains the invoice number and the amount.
    #[test]
    fn test_invoice_to_json() {
        let text = "Invoice #TEST-001\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();
        let json = invoice.to_json().unwrap();
        assert!(json.contains("TEST-001"));
        assert!(json.contains("100"));
    }

    // Empty input is rejected rather than producing an empty invoice.
    #[test]
    fn test_empty_document_error() {
        let result = parse_text("");
        assert!(result.is_err());
    }

    // `with_source` records the source file and keeps the wrapped invoice.
    #[test]
    fn test_parse_result_with_source() {
        let invoice = Invoice::new();
        let result = ParseResult::single(invoice).with_source("test.pdf");
        assert_eq!(result.source_file, Some("test.pdf".to_string()));
        assert_eq!(result.invoices.len(), 1);
    }

    // "Account No:" without a following space still yields the account number.
    #[test]
    fn test_aws_account_number_extraction() {
        let text = "Account No:200221655242 (Mlytics Limited-200221655242)\nTotal Amount Due\nUSD 13586.67";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.invoice_number, Some("200221655242".to_string()));
    }

    // The parenthesised text after the account number becomes the account
    // name, with the surrounding spaces trimmed.
    #[test]
    fn test_account_name_extraction() {
        let text = "Account No: 970363902599 ( mlytics-prdservice2 )\nCompany name: Mlytics Limited\nTotal: $47.59";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.invoice_number, Some("970363902599".to_string()));
        assert_eq!(
            invoice.account_name,
            Some("mlytics-prdservice2".to_string())
        );
    }

    // Text mentioning AWS services is attributed to the AWS vendor.
    #[test]
    fn test_aws_vendor_detection() {
        let text = "AWS Services Pricing 14,301.76\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.vendor.name, Some("Amazon Web Services".to_string()));
    }

    // The eCloudValley company line maps to the normalised vendor name.
    #[test]
    fn test_ecloudvalley_vendor_detection() {
        let text = "伊雲谷(香港) ECLOUDVALLEY TECHNOLOGY (HK) LIMITED\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();
        assert_eq!(
            invoice.vendor.name,
            Some("eCloudValley Technology".to_string())
        );
    }

    // "Company name:" populates the customer name.
    #[test]
    fn test_customer_extraction_from_company_name() {
        let text = "Company name: Mlytics Limited\nBilling Address: Some Address\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.customer.name, Some("Mlytics Limited".to_string()));
    }

    // "AWS Service Charge" is used as the subtotal.
    #[test]
    fn test_aws_service_charge_subtotal() {
        let text = "AWS Service Charge 1,806.55\nDiscount -108.39\nTotal Amount Due\nUSD 1,698.16";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.subtotal, Some(1806.55));
    }

    // "AWS Services Pricing" is used as the subtotal.
    #[test]
    fn test_aws_services_pricing_subtotal() {
        let text = "AWS Services Pricing 14,301.76\nDiscount -715.09\nTotal: $13,586.67";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.subtotal, Some(14301.76));
    }

    // The fixture PDF yields line items, including the expected VPC entry.
    #[test]
    fn test_line_items_extraction_from_pdf() {
        let result = parse_file("examples/pdf/202512-CH306-7133-970363902599.pdf").unwrap();
        let invoice = &result.invoices[0];
        assert!(!invoice.line_items.is_empty());

        let vpc_item = invoice
            .line_items
            .iter()
            .find(|item| {
                item.service_name
                    .as_ref()
                    .is_some_and(|s| s.contains("Virtual Private Cloud"))
            })
            .expect("expected a line item for the Virtual Private Cloud service");
        assert_eq!(vpc_item.description, "USE1-PublicIPv4:InUseAddress");
    }

    // Every extracted line item carries a service name and a description.
    #[test]
    fn test_line_items_have_service_name_and_description() {
        let result = parse_file("examples/pdf/202512-CH306-7133-970363902599.pdf").unwrap();
        let invoice = &result.invoices[0];
        for item in &invoice.line_items {
            assert!(item.service_name.is_some());
            assert!(!item.description.is_empty());
        }
    }

    // At least one line item in the fixture has a unit price.
    #[test]
    fn test_line_items_have_unit_price() {
        let result = parse_file("examples/pdf/HPK_AWS_eCv_200221655242_202512.pdf").unwrap();
        let invoice = &result.invoices[0];
        assert!(invoice
            .line_items
            .iter()
            .any(|item| item.unit_price.is_some()));
    }

    // A "Description of Statement" heading is classified as a statement.
    #[test]
    fn test_statement_type_detection() {
        let text = "Description of Statement Currency: USD\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.invoice_type, InvoiceType::Statement);
    }

    // Dates written as YYYY-MM-DD are parsed into the invoice date.
    #[test]
    fn test_date_format_yyyy_mm_dd() {
        let text = "Invoice Date: 2025-01-15\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();
        let date = invoice
            .invoice_date
            .expect("invoice date should be parsed from YYYY-MM-DD");
        assert_eq!(date.year(), 2025);
        assert_eq!(date.month(), 1);
        assert_eq!(date.day(), 15);
    }

    // Dates written as YYYY/MM/DD on the line after the label are parsed
    // into the due date.
    #[test]
    fn test_date_format_yyyy_slash() {
        let text = "Payment Due Date\n2025/02/25\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();
        let date = invoice
            .due_date
            .expect("due date should be parsed from YYYY/MM/DD");
        assert_eq!(date.year(), 2025);
        assert_eq!(date.month(), 2);
        assert_eq!(date.day(), 25);
    }

    // Labels are recognised with an ASCII colon and with the fullwidth colon
    // (U+FF1A) used in Chinese-language documents.
    #[test]
    fn test_chinese_colon_support() {
        // The fullwidth colon is written as an escape so that editors or
        // Unicode normalisation cannot silently turn it back into ':'.
        // NOTE(review): the fullwidth variant was added because the test name
        // promises it; confirm the parser's label patterns accept U+FF1A.
        let variants = [
            "Account No:123456789\nCompany name:Test Company\nTotal: $100.00",
            "Account No\u{FF1A}123456789\nCompany name\u{FF1A}Test Company\nTotal: $100.00",
        ];
        for text in variants {
            let invoice = parse_text(text).unwrap();
            assert_eq!(
                invoice.invoice_number,
                Some("123456789".to_string()),
                "input: {text:?}"
            );
            assert_eq!(
                invoice.customer.name,
                Some("Test Company".to_string()),
                "input: {text:?}"
            );
        }
    }

    // Thousands separators in amounts are ignored when parsing the total.
    #[test]
    fn test_amount_with_comma_separator() {
        let text = "Grand Total: $13,586.67";
        let invoice = parse_text(text).unwrap();
        assert_eq!(invoice.total_amount, 13586.67);
    }
}