//! br-invoice-parser 0.1.10
//!
//! A Rust library for parsing invoices and bills from PDF and XLSX files.
//!
//! Documentation
pub mod error;
pub mod extractors;
pub mod models;
pub mod parsers;

pub use error::{InvoiceParserError, Result};
pub use extractors::{PdfDocument, PdfExtractor, SheetData, XlsxExtractor};
pub use models::{
    Address, Currency, DocumentFormat, Invoice, InvoiceType, InvoiceValidation, LineItem,
    LineItemValidation, ParseResult, Party, PaymentInfo, TaxSummary,
};
pub use parsers::InvoiceParser;

use std::path::Path;

/// Parses the invoice document stored at `path`.
///
/// Crate-level shortcut: `path` is handed unchanged to
/// [`InvoiceParser::parse_file`] and its result is returned as-is.
///
/// # Errors
///
/// Returns whatever error [`InvoiceParser::parse_file`] reports.
pub fn parse_file<P>(path: P) -> Result<ParseResult>
where
    P: AsRef<Path>,
{
    InvoiceParser::parse_file(path)
}

/// Parses the PDF document stored at `path`.
///
/// Crate-level shortcut: `path` is handed unchanged to
/// [`InvoiceParser::parse_pdf`] and its result is returned as-is.
///
/// # Errors
///
/// Returns whatever error [`InvoiceParser::parse_pdf`] reports.
pub fn parse_pdf<P>(path: P) -> Result<ParseResult>
where
    P: AsRef<Path>,
{
    InvoiceParser::parse_pdf(path)
}

/// Parses a PDF document supplied as an in-memory byte slice.
///
/// Crate-level shortcut: `bytes` is handed unchanged to
/// [`InvoiceParser::parse_pdf_bytes`] and its result is returned as-is.
///
/// # Errors
///
/// Returns whatever error [`InvoiceParser::parse_pdf_bytes`] reports.
pub fn parse_pdf_bytes(bytes: &[u8]) -> Result<ParseResult> {
    InvoiceParser::parse_pdf_bytes(bytes)
}

/// Parses the XLSX workbook stored at `path`.
///
/// Crate-level shortcut: `path` is handed unchanged to
/// [`InvoiceParser::parse_xlsx`] and its result is returned as-is.
///
/// # Errors
///
/// Returns whatever error [`InvoiceParser::parse_xlsx`] reports.
pub fn parse_xlsx<P>(path: P) -> Result<ParseResult>
where
    P: AsRef<Path>,
{
    InvoiceParser::parse_xlsx(path)
}

/// Parses a single [`Invoice`] from already-extracted plain text.
///
/// Crate-level shortcut: `text` is handed unchanged to
/// [`InvoiceParser::parse_text`] and its result is returned as-is. Unlike the
/// file-based entry points, this returns one [`Invoice`] rather than a
/// [`ParseResult`].
///
/// # Errors
///
/// Returns whatever error [`InvoiceParser::parse_text`] reports; the crate's
/// own tests expect an error for an empty string.
pub fn parse_text(text: &str) -> Result<Invoice> {
    InvoiceParser::parse_text(text)
}

/// Unit and fixture-based tests for the crate-level parsing helpers.
///
/// Most tests feed short text snippets to [`parse_text`]; the `*_from_pdf`
/// and line-item tests read sample PDFs under `examples/pdf/` relative to the
/// directory the tests are run from.
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Datelike;

    /// A minimal labelled invoice yields its number, type and monetary totals.
    #[test]
    fn test_parse_simple_invoice_text() {
        let text = r#"
            INVOICE
            Invoice Number: INV-2024-001
            Invoice Date: 2024-01-15
            Due Date: 2024-02-15
            
            Subtotal: $1,000.00
            Tax: $100.00
            Total: $1,100.00
        "#;

        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_number, Some("INV-2024-001".to_string()));
        assert_eq!(invoice.invoice_type, InvoiceType::Standard);
        assert_eq!(invoice.total_amount, 1100.0);
        assert_eq!(invoice.subtotal, Some(1000.0));
        assert_eq!(invoice.total_tax, Some(100.0));
    }

    /// An ISO currency code next to the total sets the invoice currency.
    #[test]
    fn test_parse_invoice_with_currency() {
        let text = "Invoice #12345\nTotal: EUR 500.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_number, Some("12345".to_string()));
        assert_eq!(invoice.currency, Currency::EUR);
    }

    /// A "CREDIT NOTE" heading is classified as `InvoiceType::CreditNote`.
    #[test]
    fn test_detect_credit_note() {
        let text = "CREDIT NOTE\nReference: CN-001\nTotal: $200.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_type, InvoiceType::CreditNote);
    }

    /// A "RECEIPT" heading is classified as `InvoiceType::Receipt`.
    #[test]
    fn test_detect_receipt() {
        let text = "RECEIPT\nReceipt #R-001\nAmount: $50.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_type, InvoiceType::Receipt);
    }

    /// Currency codes and symbols convert to the matching `Currency` variant;
    /// unknown codes are preserved in `Currency::Other`.
    #[test]
    fn test_currency_from_str() {
        assert_eq!(Currency::from("USD"), Currency::USD);
        assert_eq!(Currency::from("$"), Currency::USD);
        assert_eq!(Currency::from("EUR"), Currency::EUR);
        // Euro sign (U+20AC). The literal here had been reduced to an empty
        // string; it is written as an escape so it survives encoding
        // round-trips.
        assert_eq!(Currency::from("\u{20AC}"), Currency::EUR);
        assert_eq!(Currency::from("GBP"), Currency::GBP);
        assert_eq!(Currency::from("£"), Currency::GBP);
        assert_eq!(Currency::from("CNY"), Currency::CNY);
        assert_eq!(Currency::from("XYZ"), Currency::Other("XYZ".to_string()));
    }

    /// The JSON form of a parsed invoice contains its number and amount.
    #[test]
    fn test_invoice_to_json() {
        let text = "Invoice #TEST-001\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();
        let json = invoice.to_json().unwrap();

        assert!(json.contains("TEST-001"));
        assert!(json.contains("100"));
    }

    /// Parsing an empty string is an error rather than an empty invoice.
    #[test]
    fn test_empty_document_error() {
        let result = parse_text("");
        assert!(result.is_err());
    }

    /// `ParseResult::single(..).with_source(..)` records the source file name
    /// and holds exactly one invoice.
    #[test]
    fn test_parse_result_with_source() {
        let invoice = Invoice::new();
        let result = ParseResult::single(invoice).with_source("test.pdf");

        assert_eq!(result.source_file, Some("test.pdf".to_string()));
        assert_eq!(result.invoices.len(), 1);
    }

    /// The number after "Account No" is used as the invoice number.
    #[test]
    fn test_aws_account_number_extraction() {
        let text = "Account No:200221655242 (Mlytics Limited-200221655242)\nTotal Amount Due\nUSD 13586.67";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_number, Some("200221655242".to_string()));
    }

    /// The parenthesised name after the account number becomes `account_name`,
    /// with surrounding whitespace removed.
    #[test]
    fn test_account_name_extraction() {
        let text = "Account No: 970363902599 ( mlytics-prdservice2 )\nCompany name: Mlytics Limited\nTotal: $47.59";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_number, Some("970363902599".to_string()));
        assert_eq!(
            invoice.account_name,
            Some("mlytics-prdservice2".to_string())
        );
    }

    /// Text mentioning "AWS Services Pricing" sets the vendor to Amazon Web Services.
    #[test]
    fn test_aws_vendor_detection() {
        let text = "AWS Services Pricing 14,301.76\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.vendor.name, Some("Amazon Web Services".to_string()));
    }

    /// The eCloudValley company line sets the normalised vendor name.
    #[test]
    fn test_ecloudvalley_vendor_detection() {
        let text = "伊雲谷(香港) ECLOUDVALLEY TECHNOLOGY (HK) LIMITED\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(
            invoice.vendor.name,
            Some("eCloudValley Technology".to_string())
        );
    }

    /// The value after "Company name:" becomes the customer name.
    #[test]
    fn test_customer_extraction_from_company_name() {
        let text = "Company name: Mlytics Limited\nBilling Address: Some Address\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.customer.name, Some("Mlytics Limited".to_string()));
    }

    /// The "AWS Service Charge" amount is reported as the subtotal.
    #[test]
    fn test_aws_service_charge_subtotal() {
        let text = "AWS Service Charge 1,806.55\nDiscount -108.39\nTotal Amount Due\nUSD 1,698.16";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.subtotal, Some(1806.55));
    }

    /// The "AWS Services Pricing" amount is reported as the subtotal.
    #[test]
    fn test_aws_services_pricing_subtotal() {
        let text = "AWS Services Pricing 14,301.76\nDiscount -715.09\nTotal: $13,586.67";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.subtotal, Some(14301.76));
    }

    /// The sample PDF yields line items, including a Virtual Private Cloud
    /// entry with the expected usage description.
    #[test]
    fn test_line_items_extraction_from_pdf() {
        let result = parse_file("examples/pdf/202512-CH306-7133-970363902599.pdf").unwrap();
        let invoice = &result.invoices[0];

        assert!(!invoice.line_items.is_empty());

        let vpc_item = invoice
            .line_items
            .iter()
            .find(|item| {
                item.service_name
                    .as_ref()
                    .is_some_and(|s| s.contains("Virtual Private Cloud"))
            })
            .expect("sample PDF should contain a Virtual Private Cloud line item");
        assert_eq!(vpc_item.description, "USE1-PublicIPv4:InUseAddress");
    }

    /// Every line item of the sample PDF has a service name and a non-empty
    /// description.
    #[test]
    fn test_line_items_have_service_name_and_description() {
        let result = parse_file("examples/pdf/202512-CH306-7133-970363902599.pdf").unwrap();
        let invoice = &result.invoices[0];

        // Guard against a vacuous pass: without items the loop below would
        // assert nothing.
        assert!(!invoice.line_items.is_empty());

        for item in &invoice.line_items {
            assert!(item.service_name.is_some());
            assert!(!item.description.is_empty());
        }
    }

    /// At least one line item of the eCloudValley sample PDF has a unit price.
    #[test]
    fn test_line_items_have_unit_price() {
        let result = parse_file("examples/pdf/HPK_AWS_eCv_200221655242_202512.pdf").unwrap();
        let invoice = &result.invoices[0];

        let item_with_price = invoice
            .line_items
            .iter()
            .find(|item| item.unit_price.is_some());
        assert!(item_with_price.is_some());
    }

    /// A "Description of Statement" line is classified as `InvoiceType::Statement`.
    #[test]
    fn test_statement_type_detection() {
        let text = "Description of Statement Currency: USD\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_type, InvoiceType::Statement);
    }

    /// `YYYY-MM-DD` invoice dates are parsed into year, month and day.
    #[test]
    fn test_date_format_yyyy_mm_dd() {
        let text = "Invoice Date: 2025-01-15\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        let date = invoice
            .invoice_date
            .expect("invoice date should be parsed from `Invoice Date:`");
        assert_eq!(date.year(), 2025);
        assert_eq!(date.month(), 1);
        assert_eq!(date.day(), 15);
    }

    /// `YYYY/MM/DD` due dates on the line after the label are parsed.
    #[test]
    fn test_date_format_yyyy_slash() {
        let text = "Payment Due Date\n2025/02/25\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        let date = invoice
            .due_date
            .expect("due date should be parsed from the line after `Payment Due Date`");
        assert_eq!(date.year(), 2025);
        assert_eq!(date.month(), 2);
        assert_eq!(date.day(), 25);
    }

    /// Labels followed directly by a colon (no space) are still recognised.
    ///
    /// NOTE(review): the test name refers to the Chinese full-width colon
    /// (U+FF1A), but the input below only contains ASCII colons. Presumably
    /// the original literals used U+FF1A and were normalised somewhere along
    /// the way; confirm against the parser and restore them if so.
    #[test]
    fn test_chinese_colon_support() {
        let text = "Account No:123456789\nCompany name:Test Company\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_number, Some("123456789".to_string()));
        assert_eq!(invoice.customer.name, Some("Test Company".to_string()));
    }

    /// Thousands separators in amounts are ignored when parsing the total.
    #[test]
    fn test_amount_with_comma_separator() {
        let text = "Grand Total: $13,586.67";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.total_amount, 13586.67);
    }
}