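//! `invoice_parser/lib.rs` — crate root.
//!
//! Declares the `error`, `extractors`, `models` and `parsers` modules, re-exports their
//! main types, and provides free functions that forward to `InvoiceParser`.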

pub mod error;
pub mod extractors;
pub mod models;
pub mod parsers;

pub use error::{InvoiceParserError, Result};
pub use extractors::{PdfDocument, PdfExtractor, SheetData, XlsxExtractor};
pub use models::{
    Address, Currency, DocumentFormat, Invoice, InvoiceType, InvoiceValidation, LineItem,
    LineItemValidation, ParseResult, Party, PaymentInfo, TaxSummary,
};
pub use parsers::InvoiceParser;

use std::path::Path;

16pub fn parse_file<P: AsRef<Path>>(path: P) -> Result<ParseResult> {
17    InvoiceParser::parse_file(path)
18}
19
20pub fn parse_pdf<P: AsRef<Path>>(path: P) -> Result<ParseResult> {
21    InvoiceParser::parse_pdf(path)
22}
23
24pub fn parse_pdf_bytes(bytes: &[u8]) -> Result<ParseResult> {
25    InvoiceParser::parse_pdf_bytes(bytes)
26}
27
28pub fn parse_xlsx<P: AsRef<Path>>(path: P) -> Result<ParseResult> {
29    InvoiceParser::parse_xlsx(path)
30}
31
32pub fn parse_text(text: &str) -> Result<Invoice> {
33    InvoiceParser::parse_text(text)
34}
35
#[cfg(test)]
mod tests {
    //! Tests for the crate-level convenience functions and the re-exported models.
    //!
    //! Most tests feed a short text snippet to `parse_text` and check individual fields of
    //! the returned `Invoice`. The line-item tests read sample PDFs through `parse_file`
    //! using relative paths under `examples/pdf/`.

    use super::*;
    use chrono::Datelike;

    /// A labelled invoice yields its number, type and the three monetary totals.
    #[test]
    fn test_parse_simple_invoice_text() {
        let text = r#"
            INVOICE
            Invoice Number: INV-2024-001
            Invoice Date: 2024-01-15
            Due Date: 2024-02-15
            
            Subtotal: $1,000.00
            Tax: $100.00
            Total: $1,100.00
        "#;

        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_number, Some("INV-2024-001".to_string()));
        assert_eq!(invoice.invoice_type, InvoiceType::Standard);
        assert_eq!(invoice.total_amount, 1100.0);
        assert_eq!(invoice.subtotal, Some(1000.0));
        assert_eq!(invoice.total_tax, Some(100.0));
    }

    /// An ISO currency code in front of the total sets the invoice currency.
    #[test]
    fn test_parse_invoice_with_currency() {
        let text = "Invoice #12345\nTotal: EUR 500.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_number, Some("12345".to_string()));
        assert_eq!(invoice.currency, Currency::EUR);
    }

    /// A "CREDIT NOTE" heading is classified as `InvoiceType::CreditNote`.
    #[test]
    fn test_detect_credit_note() {
        let text = "CREDIT NOTE\nReference: CN-001\nTotal: $200.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_type, InvoiceType::CreditNote);
    }

    /// A "RECEIPT" heading is classified as `InvoiceType::Receipt`.
    #[test]
    fn test_detect_receipt() {
        let text = "RECEIPT\nReceipt #R-001\nAmount: $50.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_type, InvoiceType::Receipt);
    }

    /// `Currency::from` accepts both ISO codes and symbols; unknown codes map to `Other`.
    #[test]
    fn test_currency_from_str() {
        assert_eq!(Currency::from("USD"), Currency::USD);
        assert_eq!(Currency::from("$"), Currency::USD);
        assert_eq!(Currency::from("EUR"), Currency::EUR);
        assert_eq!(Currency::from("€"), Currency::EUR);
        assert_eq!(Currency::from("GBP"), Currency::GBP);
        assert_eq!(Currency::from("£"), Currency::GBP);
        assert_eq!(Currency::from("CNY"), Currency::CNY);
        assert_eq!(Currency::from("XYZ"), Currency::Other("XYZ".to_string()));
    }

    /// The JSON produced by `Invoice::to_json` contains the parsed number and amount.
    #[test]
    fn test_invoice_to_json() {
        let text = "Invoice #TEST-001\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();
        let json = invoice.to_json().unwrap();

        assert!(json.contains("TEST-001"));
        assert!(json.contains("100"));
    }

    /// Empty input is rejected with an error.
    #[test]
    fn test_empty_document_error() {
        let result = parse_text("");
        assert!(result.is_err());
    }

    /// `ParseResult::single(..).with_source(..)` records the source and keeps one invoice.
    #[test]
    fn test_parse_result_with_source() {
        let invoice = Invoice::new();
        let result = ParseResult::single(invoice).with_source("test.pdf");

        assert_eq!(result.source_file, Some("test.pdf".to_string()));
        assert_eq!(result.invoices.len(), 1);
    }

    /// The digits after "Account No" are used as the invoice number.
    #[test]
    fn test_aws_account_number_extraction() {
        let text = "Account No:200221655242 (Mlytics Limited-200221655242)\nTotal Amount Due\nUSD 13586.67";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_number, Some("200221655242".to_string()));
    }

    /// The parenthesised text after the account number becomes the trimmed account name.
    #[test]
    fn test_account_name_extraction() {
        let text = "Account No: 970363902599 ( mlytics-prdservice2 )\nCompany name: Mlytics Limited\nTotal: $47.59";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_number, Some("970363902599".to_string()));
        assert_eq!(
            invoice.account_name,
            Some("mlytics-prdservice2".to_string())
        );
    }

    /// Text mentioning "AWS Services Pricing" sets the vendor to Amazon Web Services.
    #[test]
    fn test_aws_vendor_detection() {
        let text = "AWS Services Pricing 14,301.76\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.vendor.name, Some("Amazon Web Services".to_string()));
    }

    /// The eCloudValley company line sets the normalised vendor name.
    #[test]
    fn test_ecloudvalley_vendor_detection() {
        let text = "伊雲谷(香港) ECLOUDVALLEY TECHNOLOGY (HK) LIMITED\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(
            invoice.vendor.name,
            Some("eCloudValley Technology".to_string())
        );
    }

    /// The value after "Company name:" becomes the customer name.
    #[test]
    fn test_customer_extraction_from_company_name() {
        let text = "Company name: Mlytics Limited\nBilling Address: Some Address\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.customer.name, Some("Mlytics Limited".to_string()));
    }

    /// The "AWS Service Charge" amount is taken as the subtotal.
    #[test]
    fn test_aws_service_charge_subtotal() {
        let text = "AWS Service Charge 1,806.55\nDiscount -108.39\nTotal Amount Due\nUSD 1,698.16";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.subtotal, Some(1806.55));
    }

    /// The "AWS Services Pricing" amount is taken as the subtotal.
    #[test]
    fn test_aws_services_pricing_subtotal() {
        let text = "AWS Services Pricing 14,301.76\nDiscount -715.09\nTotal: $13,586.67";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.subtotal, Some(14301.76));
    }

    /// The sample PDF yields line items, including the expected Virtual Private Cloud entry.
    #[test]
    fn test_line_items_extraction_from_pdf() {
        let result = parse_file("examples/pdf/202512-CH306-7133-970363902599.pdf").unwrap();
        let invoice = &result.invoices[0];

        assert!(!invoice.line_items.is_empty());

        // `expect` replaces the earlier `assert!(is_some())` + `unwrap()` pair: the test still
        // fails when the item is missing, but with a message that says what was missing.
        let vpc_item = invoice
            .line_items
            .iter()
            .find(|item| {
                item.service_name
                    .as_ref()
                    .is_some_and(|s| s.contains("Virtual Private Cloud"))
            })
            .expect("a line item whose service name contains \"Virtual Private Cloud\"");
        assert_eq!(vpc_item.description, "USE1-PublicIPv4:InUseAddress");
    }

    /// Every line item from the sample PDF has a service name and a non-empty description.
    #[test]
    fn test_line_items_have_service_name_and_description() {
        let result = parse_file("examples/pdf/202512-CH306-7133-970363902599.pdf").unwrap();
        let invoice = &result.invoices[0];

        for item in &invoice.line_items {
            assert!(item.service_name.is_some());
            assert!(!item.description.is_empty());
        }
    }

    /// At least one line item from the second sample PDF carries a unit price.
    #[test]
    fn test_line_items_have_unit_price() {
        let result = parse_file("examples/pdf/HPK_AWS_eCv_200221655242_202512.pdf").unwrap();
        let invoice = &result.invoices[0];

        assert!(invoice
            .line_items
            .iter()
            .any(|item| item.unit_price.is_some()));
    }

    /// Text containing "Description of Statement" is classified as a statement.
    #[test]
    fn test_statement_type_detection() {
        let text = "Description of Statement Currency: USD\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_type, InvoiceType::Statement);
    }

    /// Dates written as `YYYY-MM-DD` are parsed into the invoice date.
    #[test]
    fn test_date_format_yyyy_mm_dd() {
        let text = "Invoice Date: 2025-01-15\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        let date = invoice
            .invoice_date
            .expect("invoice date should be parsed");
        assert_eq!(date.year(), 2025);
        assert_eq!(date.month(), 1);
        assert_eq!(date.day(), 15);
    }

    /// Dates written as `YYYY/MM/DD` on the line after the label are parsed into the due date.
    #[test]
    fn test_date_format_yyyy_slash() {
        let text = "Payment Due Date\n2025/02/25\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        let date = invoice.due_date.expect("due date should be parsed");
        assert_eq!(date.year(), 2025);
        assert_eq!(date.month(), 2);
        assert_eq!(date.day(), 25);
    }

    /// Labels followed directly by a colon (no space) are still recognised.
    #[test]
    fn test_chinese_colon_support() {
        // NOTE(review): the test name refers to the full-width colon (U+FF1A), but the
        // literal below uses ASCII ':' — confirm the fixture was not normalised by accident.
        let text = "Account No:123456789\nCompany name:Test Company\nTotal: $100.00";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.invoice_number, Some("123456789".to_string()));
        assert_eq!(invoice.customer.name, Some("Test Company".to_string()));
    }

    /// Thousands separators in the total are ignored when parsing the amount.
    #[test]
    fn test_amount_with_comma_separator() {
        let text = "Grand Total: $13,586.67";
        let invoice = parse_text(text).unwrap();

        assert_eq!(invoice.total_amount, 13586.67);
    }
}