Skip to main content

invoice_parser/
models.rs

1use chrono::NaiveDate;
2use serde::{Deserialize, Serialize};
3
4mod decimal_format {
5    use serde::{self, Deserialize, Deserializer, Serializer};
6
7    pub fn serialize<S>(value: &f64, serializer: S) -> Result<S::Ok, S::Error>
8    where
9        S: Serializer,
10    {
11        let abs = value.abs();
12        if abs == 0.0 {
13            serializer.serialize_f64(0.0)
14        } else if abs < 0.0001 {
15            serializer.serialize_str(
16                format!("{:.10}", value)
17                    .trim_end_matches('0')
18                    .trim_end_matches('.'),
19            )
20        } else if abs < 1.0 {
21            serializer.serialize_str(
22                format!("{:.8}", value)
23                    .trim_end_matches('0')
24                    .trim_end_matches('.'),
25            )
26        } else {
27            serializer.serialize_f64(*value)
28        }
29    }
30
31    pub fn deserialize<'de, D>(deserializer: D) -> Result<f64, D::Error>
32    where
33        D: Deserializer<'de>,
34    {
35        #[derive(Deserialize)]
36        #[serde(untagged)]
37        enum StringOrFloat {
38            String(String),
39            Float(f64),
40        }
41
42        match StringOrFloat::deserialize(deserializer)? {
43            StringOrFloat::String(s) => s.parse().map_err(serde::de::Error::custom),
44            StringOrFloat::Float(f) => Ok(f),
45        }
46    }
47}
48
49mod decimal_format_option {
50    use serde::{self, Deserialize, Deserializer, Serializer};
51
52    pub fn serialize<S>(value: &Option<f64>, serializer: S) -> Result<S::Ok, S::Error>
53    where
54        S: Serializer,
55    {
56        match value {
57            Some(v) => {
58                let abs = v.abs();
59                if abs == 0.0 {
60                    serializer.serialize_f64(0.0)
61                } else if abs < 0.0001 {
62                    serializer.serialize_str(
63                        format!("{:.10}", v)
64                            .trim_end_matches('0')
65                            .trim_end_matches('.'),
66                    )
67                } else if abs < 1.0 {
68                    serializer.serialize_str(
69                        format!("{:.8}", v)
70                            .trim_end_matches('0')
71                            .trim_end_matches('.'),
72                    )
73                } else {
74                    serializer.serialize_f64(*v)
75                }
76            }
77            None => serializer.serialize_none(),
78        }
79    }
80
81    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<f64>, D::Error>
82    where
83        D: Deserializer<'de>,
84    {
85        #[derive(Deserialize)]
86        #[serde(untagged)]
87        enum StringOrFloat {
88            String(String),
89            Float(f64),
90        }
91
92        let opt: Option<StringOrFloat> = Option::deserialize(deserializer)?;
93        match opt {
94            Some(StringOrFloat::String(s)) => s.parse().map(Some).map_err(serde::de::Error::custom),
95            Some(StringOrFloat::Float(f)) => Ok(Some(f)),
96            None => Ok(None),
97        }
98    }
99}
100
/// Tag for the source document format of an `Invoice` (stored in `Invoice::document_format`).
///
/// Defaults to `Unknown`. The human-readable label of each variant is produced by the
/// `Display` implementation; the per-variant notes below quote those labels.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub enum DocumentFormat {
    /// Default variant; displayed as "Unknown".
    #[default]
    Unknown,
    /// Displayed as "AWS Direct".
    AwsDirect,
    /// Displayed as "eCloudValley AWS".
    ECloudValleyAws,
    /// Displayed as "Microfusion Aliyun".
    MicrofusionAliyun,
    /// Displayed as "Alibaba Cloud Direct".
    AliyunDirect,
    /// Displayed as "UCloud".
    UCloud,
    /// Displayed as "Google Cloud".
    GoogleCloud,
    /// Displayed as "Microsoft Azure".
    Azure,
    /// Displayed as "Lokalise".
    Lokalise,
    /// Displayed as "Sentry".
    Sentry,
    /// Displayed as "Mux".
    Mux,
    /// Displayed as "Mlytics Consolidated".
    MlyticsConsolidated,
    /// Displayed as "Azure CSP".
    AzureCsp,
    /// Displayed as "Aliyun Usage Detail".
    AliyunUsageDetail,
}
119
120impl std::fmt::Display for DocumentFormat {
121    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122        match self {
123            DocumentFormat::Unknown => write!(f, "Unknown"),
124            DocumentFormat::AwsDirect => write!(f, "AWS Direct"),
125            DocumentFormat::ECloudValleyAws => write!(f, "eCloudValley AWS"),
126            DocumentFormat::MicrofusionAliyun => write!(f, "Microfusion Aliyun"),
127            DocumentFormat::AliyunDirect => write!(f, "Alibaba Cloud Direct"),
128            DocumentFormat::UCloud => write!(f, "UCloud"),
129            DocumentFormat::GoogleCloud => write!(f, "Google Cloud"),
130            DocumentFormat::Azure => write!(f, "Microsoft Azure"),
131            DocumentFormat::Lokalise => write!(f, "Lokalise"),
132            DocumentFormat::Sentry => write!(f, "Sentry"),
133            DocumentFormat::Mux => write!(f, "Mux"),
134            DocumentFormat::MlyticsConsolidated => write!(f, "Mlytics Consolidated"),
135            DocumentFormat::AzureCsp => write!(f, "Azure CSP"),
136            DocumentFormat::AliyunUsageDetail => write!(f, "Aliyun Usage Detail"),
137        }
138    }
139}
140
/// Currency type enumeration.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub enum Currency {
    /// US dollar (default).
    #[default]
    USD,
    /// Euro.
    EUR,
    /// Pound sterling.
    GBP,
    /// Japanese yen.
    JPY,
    /// Chinese yuan (renminbi).
    CNY,
    /// Hong Kong dollar.
    HKD,
    /// Singapore dollar.
    SGD,
    /// Australian dollar.
    AUD,
    /// Canadian dollar.
    CAD,
    /// Swiss franc.
    CHF,
    /// Any other currency, carried as text.
    Other(String),
}
168
169impl From<&str> for Currency {
170    fn from(s: &str) -> Self {
171        match s.to_uppercase().as_str() {
172            "USD" | "$" | "US$" => Currency::USD,
173            "EUR" | "€" => Currency::EUR,
174            "GBP" | "£" => Currency::GBP,
175            "JPY" | "¥" | "YEN" => Currency::JPY,
176            "CNY" | "RMB" | "元" => Currency::CNY,
177            "HKD" | "HK$" => Currency::HKD,
178            "SGD" | "S$" => Currency::SGD,
179            "AUD" | "A$" => Currency::AUD,
180            "CAD" | "C$" => Currency::CAD,
181            "CHF" => Currency::CHF,
182            other => Currency::Other(other.to_string()),
183        }
184    }
185}
186
/// Invoice type enumeration.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub enum InvoiceType {
    /// Standard invoice (default).
    #[default]
    Standard,
    /// Credit note (refund / allowance voucher).
    CreditNote,
    /// Debit note (voucher for an additional charge).
    DebitNote,
    /// Pro forma invoice (quotation / pre-issued invoice).
    ProformaInvoice,
    /// Commercial invoice (international trade).
    CommercialInvoice,
    /// Receipt.
    Receipt,
    /// Bill.
    Bill,
    /// Statement (e.g. an AWS bill).
    Statement,
    /// Unknown type.
    Unknown,
}
210
/// Address information.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Address {
    /// Address line 1 (street address).
    pub line1: Option<String>,
    /// Address line 2 (unit number, floor, etc.).
    pub line2: Option<String>,
    /// City.
    pub city: Option<String>,
    /// State / province.
    pub state: Option<String>,
    /// Postal code.
    pub postal_code: Option<String>,
    /// Country.
    pub country: Option<String>,
}
227
228impl Address {
229    /// 返回完整地址字符串,各部分用逗号分隔
230    pub fn full_address(&self) -> String {
231        [
232            self.line1.as_deref(),
233            self.line2.as_deref(),
234            self.city.as_deref(),
235            self.state.as_deref(),
236            self.postal_code.as_deref(),
237            self.country.as_deref(),
238        ]
239        .iter()
240        .filter_map(|&s| s)
241        .collect::<Vec<_>>()
242        .join(", ")
243    }
244}
245
/// Party to the transaction (vendor or customer).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Party {
    /// Company or individual name.
    pub name: Option<String>,
    /// Tax identification number (e.g. unified business number, VAT number).
    pub tax_id: Option<String>,
    /// Address information.
    pub address: Option<Address>,
    /// Email address.
    pub email: Option<String>,
    /// Phone number.
    pub phone: Option<String>,
}
260
/// Invoice line item (a single product / service entry).
///
/// The numeric fields are (de)serialized through the `decimal_format` /
/// `decimal_format_option` adapters named in their `#[serde(with = ...)]` attributes.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct LineItem {
    /// Line number.
    pub line_number: Option<u32>,
    /// Service / item name (e.g. Amazon CloudFront).
    pub service_name: Option<String>,
    /// Usage type / description (e.g. US-Requests-Tier2-HTTPS).
    pub description: String,
    /// Quantity.
    #[serde(with = "decimal_format_option")]
    pub quantity: Option<f64>,
    /// Unit (e.g. piece, item, hour).
    pub unit: Option<String>,
    /// Unit price.
    #[serde(with = "decimal_format_option")]
    pub unit_price: Option<f64>,
    /// Discount amount.
    #[serde(with = "decimal_format_option")]
    pub discount: Option<f64>,
    /// Tax rate (percentage).
    #[serde(with = "decimal_format_option")]
    pub tax_rate: Option<f64>,
    /// Tax amount.
    #[serde(with = "decimal_format_option")]
    pub tax_amount: Option<f64>,
    /// Amount (quantity × unit price − discount + tax amount).
    #[serde(with = "decimal_format")]
    pub amount: f64,
}
291
292impl LineItem {
293    /// 计算行项目金额:数量 × 单价 - 折扣 + 税额
294    pub fn calculate_amount(&self) -> f64 {
295        let qty = self.quantity.unwrap_or(1.0);
296        let price = self.unit_price.unwrap_or(self.amount);
297        let discount = self.discount.unwrap_or(0.0);
298        let tax = self.tax_amount.unwrap_or(0.0);
299
300        (qty * price) - discount + tax
301    }
302
303    /// 验证单价×数量是否等于金额
304    /// 返回 (是否有效, 计算值, 差异值)
305    pub fn validate_amount(&self) -> LineItemValidation {
306        let (qty, price) = match (self.quantity, self.unit_price) {
307            (Some(q), Some(p)) => (q, p),
308            _ => {
309                return LineItemValidation {
310                    is_valid: true,
311                    can_validate: false,
312                    calculated_amount: None,
313                    difference: None,
314                    difference_percent: None,
315                }
316            }
317        };
318
319        let calculated = qty * price;
320        let diff = (self.amount - calculated).abs();
321        let diff_percent = if calculated.abs() > 0.0001 {
322            (diff / calculated.abs()) * 100.0
323        } else {
324            0.0
325        };
326
327        LineItemValidation {
328            is_valid: diff < 0.01 || diff_percent < 1.0,
329            can_validate: true,
330            calculated_amount: Some(calculated),
331            difference: Some(self.amount - calculated),
332            difference_percent: Some(diff_percent),
333        }
334    }
335}
336
/// Line item validation result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LineItemValidation {
    /// Whether the item is valid (difference within the allowed range).
    pub is_valid: bool,
    /// Whether validation was possible (both unit price and quantity are present).
    pub can_validate: bool,
    /// Calculated amount (unit price × quantity).
    pub calculated_amount: Option<f64>,
    /// Difference (actual amount − calculated amount).
    pub difference: Option<f64>,
    /// Difference as a percentage.
    pub difference_percent: Option<f64>,
}
351
/// Tax summary entry.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TaxSummary {
    /// Tax type (e.g. VAT, business tax, GST).
    pub tax_type: Option<String>,
    /// Tax rate (percentage).
    pub tax_rate: Option<f64>,
    /// Taxable amount (tax base).
    pub taxable_amount: Option<f64>,
    /// Tax amount.
    pub tax_amount: f64,
}
364
/// Payment information.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PaymentInfo {
    /// Payment method (e.g. bank transfer, credit card, cheque).
    pub method: Option<String>,
    /// Bank name.
    pub bank_name: Option<String>,
    /// Bank account number.
    pub account_number: Option<String>,
    /// Bank routing number (US banking system).
    pub routing_number: Option<String>,
    /// International bank account number (IBAN).
    pub iban: Option<String>,
    /// SWIFT/BIC code (international remittance).
    pub swift_code: Option<String>,
    /// Payment reference / memo.
    pub reference: Option<String>,
}
383
/// Main invoice structure.
///
/// Holds all core invoice information and can store the parse results of multiple
/// invoice formats.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Invoice {
    /// Source document format tag.
    pub document_format: DocumentFormat,
    /// Invoice type.
    pub invoice_type: InvoiceType,
    /// Invoice number.
    pub invoice_number: Option<String>,
    /// Account name / project alias (e.g. the name in parentheses after the AWS Account No).
    pub account_name: Option<String>,
    /// Customer ID.
    pub customer_id: Option<String>,
    /// Billing year-month (format: YYYY-MM, e.g. 2025-01).
    pub billing_period: Option<String>,
    /// Invoice date.
    pub invoice_date: Option<NaiveDate>,
    /// Payment due date.
    pub due_date: Option<NaiveDate>,
    /// Currency.
    pub currency: Currency,
    /// Vendor / seller information.
    pub vendor: Party,
    /// Customer / buyer information.
    pub customer: Party,
    /// Line items (product / service details).
    pub line_items: Vec<LineItem>,
    /// Subtotal (before tax).
    pub subtotal: Option<f64>,
    /// Discount amount (a negative value denotes a discount).
    pub discount_amount: Option<f64>,
    /// Discount rate (percentage, e.g. 5.0 means 5%).
    pub discount_rate: Option<f64>,
    /// Tax summary entries.
    pub tax_summaries: Vec<TaxSummary>,
    /// Total tax.
    pub total_tax: Option<f64>,
    /// Total amount (tax included).
    pub total_amount: f64,
    /// Amount already paid.
    pub amount_paid: Option<f64>,
    /// Amount due (outstanding balance).
    pub amount_due: Option<f64>,
    /// Payment information.
    pub payment_info: Option<PaymentInfo>,
    /// Notes / remarks.
    pub notes: Option<String>,
    /// Raw text (the original text extracted from the PDF).
    pub raw_text: Option<String>,
    /// Metadata (extension fields, e.g. billing period).
    pub metadata: std::collections::HashMap<String, String>,
}
436
437impl Invoice {
438    /// 创建新的空发票实例
439    pub fn new() -> Self {
440        Self::default()
441    }
442
443    /// 序列化为格式化的 JSON 字符串
444    pub fn to_json(&self) -> Result<String, serde_json::Error> {
445        serde_json::to_string_pretty(self)
446    }
447
448    /// 序列化为紧凑的 JSON 字符串
449    pub fn to_json_compact(&self) -> Result<String, serde_json::Error> {
450        serde_json::to_string(self)
451    }
452
453    /// 从 JSON 字符串反序列化
454    pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
455        serde_json::from_str(json)
456    }
457
458    /// 根据行项目计算小计金额
459    pub fn calculate_subtotal(&self) -> f64 {
460        self.line_items.iter().map(|item| item.amount).sum()
461    }
462
463    /// 根据税务汇总计算总税额
464    pub fn calculate_total_tax(&self) -> f64 {
465        self.tax_summaries.iter().map(|t| t.tax_amount).sum()
466    }
467
468    /// 判断发票是否已付清
469    pub fn is_paid(&self) -> bool {
470        match (self.amount_paid, self.amount_due) {
471            (Some(paid), _) if paid >= self.total_amount => true,
472            (_, Some(due)) if due <= 0.0 => true,
473            _ => false,
474        }
475    }
476
477    pub fn validate_line_items(&self) -> InvoiceValidation {
478        let mut validations = Vec::new();
479        let mut invalid_count = 0;
480        let mut validatable_count = 0;
481
482        for (idx, item) in self.line_items.iter().enumerate() {
483            let validation = item.validate_amount();
484            if validation.can_validate {
485                validatable_count += 1;
486                if !validation.is_valid {
487                    invalid_count += 1;
488                }
489            }
490            validations.push((idx, item.description.clone(), validation));
491        }
492
493        let line_items_sum = self.calculate_subtotal();
494        let subtotal_diff = self.subtotal.map(|s| {
495            let diff = s - line_items_sum;
496            let pct = if s.abs() > 0.0001 {
497                (diff.abs() / s.abs()) * 100.0
498            } else {
499                0.0
500            };
501            (diff, pct)
502        });
503
504        InvoiceValidation {
505            all_valid: invalid_count == 0 && Self::is_subtotal_valid(subtotal_diff),
506            subtotal_valid: Self::is_subtotal_valid(subtotal_diff),
507            total_items: self.line_items.len(),
508            validatable_items: validatable_count,
509            invalid_items: invalid_count,
510            line_items_sum,
511            subtotal: self.subtotal,
512            subtotal_difference: subtotal_diff.map(|(d, _)| d),
513            subtotal_difference_percent: subtotal_diff.map(|(_, p)| p),
514            item_validations: validations,
515        }
516    }
517
518    fn is_subtotal_valid(subtotal_diff: Option<(f64, f64)>) -> bool {
519        const MAX_ALLOWED_DIFFERENCE_PERCENT: f64 = 1.0;
520        match subtotal_diff {
521            Some((_, pct)) => pct <= MAX_ALLOWED_DIFFERENCE_PERCENT,
522            None => true,
523        }
524    }
525}
526
/// Result produced by `Invoice::validate_line_items`: per-item checks plus the
/// comparison between the line-item sum and the invoice subtotal.
#[derive(Debug, Clone)]
pub struct InvoiceValidation {
    /// `true` when no validatable item failed and the subtotal check passed.
    pub all_valid: bool,
    /// `true` when the subtotal difference is within tolerance, or no subtotal is present.
    pub subtotal_valid: bool,
    /// Number of line items on the invoice.
    pub total_items: usize,
    /// Number of line items whose validation reported `can_validate`.
    pub validatable_items: usize,
    /// Number of validatable line items that failed validation.
    pub invalid_items: usize,
    /// Sum of the line-item amounts.
    pub line_items_sum: f64,
    /// Subtotal copied from the invoice, if any.
    pub subtotal: Option<f64>,
    /// `subtotal − line_items_sum`, present only when a subtotal exists.
    pub subtotal_difference: Option<f64>,
    /// Subtotal difference as a percentage, present only when a subtotal exists.
    pub subtotal_difference_percent: Option<f64>,
    /// One entry per line item: (zero-based index, item description, validation result).
    pub item_validations: Vec<(usize, String, LineItemValidation)>,
}
540
541impl InvoiceValidation {
542    pub fn print_report(&self) {
543        println!("=== Invoice Validation Report ===");
544        println!("Total items: {}", self.total_items);
545        println!("Validatable items: {}", self.validatable_items);
546        println!("Invalid items: {}", self.invalid_items);
547        println!("Line items sum: {:.2}", self.line_items_sum);
548
549        if let Some(subtotal) = self.subtotal {
550            println!("Invoice subtotal: {:.2}", subtotal);
551            if let (Some(diff), Some(pct)) =
552                (self.subtotal_difference, self.subtotal_difference_percent)
553            {
554                println!("Difference: {:.2} ({:.2}%)", diff, pct);
555                if !self.subtotal_valid {
556                    println!("ERROR: Line items sum does not match subtotal (>1% difference)");
557                }
558            }
559        }
560
561        let invalid_items: Vec<_> = self
562            .item_validations
563            .iter()
564            .filter(|(_, _, v)| v.can_validate && !v.is_valid)
565            .collect();
566
567        if !invalid_items.is_empty() {
568            println!("\nInvalid line items:");
569            for (idx, desc, v) in invalid_items {
570                println!(
571                    "  Line {}: {} | calculated: {:.4} | diff: {:.4}",
572                    idx + 1,
573                    desc,
574                    v.calculated_amount.unwrap_or(0.0),
575                    v.difference.unwrap_or(0.0)
576                );
577            }
578        }
579    }
580}
581
/// Parse result.
///
/// Wraps the output of a parse operation; supports a single invoice or several, and
/// carries warning messages.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParseResult {
    /// Invoices produced by parsing.
    pub invoices: Vec<Invoice>,
    /// Source file path.
    pub source_file: Option<String>,
    /// Warnings raised during parsing.
    pub parse_warnings: Vec<String>,
}
594
595impl ParseResult {
596    /// 创建包含单个发票的解析结果
597    pub fn single(invoice: Invoice) -> Self {
598        Self {
599            invoices: vec![invoice],
600            source_file: None,
601            parse_warnings: Vec::new(),
602        }
603    }
604
605    /// 创建包含多个发票的解析结果
606    pub fn multiple(invoices: Vec<Invoice>) -> Self {
607        Self {
608            invoices,
609            source_file: None,
610            parse_warnings: Vec::new(),
611        }
612    }
613
614    /// 设置源文件路径(链式调用)
615    pub fn with_source(mut self, source: impl Into<String>) -> Self {
616        self.source_file = Some(source.into());
617        self
618    }
619
620    /// 添加警告信息(链式调用)
621    pub fn with_warning(mut self, warning: impl Into<String>) -> Self {
622        self.parse_warnings.push(warning.into());
623        self
624    }
625}