Skip to main content

qmt_parser/
finance.rs

1//! QMT 本地财务 `.DAT` 解析。
2//!
3//! 这里的字段命名来自仓库内样本和逆向结果,部分列仍保留“尽量稳定但未完全确认”
4//! 的语义。对需要结构化列名的场景,优先使用 [`FinanceRecord::column_names`] 和
5//! [`FinanceRecord::named_values`],不要自行硬编码列顺序。
6
7use std::fs::File;
8use std::io;
9use std::path::Path;
10
11use byteorder::{ByteOrder, LittleEndian};
12use chrono::{DateTime, FixedOffset, TimeZone};
13use memmap2::MmapOptions;
14use thiserror::Error;
15
// Fixed record size in bytes for each finance table; the trailing comment is the
// type id the table is selected by.
const STRIDE_BALANCE: usize = 1264; // 7001
const STRIDE_INCOME: usize = 664; // 7002
const STRIDE_CASHFLOW: usize = 920; // 7003
const STRIDE_RATIOS: usize = 344; // 7008
const STRIDE_CAPITAL: usize = 56; // 7004
const STRIDE_HOLDER: usize = 64; // 7005
const STRIDE_TOP_HOLDER: usize = 416; // 7006, 7007
// Number of 8-byte numeric columns decoded from each record body of the
// report-style tables.
const COLUMNS_BALANCE: usize = 156;
const COLUMNS_INCOME: usize = 80;
const COLUMNS_CASHFLOW: usize = 111;
const COLUMNS_RATIOS: usize = 41;

// Accepted timestamp range (1990 - 2050, in milliseconds).
const MIN_VALID_TS: i64 = 631_152_000_000;
const MAX_VALID_TS: i64 = 2_524_608_000_000;
// Raw bit pattern treated as "no value" when decoding an f64 field. Despite the
// name this is not an IEEE-754 NaN: it is the bit pattern of `f64::MAX`.
const QMT_NAN_HEX: u64 = 0x7FEFFFFFFFFFFFFF;
32
// --- Error definitions ---
/// Errors produced while reading or parsing a finance file.
#[derive(Debug, Error)]
pub enum FinanceError {
    /// Reading the file failed.
    #[error("IO Error: {0}")]
    Io(#[from] io::Error),
    /// The file extension is not `.dat` (compared ASCII case-insensitively, so
    /// `.DAT` is accepted too). The payload is the offending path.
    #[error("Invalid File Extension: {0}")]
    InvalidExtension(String),
    /// The type id in the file name is not one of the supported finance types.
    #[error("Unsupported File Type ID: {0}")]
    UnsupportedType(u16),
    /// A record layout or field value turned out to be invalid while parsing.
    #[error("Parse Error: {0}")]
    Parse(String),
}
50
/// Date-time type used for Beijing time (UTC+8).
///
/// The alias itself only fixes the offset *kind* (`FixedOffset`); the parser in
/// this module builds its values with a +8 hour offset.
pub type BjDateTime = DateTime<FixedOffset>;
53
/// A single finance record.
///
/// Every record carries the finance file type, the report date, the announce
/// date, and the typed data payload that belongs to that file type.
#[derive(Debug, Clone)]
pub struct FinanceRecord {
    /// Finance file type this record was parsed from.
    pub file_type: FileType,
    /// Date of the reporting period.
    pub report_date: BjDateTime,
    /// Date of the announcement.
    pub announce_date: BjDateTime,
    /// The concrete data payload.
    pub data: FinanceData,
}
68
/// Typed data payload of a finance record; the variant depends on the file type.
#[derive(Debug, Clone)]
pub enum FinanceData {
    /// Numeric columns of a 7001, 7002 or 7003 financial statement.
    Report {
        /// Values ordered according to the schema of the record's file type.
        columns: Vec<f64>,
    },
    /// 7004 capital structure.
    Capital {
        /// Total share capital.
        total_share: f64,
        /// Tradable (circulating) share capital.
        flow_share: f64,
        /// Restricted share capital.
        restricted: f64,
        /// Free-float share capital.
        free_float_share: f64,
    },
    /// 7005 shareholder counts.
    HolderCount {
        /// Total number of shareholders.
        total_holders: i64,
        /// Number of A-share holders.
        a_holders: i64,
        /// Number of B-share holders.
        b_holders: i64,
        /// Number of H-share holders.
        h_holders: i64,
        /// Number of holders of tradable shares.
        float_holders: i64,
        /// Number of other shareholders.
        other_holders: i64,
    },
    /// 7006 / 7007 top-ten shareholders or top-ten tradable-share holders.
    TopHolder {
        /// Shareholders listed in this record.
        holders: Vec<Shareholder>,
    },
    /// Numeric columns of the 7008 financial-ratio table.
    Ratios {
        /// Values ordered according to the ratio table schema.
        ratios: Vec<f64>,
    },
}
114
/// A single shareholder entry of a top-holder record.
///
/// Derives `PartialEq` so parsed entries can be compared directly (for example
/// in tests); all fields are plain strings and numbers.
#[derive(Debug, Clone, PartialEq)]
pub struct Shareholder {
    /// Shareholder name.
    pub name: String,
    /// Shareholder type.
    pub holder_type: String,
    /// Number of shares held.
    pub hold_amount: f64,
    /// Reason for the change in holdings.
    pub change_reason: String,
    /// Holding ratio; e.g. `0.05` stands for `5%`.
    pub hold_ratio: f64,
    /// Share class, for example tradable A-shares ("流通A股").
    pub share_type: String,
    /// Rank of the shareholder within the list.
    pub rank: u32,
}
133
/// Finance file type, identified by the numeric type id found in the file name.
///
/// Each discriminant equals that type id, so `file_type as u16` yields the id
/// back. `Eq` and `Hash` are derived so the type can key maps and sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileType {
    /// 7001 balance sheet.
    BalanceSheet = 7001,
    /// 7002 income statement.
    Income = 7002,
    /// 7003 cash-flow statement.
    CashFlow = 7003,
    /// 7004 capital structure.
    Capital = 7004,
    /// 7005 shareholder counts.
    HolderCount = 7005,
    /// 7006 top-ten tradable-share holders.
    TopFlowHolder = 7006,
    /// 7007 top-ten shareholders.
    TopHolder = 7007,
    /// 7008 financial ratios.
    Ratios = 7008,
}

impl FileType {
    /// Maps the type id taken from a file name to a [`FileType`].
    ///
    /// Returns `None` for any id that is not one of `7001..=7008`.
    pub fn from_id(id: u16) -> Option<Self> {
        match id {
            7001 => Some(Self::BalanceSheet),
            7002 => Some(Self::Income),
            7003 => Some(Self::CashFlow),
            7004 => Some(Self::Capital),
            7005 => Some(Self::HolderCount),
            7006 => Some(Self::TopFlowHolder),
            7007 => Some(Self::TopHolder),
            7008 => Some(Self::Ratios),
            _ => None,
        }
    }
}
171
// Column names for 7001 (balance sheet) records, listed in the order the numeric
// columns are decoded from a record body.
//
// The `__extra_N` entries carry their own zero-based index and appear to be
// placeholders for trailing columns without a known name.
// NOTE(review): a few names look misspelled (e.g. `internal_shoule_recv`,
// `advance_peceipts`, `m_petainedProfit`). They are runtime-visible keys, so
// confirm against the upstream field list before renaming any of them.
const BALANCE_COLUMN_NAMES: [&str; 156] = [
    "internal_shoule_recv",
    "fixed_capital_clearance",
    "should_pay_money",
    "settlement_payment",
    "receivable_premium",
    "accounts_receivable_reinsurance",
    "reinsurance_contract_reserve",
    "dividends_payable",
    "tax_rebate_for_export",
    "subsidies_receivable",
    "deposit_receivable",
    "apportioned_cost",
    "profit_and_current_assets_with_deal",
    "current_assets_one_year",
    "long_term_receivables",
    "other_long_term_investments",
    "original_value_of_fixed_assets",
    "net_value_of_fixed_assets",
    "depreciation_reserves_of_fixed_assets",
    "productive_biological_assets",
    "public_welfare_biological_assets",
    "oil_and_gas_assets",
    "development_expenditure",
    "right_of_split_share_distribution",
    "other_non_mobile_assets",
    "handling_fee_and_commission",
    "other_payables",
    "margin_payable",
    "internal_accounts_payable",
    "advance_cost",
    "insurance_contract_reserve",
    "broker_buying_and_selling_securities",
    "acting_underwriting_securities",
    "international_ticket_settlement",
    "domestic_ticket_settlement",
    "deferred_income",
    "short_term_bonds_payable",
    "long_term_deferred_income",
    "undetermined_investment_losses",
    "quasi_distribution_of_cash_dividends",
    "provisions_not",
    "cust_bank_dep",
    "provisions",
    "less_tsy_stk",
    "cash_equivalents",
    "loans_to_oth_banks",
    "tradable_fin_assets",
    "derivative_fin_assets",
    "bill_receivable",
    "account_receivable",
    "advance_payment",
    "int_rcv",
    "other_receivable",
    "red_monetary_cap_for_sale",
    "agency_bus_assets",
    "inventories",
    "other_current_assets",
    "total_current_assets",
    "loans_and_adv_granted",
    "fin_assets_avail_for_sale",
    "held_to_mty_invest",
    "long_term_eqy_invest",
    "invest_real_estate",
    "accumulated_depreciation",
    "fix_assets",
    "constru_in_process",
    "construction_materials",
    "long_term_liabilities",
    "intang_assets",
    "goodwill",
    "long_deferred_expense",
    "deferred_tax_assets",
    "total_non_current_assets",
    "tot_assets",
    "shortterm_loan",
    "borrow_central_bank",
    "loans_oth_banks",
    "tradable_fin_liab",
    "derivative_fin_liab",
    "notes_payable",
    "accounts_payable",
    "advance_peceipts",
    "fund_sales_fin_assets_rp",
    "empl_ben_payable",
    "taxes_surcharges_payable",
    "int_payable",
    "dividend_payable",
    "other_payable",
    "non_current_liability_in_one_year",
    "other_current_liability",
    "total_current_liability",
    "long_term_loans",
    "bonds_payable",
    "longterm_account_payable",
    "grants_received",
    "deferred_tax_liab",
    "other_non_current_liabilities",
    "non_current_liabilities",
    "tot_liab",
    "cap_stk",
    "cap_rsrv",
    "specific_reserves",
    "surplus_rsrv",
    "prov_nom_risks",
    "undistributed_profit",
    "cnvd_diff_foreign_curr_stat",
    "tot_shrhldr_eqy_excl_min_int",
    "minority_int",
    "total_equity",
    "tot_liab_shrhldr_eqy",
    "inventory_depreciation_reserve",
    "current_ratio",
    "m_cashAdepositsCentralBank",
    "m_nobleMetal",
    "m_depositsOtherFinancialInstitutions",
    "m_currentInvestment",
    "m_redemptoryMonetaryCapitalSale",
    "m_netAmountSubrogation",
    "m_refundableDeposits",
    "m_netAmountLoanPledged",
    "m_fixedTimeDeposit",
    "m_netLongtermDebtInvestments",
    "m_permanentInvestment",
    "m_depositForcapitalRecognizance",
    "m_netBalConstructionProgress",
    "m_separateAccountAssets",
    "m_capitalInvicariousBussiness",
    "m_otherAssets",
    "m_depositsWithBanksOtherFinancialIns",
    "m_indemnityPayable",
    "m_policyDividendPayable",
    "m_guaranteeInvestmentFunds",
    "m_premiumsReceivedAdvance",
    "m_insuranceLiabilities",
    "m_liabilitiesIndependentAccounts",
    "m_liabilitiesVicariousBusiness",
    "m_otherLiablities",
    "m_capitalPremium",
    "m_petainedProfit",
    "m_provisionTransactionRisk",
    "m_otherReserves",
    "__extra_141",
    "__extra_142",
    "__extra_143",
    "__extra_144",
    "__extra_145",
    "__extra_146",
    "__extra_147",
    "__extra_148",
    "__extra_149",
    "__extra_150",
    "__extra_151",
    "__extra_152",
    "__extra_153",
    "__extra_154",
    "__extra_155",
];
330
// Column names for 7002 (income statement) records, listed in the order the
// numeric columns are decoded from a record body.
//
// The `__extra_N` entries carry their own zero-based index and appear to be
// placeholders for trailing columns without a known name.
const INCOME_COLUMN_NAMES: [&str; 80] = [
    "revenue_inc",
    "earned_premium",
    "real_estate_sales_income",
    "total_operating_cost",
    "real_estate_sales_cost",
    "research_expenses",
    "surrender_value",
    "net_payments",
    "net_withdrawal_ins_con_res",
    "policy_dividend_expenses",
    "reinsurance_cost",
    "change_income_fair_value",
    "futures_loss",
    "trust_income",
    "subsidize_revenue",
    "other_business_profits",
    "net_profit_excl_merged_int_inc",
    "int_inc",
    "handling_chrg_comm_inc",
    "less_handling_chrg_comm_exp",
    "other_bus_cost",
    "plus_net_gain_fx_trans",
    "il_net_loss_disp_noncur_asset",
    "inc_tax",
    "unconfirmed_invest_loss",
    "net_profit_excl_min_int_inc",
    "less_int_exp",
    "other_bus_inc",
    "revenue",
    "total_expense",
    "less_taxes_surcharges_ops",
    "sale_expense",
    "less_gerl_admin_exp",
    "financial_expense",
    "less_impair_loss_assets",
    "plus_net_invest_inc",
    "incl_inc_invest_assoc_jv_entp",
    "oper_profit",
    "plus_non_oper_rev",
    "less_non_oper_exp",
    "tot_profit",
    "net_profit_incl_min_int_inc",
    "net_profit_incl_min_int_inc_after",
    "minority_int_inc",
    "s_fa_eps_basic",
    "s_fa_eps_diluted",
    "total_income",
    "total_income_minority",
    "other_compreh_inc",
    "operating_revenue",
    "cost_of_goods_sold",
    "m_netinterestIncome",
    "m_netFeesCommissions",
    "m_insuranceBusiness",
    "m_separatePremium",
    "m_asideReservesUndueLiabilities",
    "m_paymentsInsuranceClaims",
    "m_amortizedCompensationExpenses",
    "m_netReserveInsuranceLiability",
    "m_policyReserve",
    "m_amortizeInsuranceReserve",
    "m_nsuranceFeesCommissionExpenses",
    "m_operationAdministrativeExpense",
    "m_amortizedReinsuranceExpenditure",
    "m_netProfitLossdisposalNonassets",
    "m_otherItemsAffectingNetProfit",
    "__extra_66",
    "__extra_67",
    "__extra_68",
    "__extra_69",
    "__extra_70",
    "__extra_71",
    "__extra_72",
    "__extra_73",
    "__extra_74",
    "__extra_75",
    "__extra_76",
    "__extra_77",
    "__extra_78",
    "__extra_79",
];
413
// Column names for 7003 (cash-flow statement) records, listed in the order the
// numeric columns are decoded from a record body.
//
// The `__extra_N` entries carry their own zero-based index and appear to be
// placeholders for trailing columns without a known name.
// NOTE(review): `net_incr_fund_borr_ofi` is listed twice in a row, so the names
// in this table are not unique keys — confirm the intended name of the second
// column before relying on name-based lookups.
const CASHFLOW_COLUMN_NAMES: [&str; 111] = [
    "cash_received_ori_ins_contract_pre",
    "net_cash_received_rei_ope",
    "net_increase_insured_funds",
    "net_increase_in_disposal",
    "cash_for_interest",
    "net_increase_in_repurchase_funds",
    "cash_for_payment_original_insurance",
    "cash_payment_policy_dividends",
    "disposal_other_business_units",
    "net_cash_deal_subcompany",
    "cash_received_from_pledges",
    "cash_paid_for_investments",
    "net_increase_in_pledged_loans",
    "cash_paid_by_subsidiaries",
    "increase_in_cash_paid",
    "fix_intan_other_asset_dispo_cash_payment",
    "cash_from_mino_s_invest_sub",
    "cass_received_sub_abs",
    "cass_received_sub_investments",
    "minority_shareholder_profit_loss",
    "unrecognized_investment_losses",
    "ncrease_deferred_income",
    "projected_liability",
    "increase_operational_payables",
    "reduction_outstanding_amounts_less",
    "reduction_outstanding_amounts_more",
    "goods_sale_and_service_render_cash",
    "net_incr_dep_cob",
    "net_incr_loans_central_bank",
    "net_incr_fund_borr_ofi",
    "net_incr_fund_borr_ofi",
    "tax_levy_refund",
    "cash_paid_invest",
    "other_cash_pay_ral_inv_act",
    "other_cash_recp_ral_oper_act",
    "stot_cash_inflows_oper_act",
    "goods_and_services_cash_paid",
    "net_incr_clients_loan_adv",
    "net_incr_dep_cbob",
    "handling_chrg_paid",
    "cash_pay_beh_empl",
    "pay_all_typ_tax",
    "other_cash_pay_ral_oper_act",
    "stot_cash_outflows_oper_act",
    "net_cash_flows_oper_act",
    "cash_recp_disp_withdrwl_invest",
    "cash_recp_return_invest",
    "net_cash_recp_disp_fiolta",
    "other_cash_recp_ral_inv_act",
    "stot_cash_inflows_inv_act",
    "cash_pay_acq_const_fiolta",
    "stot_cash_outflows_inv_act",
    "net_cash_flows_inv_act",
    "cash_recp_cap_contrib",
    "cash_recp_borrow",
    "proc_issue_bonds",
    "other_cash_recp_ral_fnc_act",
    "stot_cash_inflows_fnc_act",
    "cash_prepay_amt_borr",
    "cash_pay_dist_dpcp_int_exp",
    "other_cash_pay_ral_fnc_act",
    "stot_cash_outflows_fnc_act",
    "net_cash_flows_fnc_act",
    "eff_fx_flu_cash",
    "net_incr_cash_cash_equ",
    "cash_cash_equ_beg_period",
    "cash_cash_equ_end_period",
    "net_profit",
    "plus_prov_depr_assets",
    "depr_fa_coga_dpba",
    "amort_intang_assets",
    "amort_lt_deferred_exp",
    "decr_deferred_exp",
    "incr_acc_exp",
    "loss_disp_fiolta",
    "loss_scr_fa",
    "loss_fv_chg",
    "fin_exp",
    "invest_loss",
    "decr_deferred_inc_tax_assets",
    "incr_deferred_inc_tax_liab",
    "decr_inventories",
    "decr_oper_payable",
    "others",
    "im_net_cash_flows_oper_act",
    "conv_debt_into_cap",
    "conv_corp_bonds_due_within_1y",
    "fa_fnc_leases",
    "end_bal_cash",
    "less_beg_bal_cash",
    "plus_end_bal_cash_equ",
    "less_beg_bal_cash_equ",
    "im_net_incr_cash_cash_equ",
    "m_netDecreaseUnwindingFunds",
    "m_netReductionPurchaseRebates",
    "m_netIncreaseDepositsBanks",
    "m_netCashReinsuranceBusiness",
    "m_netReductionDeposInveFunds",
    "m_netIncreaseUnwindingFunds",
    "m_netReductionAmountBorrowedFunds",
    "m_netReductionSaleRepurchaseProceeds",
    "m_paymentOtherCashRelated",
    "m_cashOutFlowsInvesactivities",
    "m_absorbCashEquityInv",
    "m_otherImpactsOnCash",
    "m_addOperatingReceivableItems",
    "__extra_106",
    "__extra_107",
    "__extra_108",
    "__extra_109",
    "__extra_110",
];
527
// Column names for 7008 (financial ratio) records, listed in the order the
// numeric columns are decoded from a record body.
//
// The `__extra_N` entries carry their own zero-based index and appear to be
// placeholders for trailing columns without a known name.
// NOTE(review): `pre_pay_operate_income` and `sales_cash_flow` each appear
// twice, so the names in this table are not unique keys — confirm the intended
// names of the repeated columns before relying on name-based lookups.
const RATIO_COLUMN_NAMES: [&str; 41] = [
    "s_fa_ocfps",
    "s_fa_bps",
    "s_fa_eps_basic",
    "s_fa_eps_diluted",
    "s_fa_undistributedps",
    "s_fa_surpluscapitalps",
    "adjusted_earnings_per_share",
    "du_return_on_equity",
    "sales_gross_profit",
    "inc_revenue_rate",
    "du_profit_rate",
    "inc_net_profit_rate",
    "adjusted_net_profit_rate",
    "inc_total_revenue_annual",
    "inc_net_profit_to_shareholders_annual",
    "adjusted_profit_to_profit_annual",
    "equity_roe",
    "net_roe",
    "total_roe",
    "gross_profit",
    "net_profit",
    "actual_tax_rate",
    "pre_pay_operate_income",
    "sales_cash_flow",
    "pre_pay_operate_income",
    "sales_cash_flow",
    "gear_ratio",
    "inventory_turnover",
    "m_anntime",
    "m_timetag",
    "inc_revenue",
    "inc_gross_profit",
    "inc_profit_before_tax",
    "du_profit",
    "inc_net_profit",
    "adjusted_net_profit",
    "__extra_36",
    "__extra_37",
    "__extra_38",
    "__extra_39",
    "__extra_40",
];
571
572impl FinanceRecord {
573    /// 返回当前记录对应的结构化列名。
574    ///
575    /// 仅对 `Report` 和 `Ratios` 类型返回值。
576    pub fn column_names(&self) -> Option<&'static [&'static str]> {
577        FinanceReader::column_names(self.file_type)
578    }
579
580    /// 返回 `(列名, 数值)` 形式的配对结果。
581    ///
582    /// 仅对 `Report` 和 `Ratios` 类型返回值。
583    pub fn named_values(&self) -> Option<Vec<(&'static str, f64)>> {
584        let names = self.column_names()?;
585        match &self.data {
586            FinanceData::Report { columns } => {
587                Some(names.iter().copied().zip(columns.iter().copied()).collect())
588            }
589            FinanceData::Ratios { ratios } => {
590                Some(names.iter().copied().zip(ratios.iter().copied()).collect())
591            }
592            _ => None,
593        }
594    }
595}
596
/// Reader for finance files.
///
/// A stateless marker type: all functionality is exposed through associated
/// functions such as `FinanceReader::read_file`.
#[derive(Debug, Clone, Copy, Default)]
pub struct FinanceReader;
599
600impl FinanceReader {
601    /// 返回指定财务文件类型的结构化列名。
602    ///
603    /// 当前仅对报表类和比率类文件返回值。
604    pub fn column_names(file_type: FileType) -> Option<&'static [&'static str]> {
605        match file_type {
606            FileType::BalanceSheet => Some(&BALANCE_COLUMN_NAMES),
607            FileType::Income => Some(&INCOME_COLUMN_NAMES),
608            FileType::CashFlow => Some(&CASHFLOW_COLUMN_NAMES),
609            FileType::Ratios => Some(&RATIO_COLUMN_NAMES),
610            _ => None,
611        }
612    }
613
614    /// 读取单个财务文件并解析为 [`FinanceRecord`] 列表。
615    ///
616    /// 文件类型会从文件名中的 type id 自动识别,例如 `002419_7001.DAT`。
617    ///
618    /// # Examples
619    ///
620    /// ```no_run
621    /// use qmt_parser::finance::FinanceReader;
622    ///
623    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
624    /// let records = FinanceReader::read_file("finance/002419_7001.DAT")?;
625    /// println!("records = {}", records.len());
626    /// # Ok(())
627    /// # }
628    /// ```
629    pub fn read_file(path: impl AsRef<Path>) -> Result<Vec<FinanceRecord>, FinanceError> {
630        let path = path.as_ref();
631        Self::validate_dat_path(path)?;
632        let file_type = Self::detect_type(path)?;
633        let file = File::open(path)?;
634
635        // mmap 零拷贝读取
636        let mmap = unsafe { MmapOptions::new().map(&file)? };
637        let data = &mmap[..];
638
639        match file_type {
640            FileType::BalanceSheet => Self::parse_fixed(data, STRIDE_BALANCE, 0, |body| {
641                let mut cols = Vec::with_capacity(COLUMNS_BALANCE);
642                for i in 0..COLUMNS_BALANCE {
643                    cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
644                }
645                FinanceData::Report { columns: cols }
646            }),
647            FileType::Income => Self::parse_fixed(data, STRIDE_INCOME, 8, |body| {
648                let mut cols = Vec::with_capacity(COLUMNS_INCOME);
649                for i in 0..COLUMNS_INCOME {
650                    cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
651                }
652                FinanceData::Report { columns: cols }
653            }),
654            FileType::CashFlow => Self::parse_fixed(data, STRIDE_CASHFLOW, 8, |body| {
655                let mut cols = Vec::with_capacity(COLUMNS_CASHFLOW);
656                for i in 0..COLUMNS_CASHFLOW {
657                    cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
658                }
659                FinanceData::Report { columns: cols }
660            }),
661            FileType::Ratios => Self::parse_fixed(data, STRIDE_RATIOS, 0, |body| {
662                let mut cols = Vec::with_capacity(COLUMNS_RATIOS);
663                for i in 0..COLUMNS_RATIOS {
664                    cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
665                }
666                FinanceData::Ratios { ratios: cols }
667            }),
668            FileType::Capital => {
669                Self::parse_fixed(data, STRIDE_CAPITAL, 0, |body| {
670                    // Body Offset (Header=16): 0=Total, 8=Flow, 16=Restricted, 24=FreeFloat
671                    FinanceData::Capital {
672                        total_share: Self::read_f64(body, 0).unwrap_or(0.0),
673                        flow_share: Self::read_f64(body, 8).unwrap_or(0.0),
674                        restricted: Self::read_f64(body, 16).unwrap_or(0.0),
675                        free_float_share: Self::read_f64(body, 24).unwrap_or(0.0),
676                    }
677                })
678            }
679            FileType::HolderCount => {
680                // 7005 特殊 Header 顺序: [Announce] [Report]. parse_fixed 默认读 [Report][Announce]
681                // 需要特殊处理? 不,QMT 的 7005 Header 顺序根据 Hex 是:
682                // 00..07: AnnounceDate, 08..15: ReportDate
683                // 我们在 parse_fixed 内部交换一下即可,或者在回调里处理
684                // 为了统一,我们使用专门的 parse_7005
685                Self::parse_7005_fixed(data)
686            }
687            FileType::TopFlowHolder | FileType::TopHolder => {
688                Self::parse_top_holders(data, file_type)
689            }
690        }
691    }
692
693    /// 从文件名中解析 TypeId 并映射到枚举
694    fn detect_type(path: &Path) -> Result<FileType, FinanceError> {
695        let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
696        let id_part = stem.split('_').last().unwrap_or("");
697        let id = id_part
698            .parse::<u16>()
699            .map_err(|_| FinanceError::Parse("Invalid Filename".into()))?;
700        FileType::from_id(id).ok_or(FinanceError::UnsupportedType(id))
701    }
702
703    fn validate_dat_path(path: &Path) -> Result<(), FinanceError> {
704        let ext = path
705            .extension()
706            .and_then(|s| s.to_str())
707            .unwrap_or_default()
708            .to_ascii_lowercase();
709        if ext != "dat" {
710            return Err(FinanceError::InvalidExtension(path.display().to_string()));
711        }
712        Ok(())
713    }
714
715    // --- 定长解析器 (7001-7004, 7008) ---
716    /// 通用定长表解析器,接收回调解析正文部分
717    fn parse_fixed<F>(
718        data: &[u8],
719        stride: usize,
720        header_offset: usize,
721        parser: F,
722    ) -> Result<Vec<FinanceRecord>, FinanceError>
723    where
724        F: Fn(&[u8]) -> FinanceData,
725    {
726        let mut results = Vec::new();
727        let mut cursor = 0;
728        let len = data.len();
729
730        while cursor + header_offset + 16 <= len {
731            // 扫描 Header
732            let header_start = cursor + header_offset;
733            let ts1 = LittleEndian::read_i64(&data[header_start..header_start + 8]);
734            let ts2 = LittleEndian::read_i64(&data[header_start + 8..header_start + 16]);
735
736            // 7001-7004, 7008 顺序: Report, Announce
737            if Self::is_valid_ts(ts1) {
738                // 如果发现有效头
739                if cursor + stride <= len {
740                    let report_date = Self::ts_to_bj(ts1);
741                    // Announce Date 可能是 0,如果是 0,回退到 Report Date
742                    let announce_date = if Self::is_valid_ts(ts2) {
743                        Self::ts_to_bj(ts2)
744                    } else {
745                        report_date // Fallback
746                    };
747
748                    let body = &data[header_start + 16..cursor + stride];
749                    results.push(FinanceRecord {
750                        file_type: Self::file_type_from_stride(stride)?,
751                        report_date,
752                        announce_date,
753                        data: parser(body),
754                    });
755
756                    cursor += stride;
757                    continue;
758                }
759            }
760            // 滑动窗口寻找下一个有效头
761            cursor += 8;
762        }
763        Ok(results)
764    }
765
766    // --- 7005 专用解析 (Header 顺序颠倒) ---
767    /// 针对 7005 (股东人数) 的定长解析
768    fn parse_7005_fixed(data: &[u8]) -> Result<Vec<FinanceRecord>, FinanceError> {
769        let mut results = Vec::new();
770        let mut cursor = 0;
771        let stride = STRIDE_HOLDER;
772
773        while cursor + 16 <= data.len() {
774            let ts1 = LittleEndian::read_i64(&data[cursor..cursor + 8]); // Announce
775            let ts2 = LittleEndian::read_i64(&data[cursor + 8..cursor + 16]); // Report
776
777            // 只要有一个有效,就尝试解析
778            if Self::is_valid_ts(ts2) {
779                if cursor + stride <= data.len() {
780                    let report_date = Self::ts_to_bj(ts2);
781                    let announce_date = if Self::is_valid_ts(ts1) {
782                        Self::ts_to_bj(ts1)
783                    } else {
784                        report_date
785                    };
786
787                    let body = &data[cursor + 16..cursor + stride];
788                    let total_holders = Self::read_f64(body, 0).unwrap_or(0.0) as i64;
789                    let a_holders = Self::read_f64(body, 8).unwrap_or(0.0) as i64;
790                    let b_holders = Self::read_f64(body, 16).unwrap_or(0.0) as i64;
791                    let h_holders = Self::read_f64(body, 24).unwrap_or(0.0) as i64;
792                    let float_holders = Self::read_f64(body, 32).unwrap_or(0.0) as i64;
793                    let other_holders = Self::read_f64(body, 40).unwrap_or(0.0) as i64;
794
795                    results.push(FinanceRecord {
796                        file_type: FileType::HolderCount,
797                        report_date,
798                        announce_date,
799                        data: FinanceData::HolderCount {
800                            total_holders,
801                            a_holders,
802                            b_holders,
803                            h_holders,
804                            float_holders,
805                            other_holders,
806                        },
807                    });
808                    cursor += stride;
809                    continue;
810                }
811            }
812            cursor += 8;
813        }
814        Ok(results)
815    }
816
817    /// 解析 7006/7007 十大(流通)股东定长记录,并按同一报告期聚合。
818    fn parse_top_holders(
819        data: &[u8],
820        file_type: FileType,
821    ) -> Result<Vec<FinanceRecord>, FinanceError> {
822        let mut results = Vec::new();
823        let mut current_report_ts = 0i64;
824        let mut current_announce_ts = 0i64;
825        let mut current_holders = Vec::new();
826
827        for chunk in data.chunks_exact(STRIDE_TOP_HOLDER) {
828            let announce_ts = LittleEndian::read_i64(&chunk[0..8]);
829            let report_ts = LittleEndian::read_i64(&chunk[8..16]);
830            if !Self::is_valid_ts(announce_ts) || !Self::is_valid_ts(report_ts) {
831                continue;
832            }
833
834            let holder = Self::parse_top_holder_record(chunk);
835            if current_holders.is_empty() {
836                current_report_ts = report_ts;
837                current_announce_ts = announce_ts;
838            }
839
840            if report_ts != current_report_ts || announce_ts != current_announce_ts {
841                results.push(FinanceRecord {
842                    file_type,
843                    report_date: Self::ts_to_bj(current_report_ts),
844                    announce_date: Self::ts_to_bj(current_announce_ts),
845                    data: FinanceData::TopHolder {
846                        holders: std::mem::take(&mut current_holders),
847                    },
848                });
849                current_report_ts = report_ts;
850                current_announce_ts = announce_ts;
851            }
852
853            current_holders.push(holder);
854        }
855
856        if !current_holders.is_empty() {
857            results.push(FinanceRecord {
858                file_type,
859                report_date: Self::ts_to_bj(current_report_ts),
860                announce_date: Self::ts_to_bj(current_announce_ts),
861                data: FinanceData::TopHolder {
862                    holders: current_holders,
863                },
864            });
865        }
866
867        Ok(results)
868    }
869
870    fn parse_top_holder_record(record: &[u8]) -> Shareholder {
871        Shareholder {
872            name: Self::read_string(record, 16, 192),
873            holder_type: Self::read_string(record, 216, 56),
874            hold_amount: Self::read_f64(record, 272).unwrap_or(0.0),
875            change_reason: Self::read_string(record, 280, 16),
876            hold_ratio: Self::read_f64(record, 304).unwrap_or(0.0),
877            share_type: Self::read_string(record, 312, 96),
878            rank: LittleEndian::read_u32(&record[412..416]),
879        }
880    }
881
882    fn file_type_from_stride(stride: usize) -> Result<FileType, FinanceError> {
883        match stride {
884            STRIDE_BALANCE => Ok(FileType::BalanceSheet),
885            STRIDE_INCOME => Ok(FileType::Income),
886            STRIDE_CASHFLOW => Ok(FileType::CashFlow),
887            STRIDE_RATIOS => Ok(FileType::Ratios),
888            STRIDE_CAPITAL => Ok(FileType::Capital),
889            STRIDE_HOLDER => Ok(FileType::HolderCount),
890            _ => Err(FinanceError::Parse(format!(
891                "Unknown finance stride: {}",
892                stride
893            ))),
894        }
895    }
896
897    fn is_valid_ts(ts: i64) -> bool {
898        ts >= MIN_VALID_TS && ts <= MAX_VALID_TS
899    }
900
901    /// 将毫秒时间戳转换为北京时间
902    fn ts_to_bj(ts: i64) -> BjDateTime {
903        // 构建 UTC+8
904        let tz = FixedOffset::east_opt(8 * 3600).unwrap();
905        let secs = ts / 1000;
906        let nsecs = (ts % 1000) * 1_000_000;
907        tz.timestamp_opt(secs, nsecs as u32)
908            .single()
909            .unwrap_or_default()
910    }
911
912    /// 读取 f64 并处理哨兵值
913    fn read_f64(data: &[u8], offset: usize) -> Option<f64> {
914        if offset + 8 > data.len() {
915            return None;
916        }
917        let u = LittleEndian::read_u64(&data[offset..offset + 8]);
918        if u == QMT_NAN_HEX {
919            return None;
920        }
921        let f = f64::from_bits(u);
922        if f.is_nan() { None } else { Some(f) }
923    }
924
925    /// 从定长缓冲区读取 UTF-8 字符串
926    fn read_string(data: &[u8], offset: usize, max_len: usize) -> String {
927        if offset >= data.len() {
928            return String::new();
929        }
930        let end = (offset + max_len).min(data.len());
931        let slice = &data[offset..end];
932        // 找 \0 结尾
933        let actual_len = slice.iter().position(|&c| c == 0).unwrap_or(slice.len());
934        String::from_utf8_lossy(&slice[..actual_len])
935            .trim()
936            .to_string()
937    }
938}
939
940#[cfg(test)]
941mod tests {
942    use super::*;
943    use std::path::PathBuf;
944
945    fn get_fixture(file: &str) -> PathBuf {
946        PathBuf::from("/home/sunnysab/Code/trade-rs/qmt-parser/finance/").join(file)
947    }
948
949    // 辅助函数:打印前5条
950    fn print_head(type_id: u16, records: &[FinanceRecord]) {
951        println!(
952            "\n>>> [Type {}] Found {} records. Showing first 5:",
953            type_id,
954            records.len()
955        );
956        for (i, rec) in records.iter().take(5).enumerate() {
957            println!(
958                "#{:03} | Report: {} | Announce: {}",
959                i,
960                rec.report_date.format("%Y-%m-%d"),
961                rec.announce_date.format("%Y-%m-%d")
962            );
963            // 打印具体的 Data 枚举内容,使用 {:#?} 美化输出
964            println!("Data: {:#?}\n", rec.data);
965        }
966        if records.is_empty() {
967            println!("(No records found)\n");
968        } else {
969            println!(
970                "... (remaining {} records omitted)\n",
971                records.len().saturating_sub(5)
972            );
973        }
974    }
975
976    #[test]
977    fn test_7001_balance_sheet() {
978        let path = get_fixture("002419_7001.DAT");
979        if !path.exists() {
980            eprintln!("Skipping 7001: File not found");
981            return;
982        }
983
984        let res = FinanceReader::read_file(&path).expect("Failed to parse 7001");
985        assert!(!res.is_empty(), "7001 should not be empty");
986
987        if let FinanceData::Report { columns } = &res[0].data {
988            assert_eq!(columns.len(), 156);
989            assert!(columns[0].is_nan());
990            assert!((columns[11] - 273_297_896.39).abs() < 1e-6);
991        } else {
992            panic!("7001 parsed as wrong type");
993        }
994
995        print_head(7001, &res);
996    }
997
998    #[test]
999    fn test_7002_income() {
1000        let path = get_fixture("002419_7002.DAT");
1001        if !path.exists() {
1002            eprintln!("Skipping 7002: File not found");
1003            return;
1004        }
1005
1006        let res = FinanceReader::read_file(&path).expect("Failed to parse 7002");
1007        assert!(!res.is_empty(), "7002 should not be empty");
1008
1009        if let FinanceData::Report { columns } = &res[0].data {
1010            assert_eq!(columns.len(), 80);
1011            assert!((columns[0] - 4_809_251_460.5).abs() < 1e-6);
1012            assert!(columns[1].is_nan());
1013        } else {
1014            panic!("7002 parsed as wrong type");
1015        }
1016
1017        print_head(7002, &res);
1018    }
1019
1020    #[test]
1021    fn test_7003_cashflow() {
1022        let path = get_fixture("002419_7003.DAT");
1023        if !path.exists() {
1024            eprintln!("Skipping 7003: File not found");
1025            return;
1026        }
1027
1028        let res = FinanceReader::read_file(&path).expect("Failed to parse 7003");
1029        assert!(!res.is_empty(), "7003 should not be empty");
1030
1031        if let FinanceData::Report { columns } = &res[0].data {
1032            assert_eq!(columns.len(), 111);
1033            assert!(columns[0].is_nan());
1034            assert!((columns[23] - 5_506_707_615.58).abs() < 1e-6);
1035        } else {
1036            panic!("7003 parsed as wrong type");
1037        }
1038
1039        print_head(7003, &res);
1040    }
1041
1042    #[test]
1043    fn test_7004_capital() {
1044        let path = get_fixture("002419_7004.DAT");
1045        if !path.exists() {
1046            eprintln!("Skipping 7004: File not found");
1047            return;
1048        }
1049
1050        let res = FinanceReader::read_file(&path).expect("Failed to parse 7004");
1051        assert!(!res.is_empty(), "7004 should not be empty");
1052
1053        if let FinanceData::Capital {
1054            total_share,
1055            flow_share,
1056            restricted,
1057            free_float_share,
1058        } = &res[0].data
1059        {
1060            assert_eq!(*total_share, 400_100_000.0);
1061            assert_eq!(*flow_share, 40_080_000.0);
1062            assert_eq!(*restricted, 0.0);
1063            assert_eq!(*free_float_share, 40_080_000.0);
1064        } else {
1065            panic!("7004 parsed as wrong type");
1066        }
1067
1068        print_head(7004, &res);
1069    }
1070
1071    #[test]
1072    fn test_7005_holder_count() {
1073        let path = get_fixture("002419_7005.DAT");
1074        if !path.exists() {
1075            eprintln!("Skipping 7005: File not found");
1076            return;
1077        }
1078
1079        let res = FinanceReader::read_file(&path).expect("Failed to parse 7005");
1080        assert!(!res.is_empty(), "7005 should not be empty");
1081
1082        if let FinanceData::HolderCount {
1083            total_holders,
1084            a_holders,
1085            b_holders,
1086            h_holders,
1087            float_holders,
1088            other_holders,
1089        } = &res[0].data
1090        {
1091            assert_eq!(*total_holders, 35_719);
1092            assert_eq!(*a_holders, 35_719);
1093            assert_eq!(*b_holders, 0);
1094            assert_eq!(*h_holders, 0);
1095            assert_eq!(*float_holders, 0);
1096            assert_eq!(*other_holders, 0);
1097        } else {
1098            panic!("7005 parsed as wrong type");
1099        }
1100
1101        print_head(7005, &res);
1102    }
1103
1104    #[test]
1105    fn test_7006_top_float_holder() {
1106        let path = get_fixture("002419_7006.DAT");
1107        if !path.exists() {
1108            eprintln!("Skipping 7006: File not found");
1109            return;
1110        }
1111
1112        let res = FinanceReader::read_file(&path).expect("Failed to parse 7006");
1113        assert!(!res.is_empty(), "7006 should not be empty");
1114
1115        if let FinanceData::TopHolder { holders } = &res[0].data {
1116            assert_eq!(holders.len(), 40);
1117            assert_eq!(holders[0].name, "中国航空技术深圳有限公司");
1118            assert_eq!(holders[0].holder_type, "机构投资账户");
1119            assert_eq!(holders[0].hold_amount, 158_128_000.0);
1120            assert_eq!(holders[0].change_reason, "不变");
1121            assert_eq!(holders[0].hold_ratio, 39.52);
1122            assert_eq!(holders[0].share_type, "流通A股");
1123            assert_eq!(holders[0].rank, 1);
1124        } else {
1125            panic!("7006 parsed as wrong type");
1126        }
1127
1128        print_head(7006, &res);
1129    }
1130
1131    #[test]
1132    fn test_7007_top_holder() {
1133        let path = get_fixture("002419_7007.DAT");
1134        if !path.exists() {
1135            eprintln!("Skipping 7007: File not found");
1136            return;
1137        }
1138
1139        let res = FinanceReader::read_file(&path).expect("Failed to parse 7007");
1140        assert!(!res.is_empty(), "7007 should not be empty");
1141
1142        if let FinanceData::TopHolder { holders } = &res[0].data {
1143            assert_eq!(holders.len(), 10);
1144            assert_eq!(holders[0].name, "中国工商银行-诺安股票证券投资基金");
1145            assert_eq!(holders[0].holder_type, "机构投资账户");
1146            assert_eq!(holders[0].hold_amount, 1_799_860.0);
1147            assert_eq!(holders[0].change_reason, "不变");
1148            assert_eq!(holders[0].hold_ratio, 0.45);
1149            assert_eq!(holders[0].share_type, "流通A股");
1150            assert_eq!(holders[0].rank, 1);
1151        } else {
1152            panic!("7007 parsed as wrong type");
1153        }
1154
1155        print_head(7007, &res);
1156    }
1157
1158    #[test]
1159    fn test_7008_ratios() {
1160        let path = get_fixture("002419_7008.DAT");
1161        if !path.exists() {
1162            eprintln!("Skipping 7008: File not found");
1163            return;
1164        }
1165
1166        let res = FinanceReader::read_file(&path).expect("Failed to parse 7008");
1167        assert!(!res.is_empty(), "7008 should not be empty");
1168
1169        if let FinanceData::Ratios { ratios } = &res[0].data {
1170            assert_eq!(ratios.len(), 41);
1171        } else {
1172            panic!("7008 parsed as wrong type");
1173        }
1174
1175        print_head(7008, &res);
1176    }
1177
1178    #[test]
1179    fn test_report_column_names() {
1180        let balance = FinanceReader::column_names(FileType::BalanceSheet).expect("7001 names");
1181        assert_eq!(balance.len(), 156);
1182        assert_eq!(balance[0], "internal_shoule_recv");
1183        assert_eq!(balance[44], "cash_equivalents");
1184        assert_eq!(balance[140], "m_otherReserves");
1185        assert_eq!(balance[141], "__extra_141");
1186
1187        let income = FinanceReader::column_names(FileType::Income).expect("7002 names");
1188        assert_eq!(income.len(), 80);
1189        assert_eq!(income[0], "revenue_inc");
1190        assert_eq!(income[3], "total_operating_cost");
1191        assert_eq!(income[65], "m_otherItemsAffectingNetProfit");
1192        assert_eq!(income[66], "__extra_66");
1193
1194        let cashflow = FinanceReader::column_names(FileType::CashFlow).expect("7003 names");
1195        assert_eq!(cashflow.len(), 111);
1196        assert_eq!(cashflow[0], "cash_received_ori_ins_contract_pre");
1197        assert_eq!(cashflow[26], "goods_sale_and_service_render_cash");
1198        assert_eq!(cashflow[105], "m_addOperatingReceivableItems");
1199        assert_eq!(cashflow[106], "__extra_106");
1200
1201        let ratios = FinanceReader::column_names(FileType::Ratios).expect("7008 names");
1202        assert_eq!(ratios.len(), 41);
1203        assert_eq!(ratios[0], "s_fa_ocfps");
1204        assert_eq!(ratios[1], "s_fa_bps");
1205        assert_eq!(ratios[35], "adjusted_net_profit");
1206        assert_eq!(ratios[36], "__extra_36");
1207    }
1208
1209    #[test]
1210    fn test_named_values_for_ratios() {
1211        let path = get_fixture("002419_7008.DAT");
1212        if !path.exists() {
1213            eprintln!("Skipping 7008: File not found");
1214            return;
1215        }
1216
1217        let res = FinanceReader::read_file(&path).expect("Failed to parse 7008");
1218        let named = res[0].named_values().expect("named ratios");
1219
1220        assert_eq!(named[0].0, "s_fa_ocfps");
1221        assert!((named[0].1 - 0.5646).abs() < 1e-9);
1222        assert_eq!(named[1].0, "s_fa_bps");
1223        assert!((named[1].1 - 7.62).abs() < 1e-9);
1224    }
1225
1226    #[test]
1227    fn test_finance_record_is_self_describing() {
1228        let path = get_fixture("002419_7001.DAT");
1229        if !path.exists() {
1230            eprintln!("Skipping 7001: File not found");
1231            return;
1232        }
1233
1234        let res = FinanceReader::read_file(&path).expect("Failed to parse 7001");
1235        let first = &res[0];
1236        assert_eq!(first.file_type, FileType::BalanceSheet);
1237        let names = first.column_names().expect("balance sheet names");
1238        assert_eq!(names[0], "internal_shoule_recv");
1239
1240        let named = first.named_values().expect("self-described values");
1241        assert_eq!(named[0].0, "internal_shoule_recv");
1242    }
1243}