Skip to main content

qmt_parser/
finance.rs

1//! QMT 本地财务 `.DAT` 解析。
2//!
3//! 这里的字段命名来自仓库内样本和逆向结果,部分列仍保留“尽量稳定但未完全确认”
4//! 的语义。对需要结构化列名的场景,优先使用 [`FinanceRecord::column_names`] 和
5//! [`FinanceRecord::named_values`],不要自行硬编码列顺序。
6
7use std::fs::File;
8use std::io;
9use std::path::Path;
10
11use byteorder::{ByteOrder, LittleEndian};
12use chrono::{DateTime, FixedOffset, TimeZone};
13use memmap2::MmapOptions;
14use thiserror::Error;
15
// Byte stride of one record, listed in order of the type id found in the file name.
const STRIDE_BALANCE: usize = 1264; // 7001
const STRIDE_INCOME: usize = 664; // 7002
const STRIDE_CASHFLOW: usize = 920; // 7003
const STRIDE_CAPITAL: usize = 56; // 7004
const STRIDE_HOLDER: usize = 64; // 7005
const STRIDE_TOP_HOLDER: usize = 416; // 7006, 7007
const STRIDE_RATIOS: usize = 344; // 7008

// Number of f64 value columns decoded per record for the report-style tables.
const COLUMNS_BALANCE: usize = 156;
const COLUMNS_INCOME: usize = 80;
const COLUMNS_CASHFLOW: usize = 111;
const COLUMNS_RATIOS: usize = 41;

// Accepted timestamp window in milliseconds since the Unix epoch
// (1990-01-01 through 2050-01-01, both at 00:00 UTC).
const MIN_VALID_TS: i64 = 631_152_000_000;
const MAX_VALID_TS: i64 = 2_524_608_000_000;

// Sentinel bit pattern that `read_f64` reports as a missing value.
// Despite the name it is not an IEEE NaN: these are the bits of `f64::MAX`.
const QMT_NAN_HEX: u64 = 0x7FEF_FFFF_FFFF_FFFF;
32
// --- Error definitions ---
/// Errors that can occur while reading or parsing a finance `.DAT` file.
#[derive(Debug, Error)]
pub enum FinanceError {
    /// The file could not be opened or read.
    #[error("IO Error: {0}")]
    Io(#[from] io::Error),
    /// The file extension is not `.dat` or `.DAT`; carries the offending path.
    #[error("Invalid File Extension: {0}")]
    InvalidExtension(String),
    /// The type id in the file name is not one of the supported finance types.
    #[error("Unsupported File Type ID: {0}")]
    UnsupportedType(u16),
    /// The file name, record layout, or a field value was not as expected.
    #[error("Parse Error: {0}")]
    Parse(String),
}
50
/// Date-time type intended to carry Beijing time (UTC+8).
///
/// This is a plain alias for a fixed-offset `DateTime`; the alias itself does not
/// enforce the +08:00 offset.
pub type BjDateTime = DateTime<FixedOffset>;
53
/// A single finance record.
///
/// Every record carries the finance file type, the report date, the announcement
/// date, and the typed data payload for that file type.
#[derive(Debug, Clone)]
pub struct FinanceRecord {
    /// Finance file type this record was parsed from.
    pub file_type: FileType,
    /// Date of the reporting period.
    pub report_date: BjDateTime,
    /// Announcement date.
    pub announce_date: BjDateTime,
    /// The typed data payload.
    pub data: FinanceData,
}
68
/// Data payloads for the different finance file types.
#[derive(Debug, Clone)]
pub enum FinanceData {
    /// Numeric columns of a 7001, 7002, or 7003 financial statement.
    Report {
        /// Values ordered according to the schema of the corresponding file type.
        columns: Vec<f64>,
    },
    /// 7004 capital structure.
    Capital {
        /// Total share capital.
        total_share: f64,
        /// Tradable (circulating) share capital.
        flow_share: f64,
        /// Restricted share capital.
        restricted: f64,
        /// Free-float share capital.
        free_float_share: f64,
    },
    /// 7005 shareholder counts.
    HolderCount {
        /// Total number of shareholders.
        total_holders: i64,
        /// Number of A-share holders.
        a_holders: i64,
        /// Number of B-share holders.
        b_holders: i64,
        /// Number of H-share holders.
        h_holders: i64,
        /// Number of holders of tradable shares.
        float_holders: i64,
        /// Number of other shareholders.
        other_holders: i64,
    },
    /// 7006 / 7007 top-ten shareholders or top-ten tradable-share holders.
    TopHolder {
        /// Shareholders contained in this record.
        holders: Vec<Shareholder>,
    },
    /// Numeric columns of a 7008 financial-ratio record.
    Ratios {
        /// Values ordered according to the ratio table schema.
        ratios: Vec<f64>,
    },
}
114
/// A single shareholder entry.
///
/// Derives `PartialEq` and `Default` in addition to `Debug`/`Clone` so entries can be
/// compared in tests and built with struct-update syntax.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Shareholder {
    /// Shareholder name.
    pub name: String,
    /// Shareholder type.
    pub holder_type: String,
    /// Number of shares held.
    pub hold_amount: f64,
    /// Reason for the change in holdings.
    pub change_reason: String,
    /// Holding ratio, e.g. `0.05` stands for `5%`.
    pub hold_ratio: f64,
    /// Nature of the shares, e.g. "流通A股" (tradable A shares).
    pub share_type: String,
    /// Rank of the shareholder.
    pub rank: u32,
}
133
/// Finance file type.
///
/// Each variant's discriminant is the numeric type id used in the data file name.
/// `Eq` and `Hash` are derived so the type can be used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileType {
    /// 7001 balance sheet.
    BalanceSheet = 7001,
    /// 7002 income statement.
    Income = 7002,
    /// 7003 cash-flow statement.
    CashFlow = 7003,
    /// 7004 capital structure.
    Capital = 7004,
    /// 7005 shareholder counts.
    HolderCount = 7005,
    /// 7006 top-ten holders of tradable shares.
    TopFlowHolder = 7006,
    /// 7007 top-ten shareholders.
    TopHolder = 7007,
    /// 7008 financial ratios.
    Ratios = 7008,
}
154
155impl FileType {
156    /// 从文件名中的 type id 映射为 [`FileType`]。
157    pub fn from_id(id: u16) -> Option<Self> {
158        match id {
159            7001 => Some(Self::BalanceSheet),
160            7002 => Some(Self::Income),
161            7003 => Some(Self::CashFlow),
162            7004 => Some(Self::Capital),
163            7005 => Some(Self::HolderCount),
164            7006 => Some(Self::TopFlowHolder),
165            7007 => Some(Self::TopHolder),
166            7008 => Some(Self::Ratios),
167            _ => None,
168        }
169    }
170}
171
172const BALANCE_COLUMN_NAMES: [&str; 156] = [
173    "internal_shoule_recv",
174    "fixed_capital_clearance",
175    "should_pay_money",
176    "settlement_payment",
177    "receivable_premium",
178    "accounts_receivable_reinsurance",
179    "reinsurance_contract_reserve",
180    "dividends_payable",
181    "tax_rebate_for_export",
182    "subsidies_receivable",
183    "deposit_receivable",
184    "apportioned_cost",
185    "profit_and_current_assets_with_deal",
186    "current_assets_one_year",
187    "long_term_receivables",
188    "other_long_term_investments",
189    "original_value_of_fixed_assets",
190    "net_value_of_fixed_assets",
191    "depreciation_reserves_of_fixed_assets",
192    "productive_biological_assets",
193    "public_welfare_biological_assets",
194    "oil_and_gas_assets",
195    "development_expenditure",
196    "right_of_split_share_distribution",
197    "other_non_mobile_assets",
198    "handling_fee_and_commission",
199    "other_payables",
200    "margin_payable",
201    "internal_accounts_payable",
202    "advance_cost",
203    "insurance_contract_reserve",
204    "broker_buying_and_selling_securities",
205    "acting_underwriting_securities",
206    "international_ticket_settlement",
207    "domestic_ticket_settlement",
208    "deferred_income",
209    "short_term_bonds_payable",
210    "long_term_deferred_income",
211    "undetermined_investment_losses",
212    "quasi_distribution_of_cash_dividends",
213    "provisions_not",
214    "cust_bank_dep",
215    "provisions",
216    "less_tsy_stk",
217    "cash_equivalents",
218    "loans_to_oth_banks",
219    "tradable_fin_assets",
220    "derivative_fin_assets",
221    "bill_receivable",
222    "account_receivable",
223    "advance_payment",
224    "int_rcv",
225    "other_receivable",
226    "red_monetary_cap_for_sale",
227    "agency_bus_assets",
228    "inventories",
229    "other_current_assets",
230    "total_current_assets",
231    "loans_and_adv_granted",
232    "fin_assets_avail_for_sale",
233    "held_to_mty_invest",
234    "long_term_eqy_invest",
235    "invest_real_estate",
236    "accumulated_depreciation",
237    "fix_assets",
238    "constru_in_process",
239    "construction_materials",
240    "long_term_liabilities",
241    "intang_assets",
242    "goodwill",
243    "long_deferred_expense",
244    "deferred_tax_assets",
245    "total_non_current_assets",
246    "tot_assets",
247    "shortterm_loan",
248    "borrow_central_bank",
249    "loans_oth_banks",
250    "tradable_fin_liab",
251    "derivative_fin_liab",
252    "notes_payable",
253    "accounts_payable",
254    "advance_peceipts",
255    "fund_sales_fin_assets_rp",
256    "empl_ben_payable",
257    "taxes_surcharges_payable",
258    "int_payable",
259    "dividend_payable",
260    "other_payable",
261    "non_current_liability_in_one_year",
262    "other_current_liability",
263    "total_current_liability",
264    "long_term_loans",
265    "bonds_payable",
266    "longterm_account_payable",
267    "grants_received",
268    "deferred_tax_liab",
269    "other_non_current_liabilities",
270    "non_current_liabilities",
271    "tot_liab",
272    "cap_stk",
273    "cap_rsrv",
274    "specific_reserves",
275    "surplus_rsrv",
276    "prov_nom_risks",
277    "undistributed_profit",
278    "cnvd_diff_foreign_curr_stat",
279    "tot_shrhldr_eqy_excl_min_int",
280    "minority_int",
281    "total_equity",
282    "tot_liab_shrhldr_eqy",
283    "inventory_depreciation_reserve",
284    "current_ratio",
285    "m_cashAdepositsCentralBank",
286    "m_nobleMetal",
287    "m_depositsOtherFinancialInstitutions",
288    "m_currentInvestment",
289    "m_redemptoryMonetaryCapitalSale",
290    "m_netAmountSubrogation",
291    "m_refundableDeposits",
292    "m_netAmountLoanPledged",
293    "m_fixedTimeDeposit",
294    "m_netLongtermDebtInvestments",
295    "m_permanentInvestment",
296    "m_depositForcapitalRecognizance",
297    "m_netBalConstructionProgress",
298    "m_separateAccountAssets",
299    "m_capitalInvicariousBussiness",
300    "m_otherAssets",
301    "m_depositsWithBanksOtherFinancialIns",
302    "m_indemnityPayable",
303    "m_policyDividendPayable",
304    "m_guaranteeInvestmentFunds",
305    "m_premiumsReceivedAdvance",
306    "m_insuranceLiabilities",
307    "m_liabilitiesIndependentAccounts",
308    "m_liabilitiesVicariousBusiness",
309    "m_otherLiablities",
310    "m_capitalPremium",
311    "m_petainedProfit",
312    "m_provisionTransactionRisk",
313    "m_otherReserves",
314    "__extra_141",
315    "__extra_142",
316    "__extra_143",
317    "__extra_144",
318    "__extra_145",
319    "__extra_146",
320    "__extra_147",
321    "__extra_148",
322    "__extra_149",
323    "__extra_150",
324    "__extra_151",
325    "__extra_152",
326    "__extra_153",
327    "__extra_154",
328    "__extra_155",
329];
330
331const INCOME_COLUMN_NAMES: [&str; 80] = [
332    "revenue_inc",
333    "earned_premium",
334    "real_estate_sales_income",
335    "total_operating_cost",
336    "real_estate_sales_cost",
337    "research_expenses",
338    "surrender_value",
339    "net_payments",
340    "net_withdrawal_ins_con_res",
341    "policy_dividend_expenses",
342    "reinsurance_cost",
343    "change_income_fair_value",
344    "futures_loss",
345    "trust_income",
346    "subsidize_revenue",
347    "other_business_profits",
348    "net_profit_excl_merged_int_inc",
349    "int_inc",
350    "handling_chrg_comm_inc",
351    "less_handling_chrg_comm_exp",
352    "other_bus_cost",
353    "plus_net_gain_fx_trans",
354    "il_net_loss_disp_noncur_asset",
355    "inc_tax",
356    "unconfirmed_invest_loss",
357    "net_profit_excl_min_int_inc",
358    "less_int_exp",
359    "other_bus_inc",
360    "revenue",
361    "total_expense",
362    "less_taxes_surcharges_ops",
363    "sale_expense",
364    "less_gerl_admin_exp",
365    "financial_expense",
366    "less_impair_loss_assets",
367    "plus_net_invest_inc",
368    "incl_inc_invest_assoc_jv_entp",
369    "oper_profit",
370    "plus_non_oper_rev",
371    "less_non_oper_exp",
372    "tot_profit",
373    "net_profit_incl_min_int_inc",
374    "net_profit_incl_min_int_inc_after",
375    "minority_int_inc",
376    "s_fa_eps_basic",
377    "s_fa_eps_diluted",
378    "total_income",
379    "total_income_minority",
380    "other_compreh_inc",
381    "operating_revenue",
382    "cost_of_goods_sold",
383    "m_netinterestIncome",
384    "m_netFeesCommissions",
385    "m_insuranceBusiness",
386    "m_separatePremium",
387    "m_asideReservesUndueLiabilities",
388    "m_paymentsInsuranceClaims",
389    "m_amortizedCompensationExpenses",
390    "m_netReserveInsuranceLiability",
391    "m_policyReserve",
392    "m_amortizeInsuranceReserve",
393    "m_nsuranceFeesCommissionExpenses",
394    "m_operationAdministrativeExpense",
395    "m_amortizedReinsuranceExpenditure",
396    "m_netProfitLossdisposalNonassets",
397    "m_otherItemsAffectingNetProfit",
398    "__extra_66",
399    "__extra_67",
400    "__extra_68",
401    "__extra_69",
402    "__extra_70",
403    "__extra_71",
404    "__extra_72",
405    "__extra_73",
406    "__extra_74",
407    "__extra_75",
408    "__extra_76",
409    "__extra_77",
410    "__extra_78",
411    "__extra_79",
412];
413
414const CASHFLOW_COLUMN_NAMES: [&str; 111] = [
415    "cash_received_ori_ins_contract_pre",
416    "net_cash_received_rei_ope",
417    "net_increase_insured_funds",
418    "net_increase_in_disposal",
419    "cash_for_interest",
420    "net_increase_in_repurchase_funds",
421    "cash_for_payment_original_insurance",
422    "cash_payment_policy_dividends",
423    "disposal_other_business_units",
424    "net_cash_deal_subcompany",
425    "cash_received_from_pledges",
426    "cash_paid_for_investments",
427    "net_increase_in_pledged_loans",
428    "cash_paid_by_subsidiaries",
429    "increase_in_cash_paid",
430    "fix_intan_other_asset_dispo_cash_payment",
431    "cash_from_mino_s_invest_sub",
432    "cass_received_sub_abs",
433    "cass_received_sub_investments",
434    "minority_shareholder_profit_loss",
435    "unrecognized_investment_losses",
436    "ncrease_deferred_income",
437    "projected_liability",
438    "increase_operational_payables",
439    "reduction_outstanding_amounts_less",
440    "reduction_outstanding_amounts_more",
441    "goods_sale_and_service_render_cash",
442    "net_incr_dep_cob",
443    "net_incr_loans_central_bank",
444    "net_incr_fund_borr_ofi",
445    "net_incr_fund_borr_ofi",
446    "tax_levy_refund",
447    "cash_paid_invest",
448    "other_cash_pay_ral_inv_act",
449    "other_cash_recp_ral_oper_act",
450    "stot_cash_inflows_oper_act",
451    "goods_and_services_cash_paid",
452    "net_incr_clients_loan_adv",
453    "net_incr_dep_cbob",
454    "handling_chrg_paid",
455    "cash_pay_beh_empl",
456    "pay_all_typ_tax",
457    "other_cash_pay_ral_oper_act",
458    "stot_cash_outflows_oper_act",
459    "net_cash_flows_oper_act",
460    "cash_recp_disp_withdrwl_invest",
461    "cash_recp_return_invest",
462    "net_cash_recp_disp_fiolta",
463    "other_cash_recp_ral_inv_act",
464    "stot_cash_inflows_inv_act",
465    "cash_pay_acq_const_fiolta",
466    "stot_cash_outflows_inv_act",
467    "net_cash_flows_inv_act",
468    "cash_recp_cap_contrib",
469    "cash_recp_borrow",
470    "proc_issue_bonds",
471    "other_cash_recp_ral_fnc_act",
472    "stot_cash_inflows_fnc_act",
473    "cash_prepay_amt_borr",
474    "cash_pay_dist_dpcp_int_exp",
475    "other_cash_pay_ral_fnc_act",
476    "stot_cash_outflows_fnc_act",
477    "net_cash_flows_fnc_act",
478    "eff_fx_flu_cash",
479    "net_incr_cash_cash_equ",
480    "cash_cash_equ_beg_period",
481    "cash_cash_equ_end_period",
482    "net_profit",
483    "plus_prov_depr_assets",
484    "depr_fa_coga_dpba",
485    "amort_intang_assets",
486    "amort_lt_deferred_exp",
487    "decr_deferred_exp",
488    "incr_acc_exp",
489    "loss_disp_fiolta",
490    "loss_scr_fa",
491    "loss_fv_chg",
492    "fin_exp",
493    "invest_loss",
494    "decr_deferred_inc_tax_assets",
495    "incr_deferred_inc_tax_liab",
496    "decr_inventories",
497    "decr_oper_payable",
498    "others",
499    "im_net_cash_flows_oper_act",
500    "conv_debt_into_cap",
501    "conv_corp_bonds_due_within_1y",
502    "fa_fnc_leases",
503    "end_bal_cash",
504    "less_beg_bal_cash",
505    "plus_end_bal_cash_equ",
506    "less_beg_bal_cash_equ",
507    "im_net_incr_cash_cash_equ",
508    "m_netDecreaseUnwindingFunds",
509    "m_netReductionPurchaseRebates",
510    "m_netIncreaseDepositsBanks",
511    "m_netCashReinsuranceBusiness",
512    "m_netReductionDeposInveFunds",
513    "m_netIncreaseUnwindingFunds",
514    "m_netReductionAmountBorrowedFunds",
515    "m_netReductionSaleRepurchaseProceeds",
516    "m_paymentOtherCashRelated",
517    "m_cashOutFlowsInvesactivities",
518    "m_absorbCashEquityInv",
519    "m_otherImpactsOnCash",
520    "m_addOperatingReceivableItems",
521    "__extra_106",
522    "__extra_107",
523    "__extra_108",
524    "__extra_109",
525    "__extra_110",
526];
527
528const RATIO_COLUMN_NAMES: [&str; 41] = [
529    "s_fa_ocfps",
530    "s_fa_bps",
531    "s_fa_eps_basic",
532    "s_fa_eps_diluted",
533    "s_fa_undistributedps",
534    "s_fa_surpluscapitalps",
535    "adjusted_earnings_per_share",
536    "du_return_on_equity",
537    "sales_gross_profit",
538    "inc_revenue_rate",
539    "du_profit_rate",
540    "inc_net_profit_rate",
541    "adjusted_net_profit_rate",
542    "inc_total_revenue_annual",
543    "inc_net_profit_to_shareholders_annual",
544    "adjusted_profit_to_profit_annual",
545    "equity_roe",
546    "net_roe",
547    "total_roe",
548    "gross_profit",
549    "net_profit",
550    "actual_tax_rate",
551    "pre_pay_operate_income",
552    "sales_cash_flow",
553    "pre_pay_operate_income",
554    "sales_cash_flow",
555    "gear_ratio",
556    "inventory_turnover",
557    "m_anntime",
558    "m_timetag",
559    "inc_revenue",
560    "inc_gross_profit",
561    "inc_profit_before_tax",
562    "du_profit",
563    "inc_net_profit",
564    "adjusted_net_profit",
565    "__extra_36",
566    "__extra_37",
567    "__extra_38",
568    "__extra_39",
569    "__extra_40",
570];
571
572impl FinanceRecord {
573    /// 返回当前记录对应的结构化列名。
574    ///
575    /// 仅对 `Report` 和 `Ratios` 类型返回值。
576    pub fn column_names(&self) -> Option<&'static [&'static str]> {
577        FinanceReader::column_names(self.file_type)
578    }
579
580    /// 返回 `(列名, 数值)` 形式的配对结果。
581    ///
582    /// 仅对 `Report` 和 `Ratios` 类型返回值。
583    pub fn named_values(&self) -> Option<Vec<(&'static str, f64)>> {
584        let names = self.column_names()?;
585        match &self.data {
586            FinanceData::Report { columns } => {
587                Some(names.iter().copied().zip(columns.iter().copied()).collect())
588            }
589            FinanceData::Ratios { ratios } => {
590                Some(names.iter().copied().zip(ratios.iter().copied()).collect())
591            }
592            _ => None,
593        }
594    }
595}
596
/// Reader for finance data files.
///
/// A stateless unit type whose associated functions do the work. The common traits are
/// derived so it can be debug-printed, copied, and default-constructed like any other
/// public type.
#[derive(Debug, Clone, Copy, Default)]
pub struct FinanceReader;
599
600impl FinanceReader {
601    /// 返回指定财务文件类型的结构化列名。
602    ///
603    /// 当前仅对报表类和比率类文件返回值。
604    pub fn column_names(file_type: FileType) -> Option<&'static [&'static str]> {
605        match file_type {
606            FileType::BalanceSheet => Some(&BALANCE_COLUMN_NAMES),
607            FileType::Income => Some(&INCOME_COLUMN_NAMES),
608            FileType::CashFlow => Some(&CASHFLOW_COLUMN_NAMES),
609            FileType::Ratios => Some(&RATIO_COLUMN_NAMES),
610            _ => None,
611        }
612    }
613
614    /// 读取单个财务文件并解析为 [`FinanceRecord`] 列表。
615    ///
616    /// 文件类型会从文件名中的 type id 自动识别,例如 `002419_7001.DAT`。
617    ///
618    /// # Examples
619    ///
620    /// ```no_run
621    /// use qmt_parser::finance::FinanceReader;
622    ///
623    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
624    /// let records = FinanceReader::read_file("finance/002419_7001.DAT")?;
625    /// println!("records = {}", records.len());
626    /// # Ok(())
627    /// # }
628    /// ```
629    pub fn read_file(path: impl AsRef<Path>) -> Result<Vec<FinanceRecord>, FinanceError> {
630        let path = path.as_ref();
631        Self::validate_dat_path(path)?;
632        let file_type = Self::detect_type(path)?;
633        let file = File::open(path)?;
634
635        // mmap 零拷贝读取
636        let mmap = unsafe { MmapOptions::new().map(&file)? };
637        let data = &mmap[..];
638
639        match file_type {
640            FileType::BalanceSheet => Self::parse_fixed(data, STRIDE_BALANCE, 0, |body| {
641                let mut cols = Vec::with_capacity(COLUMNS_BALANCE);
642                for i in 0..COLUMNS_BALANCE {
643                    cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
644                }
645                FinanceData::Report { columns: cols }
646            }),
647            FileType::Income => Self::parse_fixed(data, STRIDE_INCOME, 8, |body| {
648                let mut cols = Vec::with_capacity(COLUMNS_INCOME);
649                for i in 0..COLUMNS_INCOME {
650                    cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
651                }
652                FinanceData::Report { columns: cols }
653            }),
654            FileType::CashFlow => Self::parse_fixed(data, STRIDE_CASHFLOW, 8, |body| {
655                let mut cols = Vec::with_capacity(COLUMNS_CASHFLOW);
656                for i in 0..COLUMNS_CASHFLOW {
657                    cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
658                }
659                FinanceData::Report { columns: cols }
660            }),
661            FileType::Ratios => Self::parse_fixed(data, STRIDE_RATIOS, 0, |body| {
662                let mut cols = Vec::with_capacity(COLUMNS_RATIOS);
663                for i in 0..COLUMNS_RATIOS {
664                    cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
665                }
666                FinanceData::Ratios { ratios: cols }
667            }),
668            FileType::Capital => {
669                Self::parse_fixed(data, STRIDE_CAPITAL, 0, |body| {
670                    // Body Offset (Header=16): 0=Total, 8=Flow, 16=Restricted, 24=FreeFloat
671                    FinanceData::Capital {
672                        total_share: Self::read_f64(body, 0).unwrap_or(0.0),
673                        flow_share: Self::read_f64(body, 8).unwrap_or(0.0),
674                        restricted: Self::read_f64(body, 16).unwrap_or(0.0),
675                        free_float_share: Self::read_f64(body, 24).unwrap_or(0.0),
676                    }
677                })
678            }
679            FileType::HolderCount => {
680                // 7005 特殊 Header 顺序: [Announce] [Report]. parse_fixed 默认读 [Report][Announce]
681                // 需要特殊处理? 不,QMT 的 7005 Header 顺序根据 Hex 是:
682                // 00..07: AnnounceDate, 08..15: ReportDate
683                // 我们在 parse_fixed 内部交换一下即可,或者在回调里处理
684                // 为了统一,我们使用专门的 parse_7005
685                Self::parse_7005_fixed(data)
686            }
687            FileType::TopFlowHolder | FileType::TopHolder => {
688                Self::parse_top_holders(data, file_type)
689            }
690        }
691    }
692
693    /// 从文件名中解析 TypeId 并映射到枚举
694    fn detect_type(path: &Path) -> Result<FileType, FinanceError> {
695        let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
696        let id_part = stem.split('_').next_back().unwrap_or("");
697        let id = id_part
698            .parse::<u16>()
699            .map_err(|_| FinanceError::Parse("Invalid Filename".into()))?;
700        FileType::from_id(id).ok_or(FinanceError::UnsupportedType(id))
701    }
702
703    fn validate_dat_path(path: &Path) -> Result<(), FinanceError> {
704        let ext = path
705            .extension()
706            .and_then(|s| s.to_str())
707            .unwrap_or_default()
708            .to_ascii_lowercase();
709        if ext != "dat" {
710            return Err(FinanceError::InvalidExtension(path.display().to_string()));
711        }
712        Ok(())
713    }
714
715    // --- 定长解析器 (7001-7004, 7008) ---
716    /// 通用定长表解析器,接收回调解析正文部分
717    fn parse_fixed<F>(
718        data: &[u8],
719        stride: usize,
720        header_offset: usize,
721        parser: F,
722    ) -> Result<Vec<FinanceRecord>, FinanceError>
723    where
724        F: Fn(&[u8]) -> FinanceData,
725    {
726        let mut results = Vec::new();
727        let mut cursor = 0;
728        let len = data.len();
729
730        while cursor + header_offset + 16 <= len {
731            // 扫描 Header
732            let header_start = cursor + header_offset;
733            let ts1 = LittleEndian::read_i64(&data[header_start..header_start + 8]);
734            let ts2 = LittleEndian::read_i64(&data[header_start + 8..header_start + 16]);
735
736            // 7001-7004, 7008 顺序: Report, Announce
737            if Self::is_valid_ts(ts1) {
738                // 如果发现有效头
739                if cursor + stride <= len {
740                    let report_date = Self::ts_to_bj(ts1);
741                    // Announce Date 可能是 0,如果是 0,回退到 Report Date
742                    let announce_date = if Self::is_valid_ts(ts2) {
743                        Self::ts_to_bj(ts2)
744                    } else {
745                        report_date // Fallback
746                    };
747
748                    let body = &data[header_start + 16..cursor + stride];
749                    results.push(FinanceRecord {
750                        file_type: Self::file_type_from_stride(stride)?,
751                        report_date,
752                        announce_date,
753                        data: parser(body),
754                    });
755
756                    cursor += stride;
757                    continue;
758                }
759            }
760            // 滑动窗口寻找下一个有效头
761            cursor += 8;
762        }
763        Ok(results)
764    }
765
766    // --- 7005 专用解析 (Header 顺序颠倒) ---
767    /// 针对 7005 (股东人数) 的定长解析
768    fn parse_7005_fixed(data: &[u8]) -> Result<Vec<FinanceRecord>, FinanceError> {
769        let mut results = Vec::new();
770        let mut cursor = 0;
771        let stride = STRIDE_HOLDER;
772
773        while cursor + 16 <= data.len() {
774            let ts1 = LittleEndian::read_i64(&data[cursor..cursor + 8]); // Announce
775            let ts2 = LittleEndian::read_i64(&data[cursor + 8..cursor + 16]); // Report
776
777            // 只要有一个有效,就尝试解析
778            if Self::is_valid_ts(ts2) && cursor + stride <= data.len() {
779                let report_date = Self::ts_to_bj(ts2);
780                let announce_date = if Self::is_valid_ts(ts1) {
781                    Self::ts_to_bj(ts1)
782                } else {
783                    report_date
784                };
785
786                let body = &data[cursor + 16..cursor + stride];
787                let total_holders = Self::read_f64(body, 0).unwrap_or(0.0) as i64;
788                let a_holders = Self::read_f64(body, 8).unwrap_or(0.0) as i64;
789                let b_holders = Self::read_f64(body, 16).unwrap_or(0.0) as i64;
790                let h_holders = Self::read_f64(body, 24).unwrap_or(0.0) as i64;
791                let float_holders = Self::read_f64(body, 32).unwrap_or(0.0) as i64;
792                let other_holders = Self::read_f64(body, 40).unwrap_or(0.0) as i64;
793
794                results.push(FinanceRecord {
795                    file_type: FileType::HolderCount,
796                    report_date,
797                    announce_date,
798                    data: FinanceData::HolderCount {
799                        total_holders,
800                        a_holders,
801                        b_holders,
802                        h_holders,
803                        float_holders,
804                        other_holders,
805                    },
806                });
807                cursor += stride;
808                continue;
809            }
810            cursor += 8;
811        }
812        Ok(results)
813    }
814
815    /// 解析 7006/7007 十大(流通)股东定长记录,并按同一报告期聚合。
816    fn parse_top_holders(
817        data: &[u8],
818        file_type: FileType,
819    ) -> Result<Vec<FinanceRecord>, FinanceError> {
820        let mut results = Vec::new();
821        let mut current_report_ts = 0i64;
822        let mut current_announce_ts = 0i64;
823        let mut current_holders = Vec::new();
824
825        for chunk in data.chunks_exact(STRIDE_TOP_HOLDER) {
826            let announce_ts = LittleEndian::read_i64(&chunk[0..8]);
827            let report_ts = LittleEndian::read_i64(&chunk[8..16]);
828            if !Self::is_valid_ts(announce_ts) || !Self::is_valid_ts(report_ts) {
829                continue;
830            }
831
832            let holder = Self::parse_top_holder_record(chunk);
833            if current_holders.is_empty() {
834                current_report_ts = report_ts;
835                current_announce_ts = announce_ts;
836            }
837
838            if report_ts != current_report_ts || announce_ts != current_announce_ts {
839                results.push(FinanceRecord {
840                    file_type,
841                    report_date: Self::ts_to_bj(current_report_ts),
842                    announce_date: Self::ts_to_bj(current_announce_ts),
843                    data: FinanceData::TopHolder {
844                        holders: std::mem::take(&mut current_holders),
845                    },
846                });
847                current_report_ts = report_ts;
848                current_announce_ts = announce_ts;
849            }
850
851            current_holders.push(holder);
852        }
853
854        if !current_holders.is_empty() {
855            results.push(FinanceRecord {
856                file_type,
857                report_date: Self::ts_to_bj(current_report_ts),
858                announce_date: Self::ts_to_bj(current_announce_ts),
859                data: FinanceData::TopHolder {
860                    holders: current_holders,
861                },
862            });
863        }
864
865        Ok(results)
866    }
867
868    fn parse_top_holder_record(record: &[u8]) -> Shareholder {
869        Shareholder {
870            name: Self::read_string(record, 16, 192),
871            holder_type: Self::read_string(record, 216, 56),
872            hold_amount: Self::read_f64(record, 272).unwrap_or(0.0),
873            change_reason: Self::read_string(record, 280, 16),
874            hold_ratio: Self::read_f64(record, 304).unwrap_or(0.0),
875            share_type: Self::read_string(record, 312, 96),
876            rank: LittleEndian::read_u32(&record[412..416]),
877        }
878    }
879
880    fn file_type_from_stride(stride: usize) -> Result<FileType, FinanceError> {
881        match stride {
882            STRIDE_BALANCE => Ok(FileType::BalanceSheet),
883            STRIDE_INCOME => Ok(FileType::Income),
884            STRIDE_CASHFLOW => Ok(FileType::CashFlow),
885            STRIDE_RATIOS => Ok(FileType::Ratios),
886            STRIDE_CAPITAL => Ok(FileType::Capital),
887            STRIDE_HOLDER => Ok(FileType::HolderCount),
888            _ => Err(FinanceError::Parse(format!(
889                "Unknown finance stride: {}",
890                stride
891            ))),
892        }
893    }
894
895    fn is_valid_ts(ts: i64) -> bool {
896        (MIN_VALID_TS..=MAX_VALID_TS).contains(&ts)
897    }
898
899    /// 将毫秒时间戳转换为北京时间
900    fn ts_to_bj(ts: i64) -> BjDateTime {
901        // 构建 UTC+8
902        let tz = FixedOffset::east_opt(8 * 3600).unwrap();
903        let secs = ts / 1000;
904        let nsecs = (ts % 1000) * 1_000_000;
905        tz.timestamp_opt(secs, nsecs as u32)
906            .single()
907            .unwrap_or_default()
908    }
909
910    /// 读取 f64 并处理哨兵值
911    fn read_f64(data: &[u8], offset: usize) -> Option<f64> {
912        if offset + 8 > data.len() {
913            return None;
914        }
915        let u = LittleEndian::read_u64(&data[offset..offset + 8]);
916        if u == QMT_NAN_HEX {
917            return None;
918        }
919        let f = f64::from_bits(u);
920        if f.is_nan() { None } else { Some(f) }
921    }
922
923    /// 从定长缓冲区读取 UTF-8 字符串
924    fn read_string(data: &[u8], offset: usize, max_len: usize) -> String {
925        if offset >= data.len() {
926            return String::new();
927        }
928        let end = (offset + max_len).min(data.len());
929        let slice = &data[offset..end];
930        // 找 \0 结尾
931        let actual_len = slice.iter().position(|&c| c == 0).unwrap_or(slice.len());
932        String::from_utf8_lossy(&slice[..actual_len])
933            .trim()
934            .to_string()
935    }
936}
937
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Environment variable that, when set, overrides the directory holding
    /// the sample `.DAT` fixtures.
    const FIXTURE_DIR_ENV: &str = "QMT_FINANCE_FIXTURE_DIR";

    /// Fixture directory used when `FIXTURE_DIR_ENV` is not set.
    const DEFAULT_FIXTURE_DIR: &str = "/home/sunnysab/Code/trade-rs/qmt-parser/finance/";

    /// Resolves `file` inside the fixture directory.
    ///
    /// The directory comes from `FIXTURE_DIR_ENV` when that variable is set,
    /// otherwise from `DEFAULT_FIXTURE_DIR`, so the tests are not tied to a
    /// single machine's checkout location.
    fn get_fixture(file: &str) -> PathBuf {
        std::env::var_os(FIXTURE_DIR_ENV)
            .map(PathBuf::from)
            .unwrap_or_else(|| PathBuf::from(DEFAULT_FIXTURE_DIR))
            .join(file)
    }

    /// Returns the path of the `002419_<type_id>.DAT` fixture, or logs a skip
    /// notice and returns `None` when the file is not present.
    fn fixture_or_skip(type_id: u16) -> Option<PathBuf> {
        let path = get_fixture(&format!("002419_{}.DAT", type_id));
        if path.exists() {
            Some(path)
        } else {
            eprintln!("Skipping {}: File not found", type_id);
            None
        }
    }

    /// Helper: prints the first five records of a parsed file.
    fn print_head(type_id: u16, records: &[FinanceRecord]) {
        println!(
            "\n>>> [Type {}] Found {} records. Showing first 5:",
            type_id,
            records.len()
        );
        for (i, rec) in records.iter().take(5).enumerate() {
            println!(
                "#{:03} | Report: {} | Announce: {}",
                i,
                rec.report_date.format("%Y-%m-%d"),
                rec.announce_date.format("%Y-%m-%d")
            );
            // Pretty-print the concrete data payload with {:#?}.
            println!("Data: {:#?}\n", rec.data);
        }
        if records.is_empty() {
            println!("(No records found)\n");
        } else {
            println!(
                "... (remaining {} records omitted)\n",
                records.len().saturating_sub(5)
            );
        }
    }

    #[test]
    fn test_7001_balance_sheet() {
        let path = match fixture_or_skip(7001) {
            Some(path) => path,
            None => return,
        };

        let res = FinanceReader::read_file(&path).expect("Failed to parse 7001");
        assert!(!res.is_empty(), "7001 should not be empty");

        if let FinanceData::Report { columns } = &res[0].data {
            assert_eq!(columns.len(), 156);
            assert!(columns[0].is_nan());
            assert!((columns[11] - 273_297_896.39).abs() < 1e-6);
        } else {
            panic!("7001 parsed as wrong type");
        }

        print_head(7001, &res);
    }

    #[test]
    fn test_7002_income() {
        let path = match fixture_or_skip(7002) {
            Some(path) => path,
            None => return,
        };

        let res = FinanceReader::read_file(&path).expect("Failed to parse 7002");
        assert!(!res.is_empty(), "7002 should not be empty");

        if let FinanceData::Report { columns } = &res[0].data {
            assert_eq!(columns.len(), 80);
            assert!((columns[0] - 4_809_251_460.5).abs() < 1e-6);
            assert!(columns[1].is_nan());
        } else {
            panic!("7002 parsed as wrong type");
        }

        print_head(7002, &res);
    }

    #[test]
    fn test_7003_cashflow() {
        let path = match fixture_or_skip(7003) {
            Some(path) => path,
            None => return,
        };

        let res = FinanceReader::read_file(&path).expect("Failed to parse 7003");
        assert!(!res.is_empty(), "7003 should not be empty");

        if let FinanceData::Report { columns } = &res[0].data {
            assert_eq!(columns.len(), 111);
            assert!(columns[0].is_nan());
            assert!((columns[23] - 5_506_707_615.58).abs() < 1e-6);
        } else {
            panic!("7003 parsed as wrong type");
        }

        print_head(7003, &res);
    }

    #[test]
    fn test_7004_capital() {
        let path = match fixture_or_skip(7004) {
            Some(path) => path,
            None => return,
        };

        let res = FinanceReader::read_file(&path).expect("Failed to parse 7004");
        assert!(!res.is_empty(), "7004 should not be empty");

        if let FinanceData::Capital {
            total_share,
            flow_share,
            restricted,
            free_float_share,
        } = &res[0].data
        {
            assert_eq!(*total_share, 400_100_000.0);
            assert_eq!(*flow_share, 40_080_000.0);
            assert_eq!(*restricted, 0.0);
            assert_eq!(*free_float_share, 40_080_000.0);
        } else {
            panic!("7004 parsed as wrong type");
        }

        print_head(7004, &res);
    }

    #[test]
    fn test_7005_holder_count() {
        let path = match fixture_or_skip(7005) {
            Some(path) => path,
            None => return,
        };

        let res = FinanceReader::read_file(&path).expect("Failed to parse 7005");
        assert!(!res.is_empty(), "7005 should not be empty");

        if let FinanceData::HolderCount {
            total_holders,
            a_holders,
            b_holders,
            h_holders,
            float_holders,
            other_holders,
        } = &res[0].data
        {
            assert_eq!(*total_holders, 35_719);
            assert_eq!(*a_holders, 35_719);
            assert_eq!(*b_holders, 0);
            assert_eq!(*h_holders, 0);
            assert_eq!(*float_holders, 0);
            assert_eq!(*other_holders, 0);
        } else {
            panic!("7005 parsed as wrong type");
        }

        print_head(7005, &res);
    }

    #[test]
    fn test_7006_top_float_holder() {
        let path = match fixture_or_skip(7006) {
            Some(path) => path,
            None => return,
        };

        let res = FinanceReader::read_file(&path).expect("Failed to parse 7006");
        assert!(!res.is_empty(), "7006 should not be empty");

        if let FinanceData::TopHolder { holders } = &res[0].data {
            assert_eq!(holders.len(), 40);
            assert_eq!(holders[0].name, "中国航空技术深圳有限公司");
            assert_eq!(holders[0].holder_type, "机构投资账户");
            assert_eq!(holders[0].hold_amount, 158_128_000.0);
            assert_eq!(holders[0].change_reason, "不变");
            assert_eq!(holders[0].hold_ratio, 39.52);
            assert_eq!(holders[0].share_type, "流通A股");
            assert_eq!(holders[0].rank, 1);
        } else {
            panic!("7006 parsed as wrong type");
        }

        print_head(7006, &res);
    }

    #[test]
    fn test_7007_top_holder() {
        let path = match fixture_or_skip(7007) {
            Some(path) => path,
            None => return,
        };

        let res = FinanceReader::read_file(&path).expect("Failed to parse 7007");
        assert!(!res.is_empty(), "7007 should not be empty");

        if let FinanceData::TopHolder { holders } = &res[0].data {
            assert_eq!(holders.len(), 10);
            assert_eq!(holders[0].name, "中国工商银行-诺安股票证券投资基金");
            assert_eq!(holders[0].holder_type, "机构投资账户");
            assert_eq!(holders[0].hold_amount, 1_799_860.0);
            assert_eq!(holders[0].change_reason, "不变");
            assert_eq!(holders[0].hold_ratio, 0.45);
            assert_eq!(holders[0].share_type, "流通A股");
            assert_eq!(holders[0].rank, 1);
        } else {
            panic!("7007 parsed as wrong type");
        }

        print_head(7007, &res);
    }

    #[test]
    fn test_7008_ratios() {
        let path = match fixture_or_skip(7008) {
            Some(path) => path,
            None => return,
        };

        let res = FinanceReader::read_file(&path).expect("Failed to parse 7008");
        assert!(!res.is_empty(), "7008 should not be empty");

        if let FinanceData::Ratios { ratios } = &res[0].data {
            assert_eq!(ratios.len(), 41);
        } else {
            panic!("7008 parsed as wrong type");
        }

        print_head(7008, &res);
    }

    #[test]
    fn test_report_column_names() {
        let balance = FinanceReader::column_names(FileType::BalanceSheet).expect("7001 names");
        assert_eq!(balance.len(), 156);
        assert_eq!(balance[0], "internal_shoule_recv");
        assert_eq!(balance[44], "cash_equivalents");
        assert_eq!(balance[140], "m_otherReserves");
        assert_eq!(balance[141], "__extra_141");

        let income = FinanceReader::column_names(FileType::Income).expect("7002 names");
        assert_eq!(income.len(), 80);
        assert_eq!(income[0], "revenue_inc");
        assert_eq!(income[3], "total_operating_cost");
        assert_eq!(income[65], "m_otherItemsAffectingNetProfit");
        assert_eq!(income[66], "__extra_66");

        let cashflow = FinanceReader::column_names(FileType::CashFlow).expect("7003 names");
        assert_eq!(cashflow.len(), 111);
        assert_eq!(cashflow[0], "cash_received_ori_ins_contract_pre");
        assert_eq!(cashflow[26], "goods_sale_and_service_render_cash");
        assert_eq!(cashflow[105], "m_addOperatingReceivableItems");
        assert_eq!(cashflow[106], "__extra_106");

        let ratios = FinanceReader::column_names(FileType::Ratios).expect("7008 names");
        assert_eq!(ratios.len(), 41);
        assert_eq!(ratios[0], "s_fa_ocfps");
        assert_eq!(ratios[1], "s_fa_bps");
        assert_eq!(ratios[35], "adjusted_net_profit");
        assert_eq!(ratios[36], "__extra_36");
    }

    #[test]
    fn test_named_values_for_ratios() {
        let path = match fixture_or_skip(7008) {
            Some(path) => path,
            None => return,
        };

        let res = FinanceReader::read_file(&path).expect("Failed to parse 7008");
        // Fail with a clear message instead of an index-out-of-bounds panic.
        assert!(!res.is_empty(), "7008 should not be empty");
        let named = res[0].named_values().expect("named ratios");

        assert_eq!(named[0].0, "s_fa_ocfps");
        assert!((named[0].1 - 0.5646).abs() < 1e-9);
        assert_eq!(named[1].0, "s_fa_bps");
        assert!((named[1].1 - 7.62).abs() < 1e-9);
    }

    #[test]
    fn test_finance_record_is_self_describing() {
        let path = match fixture_or_skip(7001) {
            Some(path) => path,
            None => return,
        };

        let res = FinanceReader::read_file(&path).expect("Failed to parse 7001");
        // Fail with a clear message instead of an index-out-of-bounds panic.
        assert!(!res.is_empty(), "7001 should not be empty");
        let first = &res[0];
        assert_eq!(first.file_type, FileType::BalanceSheet);
        let names = first.column_names().expect("balance sheet names");
        assert_eq!(names[0], "internal_shoule_recv");

        let named = first.named_values().expect("self-described values");
        assert_eq!(named[0].0, "internal_shoule_recv");
    }
}