1use std::fs::File;
8use std::io;
9use std::path::Path;
10
11use byteorder::{ByteOrder, LittleEndian};
12use chrono::{DateTime, FixedOffset, TimeZone};
13use memmap2::MmapOptions;
14use thiserror::Error;
15
/// Size in bytes of one balance-sheet (7001) record.
const STRIDE_BALANCE: usize = 1264;
/// Size in bytes of one income-statement (7002) record.
const STRIDE_INCOME: usize = 664;
/// Size in bytes of one cash-flow (7003) record.
const STRIDE_CASHFLOW: usize = 920;
/// Size in bytes of one ratios (7008) record.
const STRIDE_RATIOS: usize = 344;
/// Size in bytes of one capital (7004) record.
const STRIDE_CAPITAL: usize = 56;
/// Size in bytes of one holder-count (7005) record.
const STRIDE_HOLDER: usize = 64;
/// Size in bytes of one top-holder (7006 / 7007) record.
const STRIDE_TOP_HOLDER: usize = 416;

/// Number of `f64` columns read from each balance-sheet record.
const COLUMNS_BALANCE: usize = 156;
/// Number of `f64` columns read from each income-statement record.
const COLUMNS_INCOME: usize = 80;
/// Number of `f64` columns read from each cash-flow record.
const COLUMNS_CASHFLOW: usize = 111;
/// Number of `f64` columns read from each ratios record.
const COLUMNS_RATIOS: usize = 41;

/// Smallest millisecond timestamp accepted as a record date
/// (1990-01-01T00:00:00Z).
const MIN_VALID_TS: i64 = 631_152_000_000;
/// Largest millisecond timestamp accepted as a record date
/// (2050-01-01T00:00:00Z).
const MAX_VALID_TS: i64 = 2_524_608_000_000;
/// Bit pattern (that of `f64::MAX`) treated as "no value" when reading
/// floating-point fields.
const QMT_NAN_HEX: u64 = 0x7FEF_FFFF_FFFF_FFFF;
32
33#[derive(Debug, Error)]
36pub enum FinanceError {
37 #[error("IO Error: {0}")]
39 Io(#[from] io::Error),
40 #[error("Invalid File Extension: {0}")]
42 InvalidExtension(String),
43 #[error("Unsupported File Type ID: {0}")]
45 UnsupportedType(u16),
46 #[error("Parse Error: {0}")]
48 Parse(String),
49}
50
51pub type BjDateTime = DateTime<FixedOffset>;
53
54#[derive(Debug, Clone)]
58pub struct FinanceRecord {
59 pub file_type: FileType,
61 pub report_date: BjDateTime,
63 pub announce_date: BjDateTime,
65 pub data: FinanceData,
67}
68
69#[derive(Debug, Clone)]
71pub enum FinanceData {
72 Report {
74 columns: Vec<f64>,
76 },
77 Capital {
79 total_share: f64,
81 flow_share: f64,
83 restricted: f64,
85 free_float_share: f64,
87 },
88 HolderCount {
90 total_holders: i64,
92 a_holders: i64,
94 b_holders: i64,
96 h_holders: i64,
98 float_holders: i64,
100 other_holders: i64,
102 },
103 TopHolder {
105 holders: Vec<Shareholder>,
107 },
108 Ratios {
110 ratios: Vec<f64>,
112 },
113}
114
/// One row of a top-holder / top-float-holder file.
#[derive(Debug, Clone)]
pub struct Shareholder {
    /// Holder name (NUL-terminated text field, trimmed).
    pub name: String,
    /// Holder category text.
    pub holder_type: String,
    /// Amount held; `0.0` when the stored value is missing.
    pub hold_amount: f64,
    /// Change-reason text.
    pub change_reason: String,
    /// Holding ratio; `0.0` when the stored value is missing.
    pub hold_ratio: f64,
    /// Share-type text.
    pub share_type: String,
    /// Rank stored in the last four bytes of the record.
    pub rank: u32,
}
133
/// Kind of finance file; each discriminant is the numeric id used in the
/// file name.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FileType {
    /// Balance sheet (id 7001).
    BalanceSheet = 7001,
    /// Income statement (id 7002).
    Income = 7002,
    /// Cash-flow statement (id 7003).
    CashFlow = 7003,
    /// Share capital (id 7004).
    Capital = 7004,
    /// Holder counts (id 7005).
    HolderCount = 7005,
    /// Top float holders (id 7006).
    TopFlowHolder = 7006,
    /// Top holders (id 7007).
    TopHolder = 7007,
    /// Ratios (id 7008).
    Ratios = 7008,
}

impl FileType {
    /// Returns the file type whose discriminant equals `id`, or `None` when
    /// no variant uses that id.
    pub fn from_id(id: u16) -> Option<Self> {
        // Every variant, so the lookup is driven by the discriminants declared
        // on the enum instead of a second hand-written id list.
        const KNOWN: [FileType; 8] = [
            FileType::BalanceSheet,
            FileType::Income,
            FileType::CashFlow,
            FileType::Capital,
            FileType::HolderCount,
            FileType::TopFlowHolder,
            FileType::TopHolder,
            FileType::Ratios,
        ];

        KNOWN
            .iter()
            .copied()
            .find(|candidate| (*candidate as u16) == id)
    }
}
171
/// Names of the balance-sheet (7001) columns, index-aligned with the values
/// parsed for each record.
///
/// The spellings are reproduced verbatim because callers look values up by
/// these exact strings.
const BALANCE_COLUMN_NAMES: [&str; 156] = [
    // 0
    "internal_shoule_recv",
    "fixed_capital_clearance",
    "should_pay_money",
    "settlement_payment",
    "receivable_premium",
    "accounts_receivable_reinsurance",
    "reinsurance_contract_reserve",
    "dividends_payable",
    "tax_rebate_for_export",
    "subsidies_receivable",
    "deposit_receivable",
    "apportioned_cost",
    "profit_and_current_assets_with_deal",
    "current_assets_one_year",
    "long_term_receivables",
    "other_long_term_investments",
    "original_value_of_fixed_assets",
    "net_value_of_fixed_assets",
    "depreciation_reserves_of_fixed_assets",
    "productive_biological_assets",
    "public_welfare_biological_assets",
    "oil_and_gas_assets",
    "development_expenditure",
    "right_of_split_share_distribution",
    "other_non_mobile_assets",
    "handling_fee_and_commission",
    "other_payables",
    "margin_payable",
    "internal_accounts_payable",
    "advance_cost",
    "insurance_contract_reserve",
    "broker_buying_and_selling_securities",
    "acting_underwriting_securities",
    "international_ticket_settlement",
    "domestic_ticket_settlement",
    "deferred_income",
    "short_term_bonds_payable",
    "long_term_deferred_income",
    "undetermined_investment_losses",
    "quasi_distribution_of_cash_dividends",
    "provisions_not",
    "cust_bank_dep",
    "provisions",
    "less_tsy_stk",
    // 44
    "cash_equivalents",
    "loans_to_oth_banks",
    "tradable_fin_assets",
    "derivative_fin_assets",
    "bill_receivable",
    "account_receivable",
    "advance_payment",
    "int_rcv",
    "other_receivable",
    "red_monetary_cap_for_sale",
    "agency_bus_assets",
    "inventories",
    "other_current_assets",
    "total_current_assets",
    "loans_and_adv_granted",
    "fin_assets_avail_for_sale",
    "held_to_mty_invest",
    "long_term_eqy_invest",
    "invest_real_estate",
    "accumulated_depreciation",
    "fix_assets",
    "constru_in_process",
    "construction_materials",
    "long_term_liabilities",
    "intang_assets",
    "goodwill",
    "long_deferred_expense",
    "deferred_tax_assets",
    "total_non_current_assets",
    "tot_assets",
    "shortterm_loan",
    "borrow_central_bank",
    "loans_oth_banks",
    "tradable_fin_liab",
    "derivative_fin_liab",
    "notes_payable",
    "accounts_payable",
    "advance_peceipts",
    "fund_sales_fin_assets_rp",
    "empl_ben_payable",
    "taxes_surcharges_payable",
    "int_payable",
    "dividend_payable",
    "other_payable",
    "non_current_liability_in_one_year",
    "other_current_liability",
    "total_current_liability",
    "long_term_loans",
    "bonds_payable",
    "longterm_account_payable",
    "grants_received",
    "deferred_tax_liab",
    "other_non_current_liabilities",
    "non_current_liabilities",
    "tot_liab",
    "cap_stk",
    "cap_rsrv",
    "specific_reserves",
    "surplus_rsrv",
    "prov_nom_risks",
    "undistributed_profit",
    "cnvd_diff_foreign_curr_stat",
    "tot_shrhldr_eqy_excl_min_int",
    "minority_int",
    "total_equity",
    "tot_liab_shrhldr_eqy",
    "inventory_depreciation_reserve",
    "current_ratio",
    // 112..=140: `m_`-prefixed names
    "m_cashAdepositsCentralBank",
    "m_nobleMetal",
    "m_depositsOtherFinancialInstitutions",
    "m_currentInvestment",
    "m_redemptoryMonetaryCapitalSale",
    "m_netAmountSubrogation",
    "m_refundableDeposits",
    "m_netAmountLoanPledged",
    "m_fixedTimeDeposit",
    "m_netLongtermDebtInvestments",
    "m_permanentInvestment",
    "m_depositForcapitalRecognizance",
    "m_netBalConstructionProgress",
    "m_separateAccountAssets",
    "m_capitalInvicariousBussiness",
    "m_otherAssets",
    "m_depositsWithBanksOtherFinancialIns",
    "m_indemnityPayable",
    "m_policyDividendPayable",
    "m_guaranteeInvestmentFunds",
    "m_premiumsReceivedAdvance",
    "m_insuranceLiabilities",
    "m_liabilitiesIndependentAccounts",
    "m_liabilitiesVicariousBusiness",
    "m_otherLiablities",
    "m_capitalPremium",
    "m_petainedProfit",
    "m_provisionTransactionRisk",
    "m_otherReserves",
    // 141..=155: `__extra_N` entries, each named after its own index
    "__extra_141",
    "__extra_142",
    "__extra_143",
    "__extra_144",
    "__extra_145",
    "__extra_146",
    "__extra_147",
    "__extra_148",
    "__extra_149",
    "__extra_150",
    "__extra_151",
    "__extra_152",
    "__extra_153",
    "__extra_154",
    "__extra_155",
];
330
/// Names of the income-statement (7002) columns, index-aligned with the
/// values parsed for each record.
///
/// The spellings are reproduced verbatim because callers look values up by
/// these exact strings.
const INCOME_COLUMN_NAMES: [&str; 80] = [
    // 0
    "revenue_inc",
    "earned_premium",
    "real_estate_sales_income",
    "total_operating_cost",
    "real_estate_sales_cost",
    "research_expenses",
    "surrender_value",
    "net_payments",
    "net_withdrawal_ins_con_res",
    "policy_dividend_expenses",
    "reinsurance_cost",
    "change_income_fair_value",
    "futures_loss",
    "trust_income",
    "subsidize_revenue",
    "other_business_profits",
    "net_profit_excl_merged_int_inc",
    "int_inc",
    "handling_chrg_comm_inc",
    "less_handling_chrg_comm_exp",
    "other_bus_cost",
    "plus_net_gain_fx_trans",
    "il_net_loss_disp_noncur_asset",
    "inc_tax",
    "unconfirmed_invest_loss",
    "net_profit_excl_min_int_inc",
    "less_int_exp",
    "other_bus_inc",
    "revenue",
    "total_expense",
    "less_taxes_surcharges_ops",
    "sale_expense",
    "less_gerl_admin_exp",
    "financial_expense",
    "less_impair_loss_assets",
    "plus_net_invest_inc",
    "incl_inc_invest_assoc_jv_entp",
    "oper_profit",
    "plus_non_oper_rev",
    "less_non_oper_exp",
    "tot_profit",
    "net_profit_incl_min_int_inc",
    "net_profit_incl_min_int_inc_after",
    "minority_int_inc",
    "s_fa_eps_basic",
    "s_fa_eps_diluted",
    "total_income",
    "total_income_minority",
    "other_compreh_inc",
    "operating_revenue",
    "cost_of_goods_sold",
    // 51..=65: `m_`-prefixed names
    "m_netinterestIncome",
    "m_netFeesCommissions",
    "m_insuranceBusiness",
    "m_separatePremium",
    "m_asideReservesUndueLiabilities",
    "m_paymentsInsuranceClaims",
    "m_amortizedCompensationExpenses",
    "m_netReserveInsuranceLiability",
    "m_policyReserve",
    "m_amortizeInsuranceReserve",
    "m_nsuranceFeesCommissionExpenses",
    "m_operationAdministrativeExpense",
    "m_amortizedReinsuranceExpenditure",
    "m_netProfitLossdisposalNonassets",
    "m_otherItemsAffectingNetProfit",
    // 66..=79: `__extra_N` entries, each named after its own index
    "__extra_66",
    "__extra_67",
    "__extra_68",
    "__extra_69",
    "__extra_70",
    "__extra_71",
    "__extra_72",
    "__extra_73",
    "__extra_74",
    "__extra_75",
    "__extra_76",
    "__extra_77",
    "__extra_78",
    "__extra_79",
];
413
/// Names of the cash-flow (7003) columns, index-aligned with the values
/// parsed for each record.
///
/// The spellings are reproduced verbatim because callers look values up by
/// these exact strings.
const CASHFLOW_COLUMN_NAMES: [&str; 111] = [
    // 0
    "cash_received_ori_ins_contract_pre",
    "net_cash_received_rei_ope",
    "net_increase_insured_funds",
    "net_increase_in_disposal",
    "cash_for_interest",
    "net_increase_in_repurchase_funds",
    "cash_for_payment_original_insurance",
    "cash_payment_policy_dividends",
    "disposal_other_business_units",
    "net_cash_deal_subcompany",
    "cash_received_from_pledges",
    "cash_paid_for_investments",
    "net_increase_in_pledged_loans",
    "cash_paid_by_subsidiaries",
    "increase_in_cash_paid",
    "fix_intan_other_asset_dispo_cash_payment",
    "cash_from_mino_s_invest_sub",
    "cass_received_sub_abs",
    "cass_received_sub_investments",
    "minority_shareholder_profit_loss",
    "unrecognized_investment_losses",
    "ncrease_deferred_income",
    "projected_liability",
    "increase_operational_payables",
    "reduction_outstanding_amounts_less",
    "reduction_outstanding_amounts_more",
    // 26
    "goods_sale_and_service_render_cash",
    "net_incr_dep_cob",
    "net_incr_loans_central_bank",
    "net_incr_fund_borr_ofi",
    // NOTE(review): same name as the previous entry — confirm against the
    // upstream field list whether index 30 should carry a distinct name.
    "net_incr_fund_borr_ofi",
    "tax_levy_refund",
    "cash_paid_invest",
    "other_cash_pay_ral_inv_act",
    "other_cash_recp_ral_oper_act",
    "stot_cash_inflows_oper_act",
    "goods_and_services_cash_paid",
    "net_incr_clients_loan_adv",
    "net_incr_dep_cbob",
    "handling_chrg_paid",
    "cash_pay_beh_empl",
    "pay_all_typ_tax",
    "other_cash_pay_ral_oper_act",
    "stot_cash_outflows_oper_act",
    "net_cash_flows_oper_act",
    "cash_recp_disp_withdrwl_invest",
    "cash_recp_return_invest",
    "net_cash_recp_disp_fiolta",
    "other_cash_recp_ral_inv_act",
    "stot_cash_inflows_inv_act",
    "cash_pay_acq_const_fiolta",
    "stot_cash_outflows_inv_act",
    "net_cash_flows_inv_act",
    "cash_recp_cap_contrib",
    "cash_recp_borrow",
    "proc_issue_bonds",
    "other_cash_recp_ral_fnc_act",
    "stot_cash_inflows_fnc_act",
    "cash_prepay_amt_borr",
    "cash_pay_dist_dpcp_int_exp",
    "other_cash_pay_ral_fnc_act",
    "stot_cash_outflows_fnc_act",
    "net_cash_flows_fnc_act",
    "eff_fx_flu_cash",
    "net_incr_cash_cash_equ",
    "cash_cash_equ_beg_period",
    "cash_cash_equ_end_period",
    "net_profit",
    "plus_prov_depr_assets",
    "depr_fa_coga_dpba",
    "amort_intang_assets",
    "amort_lt_deferred_exp",
    "decr_deferred_exp",
    "incr_acc_exp",
    "loss_disp_fiolta",
    "loss_scr_fa",
    "loss_fv_chg",
    "fin_exp",
    "invest_loss",
    "decr_deferred_inc_tax_assets",
    "incr_deferred_inc_tax_liab",
    "decr_inventories",
    "decr_oper_payable",
    "others",
    "im_net_cash_flows_oper_act",
    "conv_debt_into_cap",
    "conv_corp_bonds_due_within_1y",
    "fa_fnc_leases",
    "end_bal_cash",
    "less_beg_bal_cash",
    "plus_end_bal_cash_equ",
    "less_beg_bal_cash_equ",
    "im_net_incr_cash_cash_equ",
    // 93..=105: `m_`-prefixed names
    "m_netDecreaseUnwindingFunds",
    "m_netReductionPurchaseRebates",
    "m_netIncreaseDepositsBanks",
    "m_netCashReinsuranceBusiness",
    "m_netReductionDeposInveFunds",
    "m_netIncreaseUnwindingFunds",
    "m_netReductionAmountBorrowedFunds",
    "m_netReductionSaleRepurchaseProceeds",
    "m_paymentOtherCashRelated",
    "m_cashOutFlowsInvesactivities",
    "m_absorbCashEquityInv",
    "m_otherImpactsOnCash",
    "m_addOperatingReceivableItems",
    // 106..=110: `__extra_N` entries, each named after its own index
    "__extra_106",
    "__extra_107",
    "__extra_108",
    "__extra_109",
    "__extra_110",
];
527
/// Names of the ratios (7008) columns, index-aligned with the values parsed
/// for each record.
///
/// The spellings are reproduced verbatim because callers look values up by
/// these exact strings.
const RATIO_COLUMN_NAMES: [&str; 41] = [
    // 0
    "s_fa_ocfps",
    "s_fa_bps",
    "s_fa_eps_basic",
    "s_fa_eps_diluted",
    "s_fa_undistributedps",
    "s_fa_surpluscapitalps",
    "adjusted_earnings_per_share",
    "du_return_on_equity",
    "sales_gross_profit",
    "inc_revenue_rate",
    "du_profit_rate",
    "inc_net_profit_rate",
    "adjusted_net_profit_rate",
    "inc_total_revenue_annual",
    "inc_net_profit_to_shareholders_annual",
    "adjusted_profit_to_profit_annual",
    "equity_roe",
    "net_roe",
    "total_roe",
    "gross_profit",
    "net_profit",
    "actual_tax_rate",
    // NOTE(review): indices 22/23 and 24/25 carry the same pair of names —
    // confirm against the upstream field list whether that is intended.
    "pre_pay_operate_income",
    "sales_cash_flow",
    "pre_pay_operate_income",
    "sales_cash_flow",
    "gear_ratio",
    "inventory_turnover",
    "m_anntime",
    "m_timetag",
    "inc_revenue",
    "inc_gross_profit",
    "inc_profit_before_tax",
    "du_profit",
    "inc_net_profit",
    "adjusted_net_profit",
    // 36..=40: `__extra_N` entries, each named after its own index
    "__extra_36",
    "__extra_37",
    "__extra_38",
    "__extra_39",
    "__extra_40",
];
571
572impl FinanceRecord {
573 pub fn column_names(&self) -> Option<&'static [&'static str]> {
577 FinanceReader::column_names(self.file_type)
578 }
579
580 pub fn named_values(&self) -> Option<Vec<(&'static str, f64)>> {
584 let names = self.column_names()?;
585 match &self.data {
586 FinanceData::Report { columns } => {
587 Some(names.iter().copied().zip(columns.iter().copied()).collect())
588 }
589 FinanceData::Ratios { ratios } => {
590 Some(names.iter().copied().zip(ratios.iter().copied()).collect())
591 }
592 _ => None,
593 }
594 }
595}
596
597pub struct FinanceReader;
599
600impl FinanceReader {
601 pub fn column_names(file_type: FileType) -> Option<&'static [&'static str]> {
605 match file_type {
606 FileType::BalanceSheet => Some(&BALANCE_COLUMN_NAMES),
607 FileType::Income => Some(&INCOME_COLUMN_NAMES),
608 FileType::CashFlow => Some(&CASHFLOW_COLUMN_NAMES),
609 FileType::Ratios => Some(&RATIO_COLUMN_NAMES),
610 _ => None,
611 }
612 }
613
614 pub fn read_file(path: impl AsRef<Path>) -> Result<Vec<FinanceRecord>, FinanceError> {
630 let path = path.as_ref();
631 Self::validate_dat_path(path)?;
632 let file_type = Self::detect_type(path)?;
633 let file = File::open(path)?;
634
635 let mmap = unsafe { MmapOptions::new().map(&file)? };
637 let data = &mmap[..];
638
639 match file_type {
640 FileType::BalanceSheet => Self::parse_fixed(data, STRIDE_BALANCE, 0, |body| {
641 let mut cols = Vec::with_capacity(COLUMNS_BALANCE);
642 for i in 0..COLUMNS_BALANCE {
643 cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
644 }
645 FinanceData::Report { columns: cols }
646 }),
647 FileType::Income => Self::parse_fixed(data, STRIDE_INCOME, 8, |body| {
648 let mut cols = Vec::with_capacity(COLUMNS_INCOME);
649 for i in 0..COLUMNS_INCOME {
650 cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
651 }
652 FinanceData::Report { columns: cols }
653 }),
654 FileType::CashFlow => Self::parse_fixed(data, STRIDE_CASHFLOW, 8, |body| {
655 let mut cols = Vec::with_capacity(COLUMNS_CASHFLOW);
656 for i in 0..COLUMNS_CASHFLOW {
657 cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
658 }
659 FinanceData::Report { columns: cols }
660 }),
661 FileType::Ratios => Self::parse_fixed(data, STRIDE_RATIOS, 0, |body| {
662 let mut cols = Vec::with_capacity(COLUMNS_RATIOS);
663 for i in 0..COLUMNS_RATIOS {
664 cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
665 }
666 FinanceData::Ratios { ratios: cols }
667 }),
668 FileType::Capital => {
669 Self::parse_fixed(data, STRIDE_CAPITAL, 0, |body| {
670 FinanceData::Capital {
672 total_share: Self::read_f64(body, 0).unwrap_or(0.0),
673 flow_share: Self::read_f64(body, 8).unwrap_or(0.0),
674 restricted: Self::read_f64(body, 16).unwrap_or(0.0),
675 free_float_share: Self::read_f64(body, 24).unwrap_or(0.0),
676 }
677 })
678 }
679 FileType::HolderCount => {
680 Self::parse_7005_fixed(data)
686 }
687 FileType::TopFlowHolder | FileType::TopHolder => {
688 Self::parse_top_holders(data, file_type)
689 }
690 }
691 }
692
693 fn detect_type(path: &Path) -> Result<FileType, FinanceError> {
695 let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
696 let id_part = stem.split('_').last().unwrap_or("");
697 let id = id_part
698 .parse::<u16>()
699 .map_err(|_| FinanceError::Parse("Invalid Filename".into()))?;
700 FileType::from_id(id).ok_or(FinanceError::UnsupportedType(id))
701 }
702
703 fn validate_dat_path(path: &Path) -> Result<(), FinanceError> {
704 let ext = path
705 .extension()
706 .and_then(|s| s.to_str())
707 .unwrap_or_default()
708 .to_ascii_lowercase();
709 if ext != "dat" {
710 return Err(FinanceError::InvalidExtension(path.display().to_string()));
711 }
712 Ok(())
713 }
714
715 fn parse_fixed<F>(
718 data: &[u8],
719 stride: usize,
720 header_offset: usize,
721 parser: F,
722 ) -> Result<Vec<FinanceRecord>, FinanceError>
723 where
724 F: Fn(&[u8]) -> FinanceData,
725 {
726 let mut results = Vec::new();
727 let mut cursor = 0;
728 let len = data.len();
729
730 while cursor + header_offset + 16 <= len {
731 let header_start = cursor + header_offset;
733 let ts1 = LittleEndian::read_i64(&data[header_start..header_start + 8]);
734 let ts2 = LittleEndian::read_i64(&data[header_start + 8..header_start + 16]);
735
736 if Self::is_valid_ts(ts1) {
738 if cursor + stride <= len {
740 let report_date = Self::ts_to_bj(ts1);
741 let announce_date = if Self::is_valid_ts(ts2) {
743 Self::ts_to_bj(ts2)
744 } else {
745 report_date };
747
748 let body = &data[header_start + 16..cursor + stride];
749 results.push(FinanceRecord {
750 file_type: Self::file_type_from_stride(stride)?,
751 report_date,
752 announce_date,
753 data: parser(body),
754 });
755
756 cursor += stride;
757 continue;
758 }
759 }
760 cursor += 8;
762 }
763 Ok(results)
764 }
765
766 fn parse_7005_fixed(data: &[u8]) -> Result<Vec<FinanceRecord>, FinanceError> {
769 let mut results = Vec::new();
770 let mut cursor = 0;
771 let stride = STRIDE_HOLDER;
772
773 while cursor + 16 <= data.len() {
774 let ts1 = LittleEndian::read_i64(&data[cursor..cursor + 8]); let ts2 = LittleEndian::read_i64(&data[cursor + 8..cursor + 16]); if Self::is_valid_ts(ts2) {
779 if cursor + stride <= data.len() {
780 let report_date = Self::ts_to_bj(ts2);
781 let announce_date = if Self::is_valid_ts(ts1) {
782 Self::ts_to_bj(ts1)
783 } else {
784 report_date
785 };
786
787 let body = &data[cursor + 16..cursor + stride];
788 let total_holders = Self::read_f64(body, 0).unwrap_or(0.0) as i64;
789 let a_holders = Self::read_f64(body, 8).unwrap_or(0.0) as i64;
790 let b_holders = Self::read_f64(body, 16).unwrap_or(0.0) as i64;
791 let h_holders = Self::read_f64(body, 24).unwrap_or(0.0) as i64;
792 let float_holders = Self::read_f64(body, 32).unwrap_or(0.0) as i64;
793 let other_holders = Self::read_f64(body, 40).unwrap_or(0.0) as i64;
794
795 results.push(FinanceRecord {
796 file_type: FileType::HolderCount,
797 report_date,
798 announce_date,
799 data: FinanceData::HolderCount {
800 total_holders,
801 a_holders,
802 b_holders,
803 h_holders,
804 float_holders,
805 other_holders,
806 },
807 });
808 cursor += stride;
809 continue;
810 }
811 }
812 cursor += 8;
813 }
814 Ok(results)
815 }
816
817 fn parse_top_holders(
819 data: &[u8],
820 file_type: FileType,
821 ) -> Result<Vec<FinanceRecord>, FinanceError> {
822 let mut results = Vec::new();
823 let mut current_report_ts = 0i64;
824 let mut current_announce_ts = 0i64;
825 let mut current_holders = Vec::new();
826
827 for chunk in data.chunks_exact(STRIDE_TOP_HOLDER) {
828 let announce_ts = LittleEndian::read_i64(&chunk[0..8]);
829 let report_ts = LittleEndian::read_i64(&chunk[8..16]);
830 if !Self::is_valid_ts(announce_ts) || !Self::is_valid_ts(report_ts) {
831 continue;
832 }
833
834 let holder = Self::parse_top_holder_record(chunk);
835 if current_holders.is_empty() {
836 current_report_ts = report_ts;
837 current_announce_ts = announce_ts;
838 }
839
840 if report_ts != current_report_ts || announce_ts != current_announce_ts {
841 results.push(FinanceRecord {
842 file_type,
843 report_date: Self::ts_to_bj(current_report_ts),
844 announce_date: Self::ts_to_bj(current_announce_ts),
845 data: FinanceData::TopHolder {
846 holders: std::mem::take(&mut current_holders),
847 },
848 });
849 current_report_ts = report_ts;
850 current_announce_ts = announce_ts;
851 }
852
853 current_holders.push(holder);
854 }
855
856 if !current_holders.is_empty() {
857 results.push(FinanceRecord {
858 file_type,
859 report_date: Self::ts_to_bj(current_report_ts),
860 announce_date: Self::ts_to_bj(current_announce_ts),
861 data: FinanceData::TopHolder {
862 holders: current_holders,
863 },
864 });
865 }
866
867 Ok(results)
868 }
869
870 fn parse_top_holder_record(record: &[u8]) -> Shareholder {
871 Shareholder {
872 name: Self::read_string(record, 16, 192),
873 holder_type: Self::read_string(record, 216, 56),
874 hold_amount: Self::read_f64(record, 272).unwrap_or(0.0),
875 change_reason: Self::read_string(record, 280, 16),
876 hold_ratio: Self::read_f64(record, 304).unwrap_or(0.0),
877 share_type: Self::read_string(record, 312, 96),
878 rank: LittleEndian::read_u32(&record[412..416]),
879 }
880 }
881
882 fn file_type_from_stride(stride: usize) -> Result<FileType, FinanceError> {
883 match stride {
884 STRIDE_BALANCE => Ok(FileType::BalanceSheet),
885 STRIDE_INCOME => Ok(FileType::Income),
886 STRIDE_CASHFLOW => Ok(FileType::CashFlow),
887 STRIDE_RATIOS => Ok(FileType::Ratios),
888 STRIDE_CAPITAL => Ok(FileType::Capital),
889 STRIDE_HOLDER => Ok(FileType::HolderCount),
890 _ => Err(FinanceError::Parse(format!(
891 "Unknown finance stride: {}",
892 stride
893 ))),
894 }
895 }
896
897 fn is_valid_ts(ts: i64) -> bool {
898 ts >= MIN_VALID_TS && ts <= MAX_VALID_TS
899 }
900
901 fn ts_to_bj(ts: i64) -> BjDateTime {
903 let tz = FixedOffset::east_opt(8 * 3600).unwrap();
905 let secs = ts / 1000;
906 let nsecs = (ts % 1000) * 1_000_000;
907 tz.timestamp_opt(secs, nsecs as u32)
908 .single()
909 .unwrap_or_default()
910 }
911
912 fn read_f64(data: &[u8], offset: usize) -> Option<f64> {
914 if offset + 8 > data.len() {
915 return None;
916 }
917 let u = LittleEndian::read_u64(&data[offset..offset + 8]);
918 if u == QMT_NAN_HEX {
919 return None;
920 }
921 let f = f64::from_bits(u);
922 if f.is_nan() { None } else { Some(f) }
923 }
924
925 fn read_string(data: &[u8], offset: usize, max_len: usize) -> String {
927 if offset >= data.len() {
928 return String::new();
929 }
930 let end = (offset + max_len).min(data.len());
931 let slice = &data[offset..end];
932 let actual_len = slice.iter().position(|&c| c == 0).unwrap_or(slice.len());
934 String::from_utf8_lossy(&slice[..actual_len])
935 .trim()
936 .to_string()
937 }
938}
939
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Environment variable that overrides the fixture directory, so the
    /// tests can run on machines other than the original author's.
    const FIXTURE_DIR_ENV: &str = "QMT_FINANCE_FIXTURE_DIR";
    /// Fixture directory used when the override is not set.
    const DEFAULT_FIXTURE_DIR: &str = "/home/sunnysab/Code/trade-rs/qmt-parser/finance/";

    /// Resolves `file` inside the fixture directory.
    fn get_fixture(file: &str) -> PathBuf {
        std::env::var_os(FIXTURE_DIR_ENV)
            .map(PathBuf::from)
            .unwrap_or_else(|| PathBuf::from(DEFAULT_FIXTURE_DIR))
            .join(file)
    }

    /// Parses the `002419_<type_id>.DAT` fixture.
    ///
    /// Returns `None` (after logging a skip notice) when the fixture is not
    /// present, so the suite stays green on machines without the data files;
    /// panics when the fixture exists but cannot be parsed.
    fn load_fixture(type_id: u16) -> Option<Vec<FinanceRecord>> {
        let path = get_fixture(&format!("002419_{}.DAT", type_id));
        if !path.exists() {
            eprintln!("Skipping {}: File not found", type_id);
            return None;
        }
        let records = FinanceReader::read_file(&path)
            .unwrap_or_else(|err| panic!("Failed to parse {}: {:?}", type_id, err));
        Some(records)
    }

    /// Prints the first five records for manual inspection
    /// (visible with `cargo test -- --nocapture`).
    fn print_head(type_id: u16, records: &[FinanceRecord]) {
        println!(
            "\n>>> [Type {}] Found {} records. Showing first 5:",
            type_id,
            records.len()
        );
        for (i, rec) in records.iter().take(5).enumerate() {
            println!(
                "#{:03} | Report: {} | Announce: {}",
                i,
                rec.report_date.format("%Y-%m-%d"),
                rec.announce_date.format("%Y-%m-%d")
            );
            println!("Data: {:#?}\n", rec.data);
        }
        if records.is_empty() {
            println!("(No records found)\n");
        } else {
            println!(
                "... (remaining {} records omitted)\n",
                records.len().saturating_sub(5)
            );
        }
    }

    #[test]
    fn test_7001_balance_sheet() {
        let res = match load_fixture(7001) {
            Some(records) => records,
            None => return,
        };
        assert!(!res.is_empty(), "7001 should not be empty");

        match &res[0].data {
            FinanceData::Report { columns } => {
                assert_eq!(columns.len(), 156);
                assert!(columns[0].is_nan());
                assert!((columns[11] - 273_297_896.39).abs() < 1e-6);
            }
            _ => panic!("7001 parsed as wrong type"),
        }

        print_head(7001, &res);
    }

    #[test]
    fn test_7002_income() {
        let res = match load_fixture(7002) {
            Some(records) => records,
            None => return,
        };
        assert!(!res.is_empty(), "7002 should not be empty");

        match &res[0].data {
            FinanceData::Report { columns } => {
                assert_eq!(columns.len(), 80);
                assert!((columns[0] - 4_809_251_460.5).abs() < 1e-6);
                assert!(columns[1].is_nan());
            }
            _ => panic!("7002 parsed as wrong type"),
        }

        print_head(7002, &res);
    }

    #[test]
    fn test_7003_cashflow() {
        let res = match load_fixture(7003) {
            Some(records) => records,
            None => return,
        };
        assert!(!res.is_empty(), "7003 should not be empty");

        match &res[0].data {
            FinanceData::Report { columns } => {
                assert_eq!(columns.len(), 111);
                assert!(columns[0].is_nan());
                assert!((columns[23] - 5_506_707_615.58).abs() < 1e-6);
            }
            _ => panic!("7003 parsed as wrong type"),
        }

        print_head(7003, &res);
    }

    #[test]
    fn test_7004_capital() {
        let res = match load_fixture(7004) {
            Some(records) => records,
            None => return,
        };
        assert!(!res.is_empty(), "7004 should not be empty");

        match &res[0].data {
            FinanceData::Capital {
                total_share,
                flow_share,
                restricted,
                free_float_share,
            } => {
                assert_eq!(*total_share, 400_100_000.0);
                assert_eq!(*flow_share, 40_080_000.0);
                assert_eq!(*restricted, 0.0);
                assert_eq!(*free_float_share, 40_080_000.0);
            }
            _ => panic!("7004 parsed as wrong type"),
        }

        print_head(7004, &res);
    }

    #[test]
    fn test_7005_holder_count() {
        let res = match load_fixture(7005) {
            Some(records) => records,
            None => return,
        };
        assert!(!res.is_empty(), "7005 should not be empty");

        match &res[0].data {
            FinanceData::HolderCount {
                total_holders,
                a_holders,
                b_holders,
                h_holders,
                float_holders,
                other_holders,
            } => {
                assert_eq!(*total_holders, 35_719);
                assert_eq!(*a_holders, 35_719);
                assert_eq!(*b_holders, 0);
                assert_eq!(*h_holders, 0);
                assert_eq!(*float_holders, 0);
                assert_eq!(*other_holders, 0);
            }
            _ => panic!("7005 parsed as wrong type"),
        }

        print_head(7005, &res);
    }

    #[test]
    fn test_7006_top_float_holder() {
        let res = match load_fixture(7006) {
            Some(records) => records,
            None => return,
        };
        assert!(!res.is_empty(), "7006 should not be empty");

        match &res[0].data {
            FinanceData::TopHolder { holders } => {
                assert_eq!(holders.len(), 40);
                assert_eq!(holders[0].name, "中国航空技术深圳有限公司");
                assert_eq!(holders[0].holder_type, "机构投资账户");
                assert_eq!(holders[0].hold_amount, 158_128_000.0);
                assert_eq!(holders[0].change_reason, "不变");
                assert_eq!(holders[0].hold_ratio, 39.52);
                assert_eq!(holders[0].share_type, "流通A股");
                assert_eq!(holders[0].rank, 1);
            }
            _ => panic!("7006 parsed as wrong type"),
        }

        print_head(7006, &res);
    }

    #[test]
    fn test_7007_top_holder() {
        let res = match load_fixture(7007) {
            Some(records) => records,
            None => return,
        };
        assert!(!res.is_empty(), "7007 should not be empty");

        match &res[0].data {
            FinanceData::TopHolder { holders } => {
                assert_eq!(holders.len(), 10);
                assert_eq!(holders[0].name, "中国工商银行-诺安股票证券投资基金");
                assert_eq!(holders[0].holder_type, "机构投资账户");
                assert_eq!(holders[0].hold_amount, 1_799_860.0);
                assert_eq!(holders[0].change_reason, "不变");
                assert_eq!(holders[0].hold_ratio, 0.45);
                assert_eq!(holders[0].share_type, "流通A股");
                assert_eq!(holders[0].rank, 1);
            }
            _ => panic!("7007 parsed as wrong type"),
        }

        print_head(7007, &res);
    }

    #[test]
    fn test_7008_ratios() {
        let res = match load_fixture(7008) {
            Some(records) => records,
            None => return,
        };
        assert!(!res.is_empty(), "7008 should not be empty");

        match &res[0].data {
            FinanceData::Ratios { ratios } => assert_eq!(ratios.len(), 41),
            _ => panic!("7008 parsed as wrong type"),
        }

        print_head(7008, &res);
    }

    #[test]
    fn test_report_column_names() {
        let balance = FinanceReader::column_names(FileType::BalanceSheet).expect("7001 names");
        assert_eq!(balance.len(), 156);
        assert_eq!(balance[0], "internal_shoule_recv");
        assert_eq!(balance[44], "cash_equivalents");
        assert_eq!(balance[140], "m_otherReserves");
        assert_eq!(balance[141], "__extra_141");

        let income = FinanceReader::column_names(FileType::Income).expect("7002 names");
        assert_eq!(income.len(), 80);
        assert_eq!(income[0], "revenue_inc");
        assert_eq!(income[3], "total_operating_cost");
        assert_eq!(income[65], "m_otherItemsAffectingNetProfit");
        assert_eq!(income[66], "__extra_66");

        let cashflow = FinanceReader::column_names(FileType::CashFlow).expect("7003 names");
        assert_eq!(cashflow.len(), 111);
        assert_eq!(cashflow[0], "cash_received_ori_ins_contract_pre");
        assert_eq!(cashflow[26], "goods_sale_and_service_render_cash");
        assert_eq!(cashflow[105], "m_addOperatingReceivableItems");
        assert_eq!(cashflow[106], "__extra_106");

        let ratios = FinanceReader::column_names(FileType::Ratios).expect("7008 names");
        assert_eq!(ratios.len(), 41);
        assert_eq!(ratios[0], "s_fa_ocfps");
        assert_eq!(ratios[1], "s_fa_bps");
        assert_eq!(ratios[35], "adjusted_net_profit");
        assert_eq!(ratios[36], "__extra_36");
    }

    #[test]
    fn test_named_values_for_ratios() {
        let res = match load_fixture(7008) {
            Some(records) => records,
            None => return,
        };
        let named = res[0].named_values().expect("named ratios");

        assert_eq!(named[0].0, "s_fa_ocfps");
        assert!((named[0].1 - 0.5646).abs() < 1e-9);
        assert_eq!(named[1].0, "s_fa_bps");
        assert!((named[1].1 - 7.62).abs() < 1e-9);
    }

    #[test]
    fn test_finance_record_is_self_describing() {
        let res = match load_fixture(7001) {
            Some(records) => records,
            None => return,
        };
        let first = &res[0];
        assert_eq!(first.file_type, FileType::BalanceSheet);
        let names = first.column_names().expect("balance sheet names");
        assert_eq!(names[0], "internal_shoule_recv");

        let named = first.named_values().expect("self-described values");
        assert_eq!(named[0].0, "internal_shoule_recv");
    }
}