1use std::fs::File;
8use std::io;
9use std::path::Path;
10
11use byteorder::{ByteOrder, LittleEndian};
12use chrono::{DateTime, FixedOffset, TimeZone};
13use memmap2::MmapOptions;
14use thiserror::Error;
15
// Column counts: number of consecutive little-endian `f64` values decoded from
// one record body of each report type.
const COLUMNS_BALANCE: usize = 156;
const COLUMNS_INCOME: usize = 80;
const COLUMNS_CASHFLOW: usize = 111;
const COLUMNS_RATIOS: usize = 41;

// Record strides: size in bytes of one fixed-width record, including the two
// 8-byte timestamps (and any leading header bytes) that precede the body.
// NOTE(review): the cash-flow stride leaves 8 bytes beyond its 111 columns that
// nothing reads — presumably padding or an undecoded column; confirm.
const STRIDE_BALANCE: usize = 1264;
const STRIDE_INCOME: usize = 664;
const STRIDE_CASHFLOW: usize = 920;
const STRIDE_RATIOS: usize = 344;
const STRIDE_CAPITAL: usize = 56;
const STRIDE_HOLDER: usize = 64;
const STRIDE_TOP_HOLDER: usize = 416;

/// Earliest accepted timestamp in milliseconds since the Unix epoch
/// (1990-01-01T00:00:00Z).
const MIN_VALID_TS: i64 = 631_152_000_000;
/// Latest accepted timestamp in milliseconds since the Unix epoch
/// (2050-01-01T00:00:00Z).
const MAX_VALID_TS: i64 = 2_524_608_000_000;
/// Bit pattern treated as "no value" when decoding an `f64`; it is the bit
/// pattern of `f64::MAX`.
const QMT_NAN_HEX: u64 = 0x7FEFFFFFFFFFFFFF;
32
/// Errors produced while locating, identifying or decoding a finance file.
#[derive(Debug, Error)]
pub enum FinanceError {
    /// An underlying I/O operation failed (converted automatically from `io::Error`).
    #[error("IO Error: {0}")]
    Io(#[from] io::Error),
    /// The path does not end in a `dat` extension (compared case-insensitively);
    /// carries the offending path as text.
    #[error("Invalid File Extension: {0}")]
    InvalidExtension(String),
    /// The numeric id taken from the file name is not one of the known file types.
    #[error("Unsupported File Type ID: {0}")]
    UnsupportedType(u16),
    /// The file name or file contents could not be interpreted; carries a message.
    #[error("Parse Error: {0}")]
    Parse(String),
}
50
/// Date-time carrying a fixed UTC offset; values built by this module use
/// UTC+8 (the "Bj" presumably stands for Beijing time).
pub type BjDateTime = DateTime<FixedOffset>;
53
/// One decoded entry of a finance file.
#[derive(Debug, Clone)]
pub struct FinanceRecord {
    /// Kind of file this record was read from.
    pub file_type: FileType,
    /// Reporting-period timestamp of the record.
    pub report_date: BjDateTime,
    /// Announcement timestamp; the parsers substitute `report_date` when the
    /// stored announcement timestamp is outside the accepted range.
    pub announce_date: BjDateTime,
    /// Payload, whose variant depends on `file_type`.
    pub data: FinanceData,
}
68
/// Payload of a [`FinanceRecord`].
#[derive(Debug, Clone)]
pub enum FinanceData {
    /// Numeric statement columns (balance sheet, income and cash-flow files).
    Report {
        /// Values in file order; a missing value is stored as `f64::NAN`.
        columns: Vec<f64>,
    },
    /// Share-capital figures; a missing value is stored as `0.0`.
    Capital {
        /// `f64` read from body offset 0.
        total_share: f64,
        /// `f64` read from body offset 8.
        flow_share: f64,
        /// `f64` read from body offset 16.
        restricted: f64,
        /// `f64` read from body offset 24.
        free_float_share: f64,
    },
    /// Shareholder counts; each is stored in the file as an `f64` and cast to
    /// `i64`, with a missing value becoming `0`.
    HolderCount {
        /// Count read from body offset 0.
        total_holders: i64,
        /// Count read from body offset 8.
        a_holders: i64,
        /// Count read from body offset 16.
        b_holders: i64,
        /// Count read from body offset 24.
        h_holders: i64,
        /// Count read from body offset 32.
        float_holders: i64,
        /// Count read from body offset 40.
        other_holders: i64,
    },
    /// Shareholder list (used for both `TopFlowHolder` and `TopHolder` files).
    TopHolder {
        /// Consecutive holder entries that share the same report and
        /// announcement timestamps, in file order.
        holders: Vec<Shareholder>,
    },
    /// Financial-ratio columns.
    Ratios {
        /// Values in file order; a missing value is stored as `f64::NAN`.
        ratios: Vec<f64>,
    },
}
114
/// One entry of a top-holder list, decoded from a 416-byte record.
///
/// Text fields are NUL-terminated, decoded as lossy UTF-8 and trimmed.
#[derive(Debug, Clone)]
pub struct Shareholder {
    /// Text field at record offset 16 (up to 192 bytes).
    pub name: String,
    /// Text field at record offset 216 (up to 56 bytes).
    pub holder_type: String,
    /// `f64` at record offset 272; `0.0` when missing.
    pub hold_amount: f64,
    /// Text field at record offset 280 (up to 16 bytes).
    pub change_reason: String,
    /// `f64` at record offset 304; `0.0` when missing.
    /// NOTE(review): test fixtures suggest this is a percentage — confirm.
    pub hold_ratio: f64,
    /// Text field at record offset 312 (up to 96 bytes).
    pub share_type: String,
    /// Little-endian `u32` stored in the last four bytes of the record.
    pub rank: u32,
}
133
/// Kind of finance file. Each variant's discriminant is the numeric id that
/// identifies the file type (it is parsed from the end of the file stem).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FileType {
    /// Balance sheet statement.
    BalanceSheet = 7001,
    /// Income statement.
    Income = 7002,
    /// Cash-flow statement.
    CashFlow = 7003,
    /// Share capital.
    Capital = 7004,
    /// Shareholder counts.
    HolderCount = 7005,
    /// Top holders list, "flow" flavour (presumably tradable-share holders).
    TopFlowHolder = 7006,
    /// Top holders list.
    TopHolder = 7007,
    /// Financial ratios.
    Ratios = 7008,
}

impl FileType {
    /// Maps a numeric file-type id to its variant.
    ///
    /// Returns `None` when `id` is not the discriminant of any variant.
    pub fn from_id(id: u16) -> Option<Self> {
        // Every variant, so the lookup is driven by the discriminants declared
        // on the enum instead of a second copy of the numbers.
        const KNOWN: [FileType; 8] = [
            FileType::BalanceSheet,
            FileType::Income,
            FileType::CashFlow,
            FileType::Capital,
            FileType::HolderCount,
            FileType::TopFlowHolder,
            FileType::TopHolder,
            FileType::Ratios,
        ];

        KNOWN.iter().copied().find(|kind| *kind as u16 == id)
    }
}
171
/// Column names for balance-sheet records, index-aligned with the decoded
/// `f64` columns.
///
/// NOTE(review): several spellings look like typos (`internal_shoule_recv`,
/// `advance_peceipts`, `m_petainedProfit`, ...). They are presumably copied
/// verbatim from the upstream field list and callers may key on them — confirm
/// before "correcting" any. The `__extra_N` entries (N = column index) are
/// presumably placeholders for trailing columns with no known name.
const BALANCE_COLUMN_NAMES: [&str; 156] = [
    "internal_shoule_recv",
    "fixed_capital_clearance",
    "should_pay_money",
    "settlement_payment",
    "receivable_premium",
    "accounts_receivable_reinsurance",
    "reinsurance_contract_reserve",
    "dividends_payable",
    "tax_rebate_for_export",
    "subsidies_receivable",
    "deposit_receivable",
    "apportioned_cost",
    "profit_and_current_assets_with_deal",
    "current_assets_one_year",
    "long_term_receivables",
    "other_long_term_investments",
    "original_value_of_fixed_assets",
    "net_value_of_fixed_assets",
    "depreciation_reserves_of_fixed_assets",
    "productive_biological_assets",
    "public_welfare_biological_assets",
    "oil_and_gas_assets",
    "development_expenditure",
    "right_of_split_share_distribution",
    "other_non_mobile_assets",
    "handling_fee_and_commission",
    "other_payables",
    "margin_payable",
    "internal_accounts_payable",
    "advance_cost",
    "insurance_contract_reserve",
    "broker_buying_and_selling_securities",
    "acting_underwriting_securities",
    "international_ticket_settlement",
    "domestic_ticket_settlement",
    "deferred_income",
    "short_term_bonds_payable",
    "long_term_deferred_income",
    "undetermined_investment_losses",
    "quasi_distribution_of_cash_dividends",
    "provisions_not",
    "cust_bank_dep",
    "provisions",
    "less_tsy_stk",
    "cash_equivalents",
    "loans_to_oth_banks",
    "tradable_fin_assets",
    "derivative_fin_assets",
    "bill_receivable",
    "account_receivable",
    "advance_payment",
    "int_rcv",
    "other_receivable",
    "red_monetary_cap_for_sale",
    "agency_bus_assets",
    "inventories",
    "other_current_assets",
    "total_current_assets",
    "loans_and_adv_granted",
    "fin_assets_avail_for_sale",
    "held_to_mty_invest",
    "long_term_eqy_invest",
    "invest_real_estate",
    "accumulated_depreciation",
    "fix_assets",
    "constru_in_process",
    "construction_materials",
    "long_term_liabilities",
    "intang_assets",
    "goodwill",
    "long_deferred_expense",
    "deferred_tax_assets",
    "total_non_current_assets",
    "tot_assets",
    "shortterm_loan",
    "borrow_central_bank",
    "loans_oth_banks",
    "tradable_fin_liab",
    "derivative_fin_liab",
    "notes_payable",
    "accounts_payable",
    "advance_peceipts",
    "fund_sales_fin_assets_rp",
    "empl_ben_payable",
    "taxes_surcharges_payable",
    "int_payable",
    "dividend_payable",
    "other_payable",
    "non_current_liability_in_one_year",
    "other_current_liability",
    "total_current_liability",
    "long_term_loans",
    "bonds_payable",
    "longterm_account_payable",
    "grants_received",
    "deferred_tax_liab",
    "other_non_current_liabilities",
    "non_current_liabilities",
    "tot_liab",
    "cap_stk",
    "cap_rsrv",
    "specific_reserves",
    "surplus_rsrv",
    "prov_nom_risks",
    "undistributed_profit",
    "cnvd_diff_foreign_curr_stat",
    "tot_shrhldr_eqy_excl_min_int",
    "minority_int",
    "total_equity",
    "tot_liab_shrhldr_eqy",
    "inventory_depreciation_reserve",
    "current_ratio",
    "m_cashAdepositsCentralBank",
    "m_nobleMetal",
    "m_depositsOtherFinancialInstitutions",
    "m_currentInvestment",
    "m_redemptoryMonetaryCapitalSale",
    "m_netAmountSubrogation",
    "m_refundableDeposits",
    "m_netAmountLoanPledged",
    "m_fixedTimeDeposit",
    "m_netLongtermDebtInvestments",
    "m_permanentInvestment",
    "m_depositForcapitalRecognizance",
    "m_netBalConstructionProgress",
    "m_separateAccountAssets",
    "m_capitalInvicariousBussiness",
    "m_otherAssets",
    "m_depositsWithBanksOtherFinancialIns",
    "m_indemnityPayable",
    "m_policyDividendPayable",
    "m_guaranteeInvestmentFunds",
    "m_premiumsReceivedAdvance",
    "m_insuranceLiabilities",
    "m_liabilitiesIndependentAccounts",
    "m_liabilitiesVicariousBusiness",
    "m_otherLiablities",
    "m_capitalPremium",
    "m_petainedProfit",
    "m_provisionTransactionRisk",
    "m_otherReserves",
    "__extra_141",
    "__extra_142",
    "__extra_143",
    "__extra_144",
    "__extra_145",
    "__extra_146",
    "__extra_147",
    "__extra_148",
    "__extra_149",
    "__extra_150",
    "__extra_151",
    "__extra_152",
    "__extra_153",
    "__extra_154",
    "__extra_155",
];
330
/// Column names for income-statement records, index-aligned with the decoded
/// `f64` columns.
///
/// NOTE(review): spellings such as `m_nsuranceFeesCommissionExpenses` are kept
/// verbatim, presumably mirroring the upstream field list — confirm before
/// changing. The `__extra_N` entries (N = column index) are presumably
/// placeholders for trailing columns with no known name.
const INCOME_COLUMN_NAMES: [&str; 80] = [
    "revenue_inc",
    "earned_premium",
    "real_estate_sales_income",
    "total_operating_cost",
    "real_estate_sales_cost",
    "research_expenses",
    "surrender_value",
    "net_payments",
    "net_withdrawal_ins_con_res",
    "policy_dividend_expenses",
    "reinsurance_cost",
    "change_income_fair_value",
    "futures_loss",
    "trust_income",
    "subsidize_revenue",
    "other_business_profits",
    "net_profit_excl_merged_int_inc",
    "int_inc",
    "handling_chrg_comm_inc",
    "less_handling_chrg_comm_exp",
    "other_bus_cost",
    "plus_net_gain_fx_trans",
    "il_net_loss_disp_noncur_asset",
    "inc_tax",
    "unconfirmed_invest_loss",
    "net_profit_excl_min_int_inc",
    "less_int_exp",
    "other_bus_inc",
    "revenue",
    "total_expense",
    "less_taxes_surcharges_ops",
    "sale_expense",
    "less_gerl_admin_exp",
    "financial_expense",
    "less_impair_loss_assets",
    "plus_net_invest_inc",
    "incl_inc_invest_assoc_jv_entp",
    "oper_profit",
    "plus_non_oper_rev",
    "less_non_oper_exp",
    "tot_profit",
    "net_profit_incl_min_int_inc",
    "net_profit_incl_min_int_inc_after",
    "minority_int_inc",
    "s_fa_eps_basic",
    "s_fa_eps_diluted",
    "total_income",
    "total_income_minority",
    "other_compreh_inc",
    "operating_revenue",
    "cost_of_goods_sold",
    "m_netinterestIncome",
    "m_netFeesCommissions",
    "m_insuranceBusiness",
    "m_separatePremium",
    "m_asideReservesUndueLiabilities",
    "m_paymentsInsuranceClaims",
    "m_amortizedCompensationExpenses",
    "m_netReserveInsuranceLiability",
    "m_policyReserve",
    "m_amortizeInsuranceReserve",
    "m_nsuranceFeesCommissionExpenses",
    "m_operationAdministrativeExpense",
    "m_amortizedReinsuranceExpenditure",
    "m_netProfitLossdisposalNonassets",
    "m_otherItemsAffectingNetProfit",
    "__extra_66",
    "__extra_67",
    "__extra_68",
    "__extra_69",
    "__extra_70",
    "__extra_71",
    "__extra_72",
    "__extra_73",
    "__extra_74",
    "__extra_75",
    "__extra_76",
    "__extra_77",
    "__extra_78",
    "__extra_79",
];
413
/// Column names for cash-flow records, index-aligned with the decoded `f64`
/// columns.
///
/// NOTE(review): `net_incr_fund_borr_ofi` appears twice (indices 29 and 30), so
/// the names are not unique keys — anything that collects them into a map will
/// silently drop one column. Presumably this mirrors the upstream field list;
/// confirm. Odd spellings (`cass_received_sub_abs`, `ncrease_deferred_income`)
/// are likewise kept verbatim. The `__extra_N` entries (N = column index) are
/// presumably placeholders for trailing columns with no known name.
const CASHFLOW_COLUMN_NAMES: [&str; 111] = [
    "cash_received_ori_ins_contract_pre",
    "net_cash_received_rei_ope",
    "net_increase_insured_funds",
    "net_increase_in_disposal",
    "cash_for_interest",
    "net_increase_in_repurchase_funds",
    "cash_for_payment_original_insurance",
    "cash_payment_policy_dividends",
    "disposal_other_business_units",
    "net_cash_deal_subcompany",
    "cash_received_from_pledges",
    "cash_paid_for_investments",
    "net_increase_in_pledged_loans",
    "cash_paid_by_subsidiaries",
    "increase_in_cash_paid",
    "fix_intan_other_asset_dispo_cash_payment",
    "cash_from_mino_s_invest_sub",
    "cass_received_sub_abs",
    "cass_received_sub_investments",
    "minority_shareholder_profit_loss",
    "unrecognized_investment_losses",
    "ncrease_deferred_income",
    "projected_liability",
    "increase_operational_payables",
    "reduction_outstanding_amounts_less",
    "reduction_outstanding_amounts_more",
    "goods_sale_and_service_render_cash",
    "net_incr_dep_cob",
    "net_incr_loans_central_bank",
    "net_incr_fund_borr_ofi",
    "net_incr_fund_borr_ofi",
    "tax_levy_refund",
    "cash_paid_invest",
    "other_cash_pay_ral_inv_act",
    "other_cash_recp_ral_oper_act",
    "stot_cash_inflows_oper_act",
    "goods_and_services_cash_paid",
    "net_incr_clients_loan_adv",
    "net_incr_dep_cbob",
    "handling_chrg_paid",
    "cash_pay_beh_empl",
    "pay_all_typ_tax",
    "other_cash_pay_ral_oper_act",
    "stot_cash_outflows_oper_act",
    "net_cash_flows_oper_act",
    "cash_recp_disp_withdrwl_invest",
    "cash_recp_return_invest",
    "net_cash_recp_disp_fiolta",
    "other_cash_recp_ral_inv_act",
    "stot_cash_inflows_inv_act",
    "cash_pay_acq_const_fiolta",
    "stot_cash_outflows_inv_act",
    "net_cash_flows_inv_act",
    "cash_recp_cap_contrib",
    "cash_recp_borrow",
    "proc_issue_bonds",
    "other_cash_recp_ral_fnc_act",
    "stot_cash_inflows_fnc_act",
    "cash_prepay_amt_borr",
    "cash_pay_dist_dpcp_int_exp",
    "other_cash_pay_ral_fnc_act",
    "stot_cash_outflows_fnc_act",
    "net_cash_flows_fnc_act",
    "eff_fx_flu_cash",
    "net_incr_cash_cash_equ",
    "cash_cash_equ_beg_period",
    "cash_cash_equ_end_period",
    "net_profit",
    "plus_prov_depr_assets",
    "depr_fa_coga_dpba",
    "amort_intang_assets",
    "amort_lt_deferred_exp",
    "decr_deferred_exp",
    "incr_acc_exp",
    "loss_disp_fiolta",
    "loss_scr_fa",
    "loss_fv_chg",
    "fin_exp",
    "invest_loss",
    "decr_deferred_inc_tax_assets",
    "incr_deferred_inc_tax_liab",
    "decr_inventories",
    "decr_oper_payable",
    "others",
    "im_net_cash_flows_oper_act",
    "conv_debt_into_cap",
    "conv_corp_bonds_due_within_1y",
    "fa_fnc_leases",
    "end_bal_cash",
    "less_beg_bal_cash",
    "plus_end_bal_cash_equ",
    "less_beg_bal_cash_equ",
    "im_net_incr_cash_cash_equ",
    "m_netDecreaseUnwindingFunds",
    "m_netReductionPurchaseRebates",
    "m_netIncreaseDepositsBanks",
    "m_netCashReinsuranceBusiness",
    "m_netReductionDeposInveFunds",
    "m_netIncreaseUnwindingFunds",
    "m_netReductionAmountBorrowedFunds",
    "m_netReductionSaleRepurchaseProceeds",
    "m_paymentOtherCashRelated",
    "m_cashOutFlowsInvesactivities",
    "m_absorbCashEquityInv",
    "m_otherImpactsOnCash",
    "m_addOperatingReceivableItems",
    "__extra_106",
    "__extra_107",
    "__extra_108",
    "__extra_109",
    "__extra_110",
];
527
/// Column names for financial-ratio records, index-aligned with the decoded
/// `f64` columns.
///
/// NOTE(review): `pre_pay_operate_income` and `sales_cash_flow` each appear
/// twice (indices 22/24 and 23/25), so the names are not unique keys; and
/// `m_anntime` / `m_timetag` sit in the middle of the value columns. Presumably
/// this mirrors the upstream field list — confirm the alignment against real
/// data. The `__extra_N` entries (N = column index) are presumably placeholders
/// for trailing columns with no known name.
const RATIO_COLUMN_NAMES: [&str; 41] = [
    "s_fa_ocfps",
    "s_fa_bps",
    "s_fa_eps_basic",
    "s_fa_eps_diluted",
    "s_fa_undistributedps",
    "s_fa_surpluscapitalps",
    "adjusted_earnings_per_share",
    "du_return_on_equity",
    "sales_gross_profit",
    "inc_revenue_rate",
    "du_profit_rate",
    "inc_net_profit_rate",
    "adjusted_net_profit_rate",
    "inc_total_revenue_annual",
    "inc_net_profit_to_shareholders_annual",
    "adjusted_profit_to_profit_annual",
    "equity_roe",
    "net_roe",
    "total_roe",
    "gross_profit",
    "net_profit",
    "actual_tax_rate",
    "pre_pay_operate_income",
    "sales_cash_flow",
    "pre_pay_operate_income",
    "sales_cash_flow",
    "gear_ratio",
    "inventory_turnover",
    "m_anntime",
    "m_timetag",
    "inc_revenue",
    "inc_gross_profit",
    "inc_profit_before_tax",
    "du_profit",
    "inc_net_profit",
    "adjusted_net_profit",
    "__extra_36",
    "__extra_37",
    "__extra_38",
    "__extra_39",
    "__extra_40",
];
571
572impl FinanceRecord {
573 pub fn column_names(&self) -> Option<&'static [&'static str]> {
577 FinanceReader::column_names(self.file_type)
578 }
579
580 pub fn named_values(&self) -> Option<Vec<(&'static str, f64)>> {
584 let names = self.column_names()?;
585 match &self.data {
586 FinanceData::Report { columns } => {
587 Some(names.iter().copied().zip(columns.iter().copied()).collect())
588 }
589 FinanceData::Ratios { ratios } => {
590 Some(names.iter().copied().zip(ratios.iter().copied()).collect())
591 }
592 _ => None,
593 }
594 }
595}
596
597pub struct FinanceReader;
599
600impl FinanceReader {
601 pub fn column_names(file_type: FileType) -> Option<&'static [&'static str]> {
605 match file_type {
606 FileType::BalanceSheet => Some(&BALANCE_COLUMN_NAMES),
607 FileType::Income => Some(&INCOME_COLUMN_NAMES),
608 FileType::CashFlow => Some(&CASHFLOW_COLUMN_NAMES),
609 FileType::Ratios => Some(&RATIO_COLUMN_NAMES),
610 _ => None,
611 }
612 }
613
614 pub fn read_file(path: impl AsRef<Path>) -> Result<Vec<FinanceRecord>, FinanceError> {
630 let path = path.as_ref();
631 Self::validate_dat_path(path)?;
632 let file_type = Self::detect_type(path)?;
633 let file = File::open(path)?;
634
635 let mmap = unsafe { MmapOptions::new().map(&file)? };
637 let data = &mmap[..];
638
639 match file_type {
640 FileType::BalanceSheet => Self::parse_fixed(data, STRIDE_BALANCE, 0, |body| {
641 let mut cols = Vec::with_capacity(COLUMNS_BALANCE);
642 for i in 0..COLUMNS_BALANCE {
643 cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
644 }
645 FinanceData::Report { columns: cols }
646 }),
647 FileType::Income => Self::parse_fixed(data, STRIDE_INCOME, 8, |body| {
648 let mut cols = Vec::with_capacity(COLUMNS_INCOME);
649 for i in 0..COLUMNS_INCOME {
650 cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
651 }
652 FinanceData::Report { columns: cols }
653 }),
654 FileType::CashFlow => Self::parse_fixed(data, STRIDE_CASHFLOW, 8, |body| {
655 let mut cols = Vec::with_capacity(COLUMNS_CASHFLOW);
656 for i in 0..COLUMNS_CASHFLOW {
657 cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
658 }
659 FinanceData::Report { columns: cols }
660 }),
661 FileType::Ratios => Self::parse_fixed(data, STRIDE_RATIOS, 0, |body| {
662 let mut cols = Vec::with_capacity(COLUMNS_RATIOS);
663 for i in 0..COLUMNS_RATIOS {
664 cols.push(Self::read_f64(body, i * 8).unwrap_or(f64::NAN));
665 }
666 FinanceData::Ratios { ratios: cols }
667 }),
668 FileType::Capital => {
669 Self::parse_fixed(data, STRIDE_CAPITAL, 0, |body| {
670 FinanceData::Capital {
672 total_share: Self::read_f64(body, 0).unwrap_or(0.0),
673 flow_share: Self::read_f64(body, 8).unwrap_or(0.0),
674 restricted: Self::read_f64(body, 16).unwrap_or(0.0),
675 free_float_share: Self::read_f64(body, 24).unwrap_or(0.0),
676 }
677 })
678 }
679 FileType::HolderCount => {
680 Self::parse_7005_fixed(data)
686 }
687 FileType::TopFlowHolder | FileType::TopHolder => {
688 Self::parse_top_holders(data, file_type)
689 }
690 }
691 }
692
693 fn detect_type(path: &Path) -> Result<FileType, FinanceError> {
695 let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
696 let id_part = stem.split('_').next_back().unwrap_or("");
697 let id = id_part
698 .parse::<u16>()
699 .map_err(|_| FinanceError::Parse("Invalid Filename".into()))?;
700 FileType::from_id(id).ok_or(FinanceError::UnsupportedType(id))
701 }
702
703 fn validate_dat_path(path: &Path) -> Result<(), FinanceError> {
704 let ext = path
705 .extension()
706 .and_then(|s| s.to_str())
707 .unwrap_or_default()
708 .to_ascii_lowercase();
709 if ext != "dat" {
710 return Err(FinanceError::InvalidExtension(path.display().to_string()));
711 }
712 Ok(())
713 }
714
715 fn parse_fixed<F>(
718 data: &[u8],
719 stride: usize,
720 header_offset: usize,
721 parser: F,
722 ) -> Result<Vec<FinanceRecord>, FinanceError>
723 where
724 F: Fn(&[u8]) -> FinanceData,
725 {
726 let mut results = Vec::new();
727 let mut cursor = 0;
728 let len = data.len();
729
730 while cursor + header_offset + 16 <= len {
731 let header_start = cursor + header_offset;
733 let ts1 = LittleEndian::read_i64(&data[header_start..header_start + 8]);
734 let ts2 = LittleEndian::read_i64(&data[header_start + 8..header_start + 16]);
735
736 if Self::is_valid_ts(ts1) {
738 if cursor + stride <= len {
740 let report_date = Self::ts_to_bj(ts1);
741 let announce_date = if Self::is_valid_ts(ts2) {
743 Self::ts_to_bj(ts2)
744 } else {
745 report_date };
747
748 let body = &data[header_start + 16..cursor + stride];
749 results.push(FinanceRecord {
750 file_type: Self::file_type_from_stride(stride)?,
751 report_date,
752 announce_date,
753 data: parser(body),
754 });
755
756 cursor += stride;
757 continue;
758 }
759 }
760 cursor += 8;
762 }
763 Ok(results)
764 }
765
766 fn parse_7005_fixed(data: &[u8]) -> Result<Vec<FinanceRecord>, FinanceError> {
769 let mut results = Vec::new();
770 let mut cursor = 0;
771 let stride = STRIDE_HOLDER;
772
773 while cursor + 16 <= data.len() {
774 let ts1 = LittleEndian::read_i64(&data[cursor..cursor + 8]); let ts2 = LittleEndian::read_i64(&data[cursor + 8..cursor + 16]); if Self::is_valid_ts(ts2) && cursor + stride <= data.len() {
779 let report_date = Self::ts_to_bj(ts2);
780 let announce_date = if Self::is_valid_ts(ts1) {
781 Self::ts_to_bj(ts1)
782 } else {
783 report_date
784 };
785
786 let body = &data[cursor + 16..cursor + stride];
787 let total_holders = Self::read_f64(body, 0).unwrap_or(0.0) as i64;
788 let a_holders = Self::read_f64(body, 8).unwrap_or(0.0) as i64;
789 let b_holders = Self::read_f64(body, 16).unwrap_or(0.0) as i64;
790 let h_holders = Self::read_f64(body, 24).unwrap_or(0.0) as i64;
791 let float_holders = Self::read_f64(body, 32).unwrap_or(0.0) as i64;
792 let other_holders = Self::read_f64(body, 40).unwrap_or(0.0) as i64;
793
794 results.push(FinanceRecord {
795 file_type: FileType::HolderCount,
796 report_date,
797 announce_date,
798 data: FinanceData::HolderCount {
799 total_holders,
800 a_holders,
801 b_holders,
802 h_holders,
803 float_holders,
804 other_holders,
805 },
806 });
807 cursor += stride;
808 continue;
809 }
810 cursor += 8;
811 }
812 Ok(results)
813 }
814
815 fn parse_top_holders(
817 data: &[u8],
818 file_type: FileType,
819 ) -> Result<Vec<FinanceRecord>, FinanceError> {
820 let mut results = Vec::new();
821 let mut current_report_ts = 0i64;
822 let mut current_announce_ts = 0i64;
823 let mut current_holders = Vec::new();
824
825 for chunk in data.chunks_exact(STRIDE_TOP_HOLDER) {
826 let announce_ts = LittleEndian::read_i64(&chunk[0..8]);
827 let report_ts = LittleEndian::read_i64(&chunk[8..16]);
828 if !Self::is_valid_ts(announce_ts) || !Self::is_valid_ts(report_ts) {
829 continue;
830 }
831
832 let holder = Self::parse_top_holder_record(chunk);
833 if current_holders.is_empty() {
834 current_report_ts = report_ts;
835 current_announce_ts = announce_ts;
836 }
837
838 if report_ts != current_report_ts || announce_ts != current_announce_ts {
839 results.push(FinanceRecord {
840 file_type,
841 report_date: Self::ts_to_bj(current_report_ts),
842 announce_date: Self::ts_to_bj(current_announce_ts),
843 data: FinanceData::TopHolder {
844 holders: std::mem::take(&mut current_holders),
845 },
846 });
847 current_report_ts = report_ts;
848 current_announce_ts = announce_ts;
849 }
850
851 current_holders.push(holder);
852 }
853
854 if !current_holders.is_empty() {
855 results.push(FinanceRecord {
856 file_type,
857 report_date: Self::ts_to_bj(current_report_ts),
858 announce_date: Self::ts_to_bj(current_announce_ts),
859 data: FinanceData::TopHolder {
860 holders: current_holders,
861 },
862 });
863 }
864
865 Ok(results)
866 }
867
868 fn parse_top_holder_record(record: &[u8]) -> Shareholder {
869 Shareholder {
870 name: Self::read_string(record, 16, 192),
871 holder_type: Self::read_string(record, 216, 56),
872 hold_amount: Self::read_f64(record, 272).unwrap_or(0.0),
873 change_reason: Self::read_string(record, 280, 16),
874 hold_ratio: Self::read_f64(record, 304).unwrap_or(0.0),
875 share_type: Self::read_string(record, 312, 96),
876 rank: LittleEndian::read_u32(&record[412..416]),
877 }
878 }
879
880 fn file_type_from_stride(stride: usize) -> Result<FileType, FinanceError> {
881 match stride {
882 STRIDE_BALANCE => Ok(FileType::BalanceSheet),
883 STRIDE_INCOME => Ok(FileType::Income),
884 STRIDE_CASHFLOW => Ok(FileType::CashFlow),
885 STRIDE_RATIOS => Ok(FileType::Ratios),
886 STRIDE_CAPITAL => Ok(FileType::Capital),
887 STRIDE_HOLDER => Ok(FileType::HolderCount),
888 _ => Err(FinanceError::Parse(format!(
889 "Unknown finance stride: {}",
890 stride
891 ))),
892 }
893 }
894
895 fn is_valid_ts(ts: i64) -> bool {
896 (MIN_VALID_TS..=MAX_VALID_TS).contains(&ts)
897 }
898
899 fn ts_to_bj(ts: i64) -> BjDateTime {
901 let tz = FixedOffset::east_opt(8 * 3600).unwrap();
903 let secs = ts / 1000;
904 let nsecs = (ts % 1000) * 1_000_000;
905 tz.timestamp_opt(secs, nsecs as u32)
906 .single()
907 .unwrap_or_default()
908 }
909
910 fn read_f64(data: &[u8], offset: usize) -> Option<f64> {
912 if offset + 8 > data.len() {
913 return None;
914 }
915 let u = LittleEndian::read_u64(&data[offset..offset + 8]);
916 if u == QMT_NAN_HEX {
917 return None;
918 }
919 let f = f64::from_bits(u);
920 if f.is_nan() { None } else { Some(f) }
921 }
922
923 fn read_string(data: &[u8], offset: usize, max_len: usize) -> String {
925 if offset >= data.len() {
926 return String::new();
927 }
928 let end = (offset + max_len).min(data.len());
929 let slice = &data[offset..end];
930 let actual_len = slice.iter().position(|&c| c == 0).unwrap_or(slice.len());
932 String::from_utf8_lossy(&slice[..actual_len])
933 .trim()
934 .to_string()
935 }
936}
937
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Resolves a fixture file name inside the fixture directory.
    ///
    /// The directory comes from the `QMT_FINANCE_FIXTURE_DIR` environment
    /// variable when it is set, so the tests can run on any machine; otherwise
    /// the original developer-local directory is used.
    fn get_fixture(file: &str) -> PathBuf {
        std::env::var_os("QMT_FINANCE_FIXTURE_DIR")
            .map(PathBuf::from)
            .unwrap_or_else(|| PathBuf::from("/home/sunnysab/Code/trade-rs/qmt-parser/finance/"))
            .join(file)
    }

    /// Parses the `002419_<type_id>.DAT` fixture.
    ///
    /// Returns `None` (after logging a skip message) when the fixture is absent,
    /// so tests pass on machines without the data. Panics when the file exists
    /// but cannot be parsed or contains no records.
    fn load_fixture(type_id: u16) -> Option<Vec<FinanceRecord>> {
        let path = get_fixture(&format!("002419_{}.DAT", type_id));
        if !path.exists() {
            eprintln!("Skipping {}: File not found", type_id);
            return None;
        }

        let records = FinanceReader::read_file(&path)
            .unwrap_or_else(|err| panic!("Failed to parse {}: {:?}", type_id, err));
        assert!(!records.is_empty(), "{} should not be empty", type_id);
        Some(records)
    }

    /// Prints the first five records of a parsed file for manual inspection.
    fn print_head(type_id: u16, records: &[FinanceRecord]) {
        println!(
            "\n>>> [Type {}] Found {} records. Showing first 5:",
            type_id,
            records.len()
        );
        for (i, rec) in records.iter().take(5).enumerate() {
            println!(
                "#{:03} | Report: {} | Announce: {}",
                i,
                rec.report_date.format("%Y-%m-%d"),
                rec.announce_date.format("%Y-%m-%d")
            );
            println!("Data: {:#?}\n", rec.data);
        }
        if records.is_empty() {
            println!("(No records found)\n");
        } else {
            println!(
                "... (remaining {} records omitted)\n",
                records.len().saturating_sub(5)
            );
        }
    }

    #[test]
    fn test_7001_balance_sheet() {
        let res = match load_fixture(7001) {
            Some(res) => res,
            None => return,
        };

        if let FinanceData::Report { columns } = &res[0].data {
            assert_eq!(columns.len(), 156);
            assert!(columns[0].is_nan());
            assert!((columns[11] - 273_297_896.39).abs() < 1e-6);
        } else {
            panic!("7001 parsed as wrong type");
        }

        print_head(7001, &res);
    }

    #[test]
    fn test_7002_income() {
        let res = match load_fixture(7002) {
            Some(res) => res,
            None => return,
        };

        if let FinanceData::Report { columns } = &res[0].data {
            assert_eq!(columns.len(), 80);
            assert!((columns[0] - 4_809_251_460.5).abs() < 1e-6);
            assert!(columns[1].is_nan());
        } else {
            panic!("7002 parsed as wrong type");
        }

        print_head(7002, &res);
    }

    #[test]
    fn test_7003_cashflow() {
        let res = match load_fixture(7003) {
            Some(res) => res,
            None => return,
        };

        if let FinanceData::Report { columns } = &res[0].data {
            assert_eq!(columns.len(), 111);
            assert!(columns[0].is_nan());
            assert!((columns[23] - 5_506_707_615.58).abs() < 1e-6);
        } else {
            panic!("7003 parsed as wrong type");
        }

        print_head(7003, &res);
    }

    #[test]
    fn test_7004_capital() {
        let res = match load_fixture(7004) {
            Some(res) => res,
            None => return,
        };

        if let FinanceData::Capital {
            total_share,
            flow_share,
            restricted,
            free_float_share,
        } = &res[0].data
        {
            assert_eq!(*total_share, 400_100_000.0);
            assert_eq!(*flow_share, 40_080_000.0);
            assert_eq!(*restricted, 0.0);
            assert_eq!(*free_float_share, 40_080_000.0);
        } else {
            panic!("7004 parsed as wrong type");
        }

        print_head(7004, &res);
    }

    #[test]
    fn test_7005_holder_count() {
        let res = match load_fixture(7005) {
            Some(res) => res,
            None => return,
        };

        if let FinanceData::HolderCount {
            total_holders,
            a_holders,
            b_holders,
            h_holders,
            float_holders,
            other_holders,
        } = &res[0].data
        {
            assert_eq!(*total_holders, 35_719);
            assert_eq!(*a_holders, 35_719);
            assert_eq!(*b_holders, 0);
            assert_eq!(*h_holders, 0);
            assert_eq!(*float_holders, 0);
            assert_eq!(*other_holders, 0);
        } else {
            panic!("7005 parsed as wrong type");
        }

        print_head(7005, &res);
    }

    #[test]
    fn test_7006_top_float_holder() {
        let res = match load_fixture(7006) {
            Some(res) => res,
            None => return,
        };

        if let FinanceData::TopHolder { holders } = &res[0].data {
            assert_eq!(holders.len(), 40);
            assert_eq!(holders[0].name, "中国航空技术深圳有限公司");
            assert_eq!(holders[0].holder_type, "机构投资账户");
            assert_eq!(holders[0].hold_amount, 158_128_000.0);
            assert_eq!(holders[0].change_reason, "不变");
            assert_eq!(holders[0].hold_ratio, 39.52);
            assert_eq!(holders[0].share_type, "流通A股");
            assert_eq!(holders[0].rank, 1);
        } else {
            panic!("7006 parsed as wrong type");
        }

        print_head(7006, &res);
    }

    #[test]
    fn test_7007_top_holder() {
        let res = match load_fixture(7007) {
            Some(res) => res,
            None => return,
        };

        if let FinanceData::TopHolder { holders } = &res[0].data {
            assert_eq!(holders.len(), 10);
            assert_eq!(holders[0].name, "中国工商银行-诺安股票证券投资基金");
            assert_eq!(holders[0].holder_type, "机构投资账户");
            assert_eq!(holders[0].hold_amount, 1_799_860.0);
            assert_eq!(holders[0].change_reason, "不变");
            assert_eq!(holders[0].hold_ratio, 0.45);
            assert_eq!(holders[0].share_type, "流通A股");
            assert_eq!(holders[0].rank, 1);
        } else {
            panic!("7007 parsed as wrong type");
        }

        print_head(7007, &res);
    }

    #[test]
    fn test_7008_ratios() {
        let res = match load_fixture(7008) {
            Some(res) => res,
            None => return,
        };

        if let FinanceData::Ratios { ratios } = &res[0].data {
            assert_eq!(ratios.len(), 41);
        } else {
            panic!("7008 parsed as wrong type");
        }

        print_head(7008, &res);
    }

    #[test]
    fn test_report_column_names() {
        let balance = FinanceReader::column_names(FileType::BalanceSheet).expect("7001 names");
        assert_eq!(balance.len(), 156);
        assert_eq!(balance[0], "internal_shoule_recv");
        assert_eq!(balance[44], "cash_equivalents");
        assert_eq!(balance[140], "m_otherReserves");
        assert_eq!(balance[141], "__extra_141");

        let income = FinanceReader::column_names(FileType::Income).expect("7002 names");
        assert_eq!(income.len(), 80);
        assert_eq!(income[0], "revenue_inc");
        assert_eq!(income[3], "total_operating_cost");
        assert_eq!(income[65], "m_otherItemsAffectingNetProfit");
        assert_eq!(income[66], "__extra_66");

        let cashflow = FinanceReader::column_names(FileType::CashFlow).expect("7003 names");
        assert_eq!(cashflow.len(), 111);
        assert_eq!(cashflow[0], "cash_received_ori_ins_contract_pre");
        assert_eq!(cashflow[26], "goods_sale_and_service_render_cash");
        assert_eq!(cashflow[105], "m_addOperatingReceivableItems");
        assert_eq!(cashflow[106], "__extra_106");

        let ratios = FinanceReader::column_names(FileType::Ratios).expect("7008 names");
        assert_eq!(ratios.len(), 41);
        assert_eq!(ratios[0], "s_fa_ocfps");
        assert_eq!(ratios[1], "s_fa_bps");
        assert_eq!(ratios[35], "adjusted_net_profit");
        assert_eq!(ratios[36], "__extra_36");
    }

    #[test]
    fn test_named_values_for_ratios() {
        let res = match load_fixture(7008) {
            Some(res) => res,
            None => return,
        };
        let named = res[0].named_values().expect("named ratios");

        assert_eq!(named[0].0, "s_fa_ocfps");
        assert!((named[0].1 - 0.5646).abs() < 1e-9);
        assert_eq!(named[1].0, "s_fa_bps");
        assert!((named[1].1 - 7.62).abs() < 1e-9);
    }

    #[test]
    fn test_finance_record_is_self_describing() {
        let res = match load_fixture(7001) {
            Some(res) => res,
            None => return,
        };
        let first = &res[0];
        assert_eq!(first.file_type, FileType::BalanceSheet);
        let names = first.column_names().expect("balance sheet names");
        assert_eq!(names[0], "internal_shoule_recv");

        let named = first.named_values().expect("self-described values");
        assert_eq!(named[0].0, "internal_shoule_recv");
    }
}