Skip to main content

qmt_parser/
datadir.rs

1//! 基于 QMT `datadir` 根目录的自动发现入口。
2//!
3//! 这个模块把“文件路径拼装/发现”和“实际解析”拆开:
4//!
5//! - [`QmtDataDir`] 负责从 datadir 发现 tick、分钟线、日线、财务、分红和 metadata 文件
6//! - 具体二进制解析仍委托给现有模块
7
8use std::collections::BTreeMap;
9use std::path::{Path, PathBuf};
10
11use chrono::NaiveDate;
12
13use crate::day::{
14    DailyKlineData, parse_daily_file_to_structs, parse_daily_to_structs,
15    parse_daily_to_structs_in_range,
16};
17use crate::dividend::DividendDb;
18use crate::error::DataDirError;
19use crate::finance::{FileType, FinanceReader, FinanceRecord};
20use crate::metadata::{
21    load_holidays_from_root, load_industry_from_root, load_sector_names_from_root,
22    load_sector_weight_index_from_root, load_sector_weight_members_from_root,
23    load_sectorlist_from_root,
24};
25use crate::min::{MinKlineData, parse_min_to_structs};
26use crate::tick::{TickData, parse_ticks_to_structs};
27
28#[cfg(feature = "polars")]
29use crate::day::{
30    parse_daily_file_to_dataframe, parse_daily_to_dataframe, parse_daily_to_dataframe_in_range,
31};
32#[cfg(feature = "polars")]
33use crate::min::parse_min_to_dataframe;
34#[cfg(feature = "polars")]
35use crate::tick::parse_ticks_to_dataframe;
36#[cfg(feature = "polars")]
37use polars::prelude::DataFrame;
38
/// Trading markets that a QMT datadir is organised by.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Market {
    /// Shanghai market.
    Sh,
    /// Shenzhen market.
    Sz,
    /// Beijing market.
    Bj,
}

impl Market {
    /// Returns the market directory name used inside a QMT datadir
    /// (`"SH"`, `"SZ"` or `"BJ"`).
    pub fn as_str(self) -> &'static str {
        let dir_name = match self {
            Market::Sh => "SH",
            Market::Sz => "SZ",
            Market::Bj => "BJ",
        };
        dir_name
    }
}
60
61impl TryFrom<&str> for Market {
62    type Error = DataDirError;
63
64    fn try_from(value: &str) -> Result<Self, Self::Error> {
65        let normalized = value.trim().to_ascii_uppercase();
66        match normalized.as_str() {
67            "SH" => Ok(Self::Sh),
68            "SZ" => Ok(Self::Sz),
69            "BJ" => Ok(Self::Bj),
70            _ => Err(DataDirError::InvalidInput(format!(
71                "unknown market: {value}"
72            ))),
73        }
74    }
75}
76
77/// 解析证券代码字符串,支持 `SZ000001` 和 `000001.SZ` 格式。
78pub fn parse_security_code(value: &str) -> Result<(Market, String), DataDirError> {
79    let raw = value.trim();
80    validate_non_empty("security_code", raw)?;
81
82    if let Some((symbol, market)) = raw.rsplit_once('.') {
83        validate_symbol(symbol)?;
84        return Ok((Market::try_from(market)?, symbol.to_string()));
85    }
86
87    if raw.len() <= 2 {
88        return Err(DataDirError::InvalidInput(format!(
89            "unsupported security code: {value}"
90        )));
91    }
92
93    let (market, symbol) = raw.split_at(2);
94    validate_symbol(symbol)?;
95    Ok((Market::try_from(market)?, symbol.to_string()))
96}
97
98/// QMT datadir 根目录句柄。
99#[derive(Debug, Clone)]
100pub struct QmtDataDir {
101    root: PathBuf,
102}
103
104impl QmtDataDir {
105    /// 创建 datadir 根目录句柄。
106    pub fn new(path: impl AsRef<Path>) -> Result<Self, DataDirError> {
107        let root = path.as_ref().to_path_buf();
108        if !root.is_dir() {
109            return Err(DataDirError::InvalidRoot(root));
110        }
111        Ok(Self { root })
112    }
113
114    /// 返回 datadir 根目录。
115    pub fn root(&self) -> &Path {
116        &self.root
117    }
118
119    /// 定位 tick 文件路径。
120    pub fn tick_path(
121        &self,
122        market: Market,
123        symbol: &str,
124        date: &str,
125    ) -> Result<PathBuf, DataDirError> {
126        validate_symbol(symbol)?;
127        validate_date(date)?;
128        first_existing(
129            "tick file",
130            vec![
131                self.root
132                    .join(market.as_str())
133                    .join("0")
134                    .join(symbol)
135                    .join(format!("{date}.dat")),
136                self.root
137                    .join(market.as_str())
138                    .join("0")
139                    .join(symbol)
140                    .join(format!("{date}.DAT")),
141            ],
142        )
143    }
144
145    /// 定位 1 分钟线文件路径。
146    pub fn min_path(&self, market: Market, symbol: &str) -> Result<PathBuf, DataDirError> {
147        validate_symbol(symbol)?;
148        first_existing(
149            "minute file",
150            vec![
151                self.root
152                    .join(market.as_str())
153                    .join("60")
154                    .join(format!("{symbol}.dat")),
155                self.root
156                    .join(market.as_str())
157                    .join("60")
158                    .join(format!("{symbol}.DAT")),
159            ],
160        )
161    }
162
163    /// 定位日线文件路径。
164    pub fn day_path(&self, market: Market, symbol: &str) -> Result<PathBuf, DataDirError> {
165        validate_symbol(symbol)?;
166        first_existing(
167            "daily file",
168            vec![
169                self.root
170                    .join(market.as_str())
171                    .join("86400")
172                    .join(format!("{symbol}.DAT")),
173                self.root
174                    .join(market.as_str())
175                    .join("86400")
176                    .join(format!("{symbol}.dat")),
177            ],
178        )
179    }
180
181    /// 定位财务文件路径。
182    pub fn finance_path(&self, symbol: &str, file_type: FileType) -> Result<PathBuf, DataDirError> {
183        validate_symbol(symbol)?;
184        let file_id = file_type as u16;
185        let filename_upper = format!("{symbol}_{file_id}.DAT");
186        let filename_lower = format!("{symbol}_{file_id}.dat");
187        first_existing(
188            "finance file",
189            vec![
190                self.root.join("financial").join(&filename_upper),
191                self.root.join("financial").join(&filename_lower),
192                self.root.join("finance").join(&filename_upper),
193                self.root.join("finance").join(&filename_lower),
194                self.root.join("Finance").join(&filename_upper),
195                self.root.join("Finance").join(&filename_lower),
196            ],
197        )
198    }
199
200    /// 定位分红 LevelDB 目录。
201    pub fn dividend_db_path(&self) -> Result<PathBuf, DataDirError> {
202        first_existing("dividend db", vec![self.root.join("DividData")])
203    }
204
205    /// 从 datadir 发现并解析 tick 文件为结构体。
206    pub fn parse_ticks_to_structs(
207        &self,
208        market: Market,
209        symbol: &str,
210        date: &str,
211    ) -> Result<Vec<TickData>, DataDirError> {
212        Ok(parse_ticks_to_structs(
213            self.tick_path(market, symbol, date)?,
214        )?)
215    }
216
217    /// 从 datadir 发现并解析 tick 文件为 `DataFrame`。
218    #[cfg(feature = "polars")]
219    pub fn parse_ticks_to_dataframe(
220        &self,
221        market: Market,
222        symbol: &str,
223        date: &str,
224    ) -> Result<DataFrame, DataDirError> {
225        Ok(parse_ticks_to_dataframe(
226            self.tick_path(market, symbol, date)?,
227        )?)
228    }
229
230    /// 从 datadir 发现并解析 1 分钟线文件为结构体。
231    pub fn parse_min_to_structs(
232        &self,
233        market: Market,
234        symbol: &str,
235    ) -> Result<Vec<MinKlineData>, DataDirError> {
236        Ok(parse_min_to_structs(self.min_path(market, symbol)?)?)
237    }
238
239    /// 从 datadir 发现并解析 1 分钟线文件为 `DataFrame`。
240    #[cfg(feature = "polars")]
241    pub fn parse_min_to_dataframe(
242        &self,
243        market: Market,
244        symbol: &str,
245    ) -> Result<DataFrame, DataDirError> {
246        Ok(parse_min_to_dataframe(self.min_path(market, symbol)?)?)
247    }
248
249    /// 从 datadir 发现并解析整个日线文件为结构体。
250    pub fn parse_daily_file_to_structs(
251        &self,
252        market: Market,
253        symbol: &str,
254    ) -> Result<Vec<DailyKlineData>, DataDirError> {
255        Ok(parse_daily_file_to_structs(self.day_path(market, symbol)?)?)
256    }
257
258    /// 从 datadir 发现并按字符串日期范围解析日线为结构体。
259    pub fn parse_daily_to_structs(
260        &self,
261        market: Market,
262        symbol: &str,
263        start: &str,
264        end: &str,
265    ) -> Result<Vec<DailyKlineData>, DataDirError> {
266        Ok(parse_daily_to_structs(
267            self.day_path(market, symbol)?,
268            start,
269            end,
270        )?)
271    }
272
273    /// 从 datadir 发现并按 typed 日期范围解析日线为结构体。
274    pub fn parse_daily_to_structs_in_range(
275        &self,
276        market: Market,
277        symbol: &str,
278        start: Option<NaiveDate>,
279        end: Option<NaiveDate>,
280    ) -> Result<Vec<DailyKlineData>, DataDirError> {
281        Ok(parse_daily_to_structs_in_range(
282            self.day_path(market, symbol)?,
283            start,
284            end,
285        )?)
286    }
287
288    /// 从 datadir 发现并解析整个日线文件为 `DataFrame`。
289    #[cfg(feature = "polars")]
290    pub fn parse_daily_file_to_dataframe(
291        &self,
292        market: Market,
293        symbol: &str,
294    ) -> Result<DataFrame, DataDirError> {
295        Ok(parse_daily_file_to_dataframe(
296            self.day_path(market, symbol)?,
297        )?)
298    }
299
300    /// 从 datadir 发现并按字符串日期范围解析日线为 `DataFrame`。
301    #[cfg(feature = "polars")]
302    pub fn parse_daily_to_dataframe(
303        &self,
304        market: Market,
305        symbol: &str,
306        start: &str,
307        end: &str,
308    ) -> Result<DataFrame, DataDirError> {
309        Ok(parse_daily_to_dataframe(
310            self.day_path(market, symbol)?,
311            start,
312            end,
313        )?)
314    }
315
316    /// 从 datadir 发现并按 typed 日期范围解析日线为 `DataFrame`。
317    #[cfg(feature = "polars")]
318    pub fn parse_daily_to_dataframe_in_range(
319        &self,
320        market: Market,
321        symbol: &str,
322        start: Option<NaiveDate>,
323        end: Option<NaiveDate>,
324    ) -> Result<DataFrame, DataDirError> {
325        Ok(parse_daily_to_dataframe_in_range(
326            self.day_path(market, symbol)?,
327            start,
328            end,
329        )?)
330    }
331
332    /// 从 datadir 发现并读取财务文件。
333    pub fn read_finance(
334        &self,
335        symbol: &str,
336        file_type: FileType,
337    ) -> Result<Vec<FinanceRecord>, DataDirError> {
338        Ok(FinanceReader::read_file(
339            self.finance_path(symbol, file_type)?,
340        )?)
341    }
342
343    /// 从 datadir 发现并打开分红数据库。
344    pub fn open_dividend_db(&self) -> Result<DividendDb, DataDirError> {
345        Ok(DividendDb::new(self.dividend_db_path()?)?)
346    }
347
348    /// 从 datadir 发现并加载节假日列表。
349    pub fn load_holidays(&self) -> Result<Vec<i64>, DataDirError> {
350        Ok(load_holidays_from_root(&self.root)?)
351    }
352
353    /// 从 datadir 发现并加载 sector 名称。
354    pub fn load_sector_names(&self) -> Result<Vec<String>, DataDirError> {
355        Ok(load_sector_names_from_root(&self.root)?)
356    }
357
358    /// 从 datadir 发现并加载 `sectorlist.DAT`。
359    pub fn load_sectorlist(&self) -> Result<Vec<String>, DataDirError> {
360        Ok(load_sectorlist_from_root(&self.root)?)
361    }
362
363    /// 从 datadir 发现并加载全部 sector 成员映射。
364    pub fn load_sector_weight_members(
365        &self,
366    ) -> Result<BTreeMap<String, Vec<String>>, DataDirError> {
367        Ok(load_sector_weight_members_from_root(&self.root)?)
368    }
369
370    /// 从 datadir 发现并加载指定 sector/index 的权重映射。
371    pub fn load_sector_weight_index(
372        &self,
373        index_code: &str,
374    ) -> Result<BTreeMap<String, f64>, DataDirError> {
375        validate_non_empty("index_code", index_code)?;
376        Ok(load_sector_weight_index_from_root(&self.root, index_code)?)
377    }
378
379    /// 从 datadir 发现并加载行业成员映射。
380    pub fn load_industry(&self) -> Result<BTreeMap<String, Vec<String>>, DataDirError> {
381        Ok(load_industry_from_root(&self.root)?)
382    }
383}
384
385fn validate_symbol(symbol: &str) -> Result<(), DataDirError> {
386    validate_non_empty("symbol", symbol)
387}
388
389fn validate_date(date: &str) -> Result<(), DataDirError> {
390    validate_non_empty("date", date)?;
391    if date.len() != 8 || !date.chars().all(|ch| ch.is_ascii_digit()) {
392        return Err(DataDirError::InvalidInput(format!(
393            "date must be YYYYMMDD, got {date}"
394        )));
395    }
396    Ok(())
397}
398
399fn validate_non_empty(field: &str, value: &str) -> Result<(), DataDirError> {
400    if value.trim().is_empty() {
401        return Err(DataDirError::InvalidInput(format!(
402            "{field} cannot be empty"
403        )));
404    }
405    Ok(())
406}
407
408fn first_existing(kind: &'static str, tried: Vec<PathBuf>) -> Result<PathBuf, DataDirError> {
409    for path in &tried {
410        if path.exists() {
411            return Ok(path.clone());
412        }
413    }
414    Err(DataDirError::PathNotFound { kind, tried })
415}