data_preprocess/parser/
mod.rs1pub mod bar_csv;
2pub mod tick_csv;
3
4use crate::error::{DataError, Result};
5use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeZone};
6use std::path::Path;
7
8pub fn extract_symbol_from_filename(path: &Path) -> Result<String> {
10 let stem = path
11 .file_stem()
12 .and_then(|s| s.to_str())
13 .ok_or_else(|| DataError::SymbolExtraction(path.display().to_string()))?;
14 let symbol = stem
15 .split('_')
16 .next()
17 .ok_or_else(|| DataError::SymbolExtraction(stem.to_string()))?;
18 if symbol.is_empty() {
19 return Err(DataError::SymbolExtraction(stem.to_string()));
20 }
21 Ok(symbol.to_uppercase())
22}
23
/// Canonicalises an exchange name: surrounding whitespace is stripped and the
/// remainder is converted to lowercase.
pub fn normalize_exchange(exchange: &str) -> String {
    let trimmed = exchange.trim();
    trimmed.to_lowercase()
}
28
29pub fn parse_tz_offset(s: &str) -> Result<FixedOffset> {
31 let s = s.trim();
32 if s.len() < 5 {
33 return Err(DataError::InvalidTimestamp(format!(
34 "invalid tz offset: {s}"
35 )));
36 }
37 let sign = match s.as_bytes()[0] {
38 b'+' => 1i32,
39 b'-' => -1i32,
40 _ => {
41 return Err(DataError::InvalidTimestamp(format!(
42 "tz offset must start with +/-: {s}"
43 )))
44 }
45 };
46 let rest = &s[1..];
47 let parts: Vec<&str> = rest.split(':').collect();
48 if parts.len() != 2 {
49 return Err(DataError::InvalidTimestamp(format!(
50 "invalid tz offset format: {s}"
51 )));
52 }
53 let hours: i32 = parts[0]
54 .parse()
55 .map_err(|_| DataError::InvalidTimestamp(format!("bad hours in tz: {s}")))?;
56 let minutes: i32 = parts[1]
57 .parse()
58 .map_err(|_| DataError::InvalidTimestamp(format!("bad minutes in tz: {s}")))?;
59 let total_secs = sign * (hours * 3600 + minutes * 60);
60 FixedOffset::east_opt(total_secs)
61 .ok_or_else(|| DataError::InvalidTimestamp(format!("out of range tz offset: {s}")))
62}
63
64pub fn parse_datetime_to_utc(
67 date_str: &str,
68 time_str: &str,
69 source_offset: &FixedOffset,
70) -> Result<NaiveDateTime> {
71 let date = NaiveDate::parse_from_str(date_str, "%Y.%m.%d")
72 .map_err(|e| DataError::InvalidTimestamp(format!("{date_str}: {e}")))?;
73 let time = NaiveTime::parse_from_str(time_str, "%H:%M:%S%.f")
75 .map_err(|e| DataError::InvalidTimestamp(format!("{time_str}: {e}")))?;
76 let ndt = NaiveDateTime::new(date, time);
77 let local = source_offset
78 .from_local_datetime(&ndt)
79 .single()
80 .ok_or_else(|| DataError::InvalidTimestamp(format!("ambiguous datetime: {ndt}")))?;
81 Ok(local.naive_utc())
82}
83
84pub fn parse_datetime_arg(s: &str) -> Result<NaiveDateTime> {
86 if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") {
87 return Ok(dt);
88 }
89 if let Ok(d) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
90 return Ok(d.and_hms_opt(0, 0, 0).unwrap());
91 }
92 Err(DataError::InvalidTimestamp(format!(
93 "expected YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS, got: {s}"
94 )))
95}
96
/// Parses an optional text field as an `f64`.
///
/// Returns `None` when the field is absent, is empty after trimming
/// whitespace, or does not parse as an `f64`.
pub fn parse_optional_f64(field: Option<&str>) -> Option<f64> {
    let text = field?.trim();
    if text.is_empty() {
        return None;
    }
    text.parse().ok()
}
104
/// Parses an optional text field as an `i32`.
///
/// Returns `None` when the field is absent, is empty after trimming
/// whitespace, or does not parse as an `i32`.
pub fn parse_optional_i32(field: Option<&str>) -> Option<i32> {
    match field.map(str::trim) {
        Some(text) if !text.is_empty() => text.parse().ok(),
        _ => None,
    }
}
112
/// Parses a mandatory text field as an `f64`.
///
/// `name` and `line` are used only to build the error message.
///
/// # Errors
///
/// Returns `"line <line>: missing or invalid <name>"` when the field is
/// absent, is empty after trimming whitespace, or does not parse as an `f64`.
pub fn parse_required_f64(
    field: Option<&str>,
    name: &str,
    line: usize,
) -> std::result::Result<f64, String> {
    let value = field.map(str::trim).and_then(|text| {
        if text.is_empty() {
            None
        } else {
            text.parse::<f64>().ok()
        }
    });

    match value {
        Some(number) => Ok(number),
        None => Err(format!("line {line}: missing or invalid {name}")),
    }
}
125
/// Parses a text field as an `i64`.
///
/// Despite the name, failure is reported as `None` rather than as an error:
/// the result is `None` when the field is absent, is empty after trimming
/// whitespace, or does not parse as an `i64`.
pub fn parse_required_i64(field: Option<&str>) -> Option<i64> {
    if let Some(raw) = field {
        let text = raw.trim();
        if !text.is_empty() {
            return text.parse().ok();
        }
    }
    None
}
133
/// Parses a text field as an `i32`.
///
/// Despite the name, failure is reported as `None` rather than as an error:
/// the result is `None` when the field is absent, is empty after trimming
/// whitespace, or does not parse as an `i32`.
pub fn parse_required_i32(field: Option<&str>) -> Option<i32> {
    // An absent field is treated like an empty one; both yield `None`.
    let text = field.unwrap_or("").trim();
    if text.is_empty() {
        None
    } else {
        text.parse::<i32>().ok()
    }
}