Skip to main content

data_preprocess/parser/
mod.rs

1pub mod bar_csv;
2pub mod tick_csv;
3
4use crate::error::{DataError, Result};
5use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeZone};
6use std::path::Path;
7
8/// Extract the symbol from a filename (first segment before '_'), uppercased.
9pub fn extract_symbol_from_filename(path: &Path) -> Result<String> {
10    let stem = path
11        .file_stem()
12        .and_then(|s| s.to_str())
13        .ok_or_else(|| DataError::SymbolExtraction(path.display().to_string()))?;
14    let symbol = stem
15        .split('_')
16        .next()
17        .ok_or_else(|| DataError::SymbolExtraction(stem.to_string()))?;
18    if symbol.is_empty() {
19        return Err(DataError::SymbolExtraction(stem.to_string()));
20    }
21    Ok(symbol.to_uppercase())
22}
23
/// Canonicalize an exchange name.
///
/// Leading and trailing whitespace is removed and the remainder is
/// converted to lowercase; interior whitespace is left untouched.
pub fn normalize_exchange(exchange: &str) -> String {
    let trimmed = exchange.trim();
    trimmed.to_lowercase()
}
28
29/// Parse a timezone offset string like "+02:00" or "-05:00" into FixedOffset.
30pub fn parse_tz_offset(s: &str) -> Result<FixedOffset> {
31    let s = s.trim();
32    if s.len() < 5 {
33        return Err(DataError::InvalidTimestamp(format!(
34            "invalid tz offset: {s}"
35        )));
36    }
37    let sign = match s.as_bytes()[0] {
38        b'+' => 1i32,
39        b'-' => -1i32,
40        _ => {
41            return Err(DataError::InvalidTimestamp(format!(
42                "tz offset must start with +/-: {s}"
43            )))
44        }
45    };
46    let rest = &s[1..];
47    let parts: Vec<&str> = rest.split(':').collect();
48    if parts.len() != 2 {
49        return Err(DataError::InvalidTimestamp(format!(
50            "invalid tz offset format: {s}"
51        )));
52    }
53    let hours: i32 = parts[0]
54        .parse()
55        .map_err(|_| DataError::InvalidTimestamp(format!("bad hours in tz: {s}")))?;
56    let minutes: i32 = parts[1]
57        .parse()
58        .map_err(|_| DataError::InvalidTimestamp(format!("bad minutes in tz: {s}")))?;
59    let total_secs = sign * (hours * 3600 + minutes * 60);
60    FixedOffset::east_opt(total_secs)
61        .ok_or_else(|| DataError::InvalidTimestamp(format!("out of range tz offset: {s}")))
62}
63
64/// Parse date+time CSV columns into NaiveDateTime (UTC).
65/// Interprets the input as the given source_offset, converts to UTC.
66pub fn parse_datetime_to_utc(
67    date_str: &str,
68    time_str: &str,
69    source_offset: &FixedOffset,
70) -> Result<NaiveDateTime> {
71    let date = NaiveDate::parse_from_str(date_str, "%Y.%m.%d")
72        .map_err(|e| DataError::InvalidTimestamp(format!("{date_str}: {e}")))?;
73    // Handles both "HH:MM:SS" and "HH:MM:SS.mmm"
74    let time = NaiveTime::parse_from_str(time_str, "%H:%M:%S%.f")
75        .map_err(|e| DataError::InvalidTimestamp(format!("{time_str}: {e}")))?;
76    let ndt = NaiveDateTime::new(date, time);
77    let local = source_offset
78        .from_local_datetime(&ndt)
79        .single()
80        .ok_or_else(|| DataError::InvalidTimestamp(format!("ambiguous datetime: {ndt}")))?;
81    Ok(local.naive_utc())
82}
83
84/// Parse CLI datetime argument: "YYYY-MM-DD" or "YYYY-MM-DDTHH:MM:SS".
85pub fn parse_datetime_arg(s: &str) -> Result<NaiveDateTime> {
86    if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") {
87        return Ok(dt);
88    }
89    if let Ok(d) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
90        return Ok(d.and_hms_opt(0, 0, 0).unwrap());
91    }
92    Err(DataError::InvalidTimestamp(format!(
93        "expected YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS, got: {s}"
94    )))
95}
96
/// Read an optional `f64` from a CSV field.
///
/// Returns `None` when the field is absent, is empty or whitespace-only
/// after trimming, or does not parse as an `f64`.
pub fn parse_optional_f64(field: Option<&str>) -> Option<f64> {
    let trimmed = field?.trim();
    if trimmed.is_empty() {
        return None;
    }
    trimmed.parse().ok()
}
104
/// Read an optional `i32` from a CSV field.
///
/// Returns `None` when the field is absent, is empty or whitespace-only
/// after trimming, or does not parse as an `i32`.
pub fn parse_optional_i32(field: Option<&str>) -> Option<i32> {
    match field.map(str::trim) {
        Some(text) if !text.is_empty() => text.parse().ok(),
        _ => None,
    }
}
112
/// Read a mandatory `f64` from a CSV field.
///
/// `name` and `line` are used only to build the warning text.
///
/// # Errors
///
/// Returns a warning string of the form
/// `line <line>: missing or invalid <name>` when the field is absent, is
/// empty after trimming, or does not parse as an `f64`.
pub fn parse_required_f64(
    field: Option<&str>,
    name: &str,
    line: usize,
) -> std::result::Result<f64, String> {
    let parsed = match field.map(str::trim) {
        Some(text) if !text.is_empty() => text.parse::<f64>().ok(),
        _ => None,
    };
    match parsed {
        Some(value) => Ok(value),
        None => Err(format!("line {}: missing or invalid {}", line, name)),
    }
}
125
/// Read a mandatory `i64` from a CSV field.
///
/// Returns `None` when the field is absent, is empty or whitespace-only
/// after trimming, or does not parse as an `i64`.
pub fn parse_required_i64(field: Option<&str>) -> Option<i64> {
    field.map(str::trim).and_then(|text| {
        if text.is_empty() {
            None
        } else {
            text.parse().ok()
        }
    })
}
133
/// Read a mandatory `i32` from a CSV field.
///
/// Returns `None` when the field is absent, is empty or whitespace-only
/// after trimming, or does not parse as an `i32`.
pub fn parse_required_i32(field: Option<&str>) -> Option<i32> {
    let text = field?.trim();
    if text.is_empty() {
        None
    } else {
        text.parse().ok()
    }
}