use polars::prelude::*;
/// Detects opening price gaps between consecutive sessions.
///
/// For every row after the first, the row's `open` is compared with the
/// previous row's `close` whenever the date column indicates that a new
/// session started. A gap is recorded when the absolute percentage difference
/// is at least `price_threshold` (default `0.5`).
///
/// # Arguments
///
/// * `df` - frame holding `open`, `close`, `high`, `low` (each read through
///   `.f64()`) and a date column.
/// * `price_threshold` - minimum absolute gap, in percent; defaults to `0.5`.
/// * `date_col` - name of the date column; defaults to `"date"`. `Date`,
///   `Datetime` and string columns are compared row to row; for any other
///   dtype no row is treated as a new session, so no gaps are reported.
///
/// # Returns
///
/// Three series with one entry per input row (empty for an empty frame):
///
/// * `gap_size_pct` - signed gap in percent, `0.0` where no gap was recorded.
/// * `gap_type` - `1` for a gap up, `-1` for a gap down, `0` for no gap.
/// * `gap_fill_pct` - percentage of the gap retraced within the same row
///   (capped at `100.0`); rows without a gap carry `100.0`.
///
/// # Errors
///
/// Returns `PolarsError::ComputeError` when `open`, `close` or the date column
/// is missing. Errors from looking up `high`/`low` and from the `.f64()`
/// downcasts are propagated unchanged.
pub fn analyze_price_gaps(
    df: &DataFrame,
    price_threshold: Option<f64>,
    date_col: Option<&str>,
) -> PolarsResult<(Series, Series, Series)> {
    // Share of the gap (percent, capped at 100) that price moved back through.
    // A non-positive gap size is reported as fully filled.
    fn fill_share(gap_points: f64, fill_points: f64) -> f64 {
        if gap_points > 0.0 {
            (fill_points / gap_points * 100.0).min(100.0)
        } else {
            100.0
        }
    }

    let threshold = price_threshold.unwrap_or(0.5);
    let date_column = date_col.unwrap_or("date");
    for col in ["open", "close", date_column].iter() {
        if !df.schema().contains(*col) {
            return Err(PolarsError::ComputeError(
                format!("Required column '{}' not found", col).into(),
            ));
        }
    }

    let height = df.height();
    let open = df.column("open")?.f64()?;
    let close = df.column("close")?.f64()?;
    let high = df.column("high")?.f64()?;
    let low = df.column("low")?.f64()?;
    let dates = df.column(date_column)?;

    // Session boundaries are resolved once, up front: entry `k` says whether
    // row `k + 1` falls on a different day/label than row `k`.
    let new_session: Vec<bool> = match dates.dtype() {
        DataType::Date => {
            let days = dates.date()?;
            (1..height).map(|i| days.get(i) != days.get(i - 1)).collect()
        }
        DataType::Datetime(_, _) => {
            // Casting to Date lets polars honour the column's time unit
            // instead of assuming milliseconds. Null timestamps stay null and
            // are compared as such, matching the Date branch.
            let as_date = dates.cast(&DataType::Date)?;
            let days = as_date.date()?;
            (1..height).map(|i| days.get(i) != days.get(i - 1)).collect()
        }
        // String columns are detected through the accessor itself so this
        // does not depend on how the dtype variant is named.
        _ => match dates.str() {
            Ok(labels) => (1..height)
                .map(|i| labels.get(i).unwrap_or("") != labels.get(i - 1).unwrap_or(""))
                .collect(),
            Err(_) => vec![false; height.saturating_sub(1)],
        },
    };

    let mut gap_size: Vec<f64> = Vec::with_capacity(height);
    let mut gap_type: Vec<i32> = Vec::with_capacity(height);
    let mut gap_fill_pct: Vec<f64> = Vec::with_capacity(height);

    // The first row has no previous close to compare against. Guarding on
    // `height` keeps the outputs empty for an empty frame.
    if height > 0 {
        gap_size.push(0.0);
        gap_type.push(0);
        gap_fill_pct.push(100.0);
    }

    for (i, &is_new_session) in (1..height).zip(new_session.iter()) {
        let current_open = open.get(i).unwrap_or(f64::NAN);
        let prev_close = close.get(i - 1).unwrap_or(f64::NAN);
        let current_high = high.get(i).unwrap_or(f64::NAN);
        let current_low = low.get(i).unwrap_or(f64::NAN);

        // A gap is only measurable across a session boundary with both prices
        // present and a non-zero reference close.
        let comparable = is_new_session
            && !current_open.is_nan()
            && !prev_close.is_nan()
            && prev_close != 0.0;
        let gap_pct = if comparable {
            (current_open - prev_close) / prev_close * 100.0
        } else {
            0.0
        };

        if comparable && gap_pct.abs() >= threshold {
            let (kind, fill) = if gap_pct > 0.0 {
                // Gap up: filled by trading back down toward the prior close.
                let fill = if !current_low.is_nan() && current_low < current_open {
                    fill_share(
                        current_open - prev_close,
                        current_open - current_low.max(prev_close),
                    )
                } else {
                    0.0
                };
                (1, fill)
            } else {
                // Gap down: filled by trading back up toward the prior close.
                let fill = if !current_high.is_nan() && current_high > current_open {
                    fill_share(
                        prev_close - current_open,
                        current_high.min(prev_close) - current_open,
                    )
                } else {
                    0.0
                };
                (-1, fill)
            };
            gap_size.push(gap_pct);
            gap_type.push(kind);
            gap_fill_pct.push(fill);
        } else {
            gap_size.push(0.0);
            gap_type.push(0);
            gap_fill_pct.push(100.0);
        }
    }

    Ok((
        Series::new("gap_size_pct".into(), gap_size),
        Series::new("gap_type".into(), gap_type),
        Series::new("gap_fill_pct".into(), gap_fill_pct),
    ))
}
/// Runs [`analyze_price_gaps`] with the default date column and attaches the
/// results to `df`, together with a derived `gap_trade_signal` column.
///
/// The signal is computed per row from the gap type and size:
///
/// * gap up larger than 2% -> `-1`
/// * gap down larger than 2% in magnitude -> `1`
/// * any other row -> the gap type itself (`1`, `-1`, or `0` for no gap)
///
/// # Arguments
///
/// * `df` - frame to analyse and extend in place.
/// * `price_threshold` - forwarded to [`analyze_price_gaps`].
///
/// # Errors
///
/// Propagates errors from [`analyze_price_gaps`], from the `i32`/`f64`
/// downcasts of its results, and from `DataFrame::with_column`.
pub fn add_gap_analysis(df: &mut DataFrame, price_threshold: Option<f64>) -> PolarsResult<()> {
    let (gap_size, gap_type, gap_fill) = analyze_price_gaps(df, price_threshold, None)?;
    let height = df.height();

    // Derive the signal while the series are still owned here: `with_column`
    // takes its argument by value, so they cannot be read afterwards. Doing
    // this first also leaves `df` untouched if a downcast fails.
    let gap_trade_signal: Vec<i32> = {
        let gap_type_vals = gap_type.i32()?;
        let gap_size_vals = gap_size.f64()?;
        (0..height)
            .map(|i| {
                let g_type = gap_type_vals.get(i).unwrap_or(0);
                let g_size = gap_size_vals.get(i).unwrap_or(0.0);
                if g_type > 0 && g_size > 2.0 {
                    -1
                } else if g_type < 0 && g_size.abs() > 2.0 {
                    1
                } else {
                    // Smaller gaps pass the gap type through; rows without a
                    // gap have type 0 and therefore signal 0.
                    g_type
                }
            })
            .collect()
    };

    df.with_column(gap_size)?;
    df.with_column(gap_type)?;
    df.with_column(gap_fill)?;
    df.with_column(Series::new("gap_trade_signal".into(), gap_trade_signal))?;
    Ok(())
}
/// Estimates, per size bucket, how often a gap ended up at least 80% filled.
///
/// Each entry `t` of `gap_size_bins` defines the bucket `[t, t + 1.0)` over
/// the absolute value of `gap_size_pct`. Only rows whose `gap_type` is
/// non-zero are counted. Null cells are read as `0` / `0.0`.
///
/// # Returns
///
/// One `(bucket_start, probability_percent)` pair per entry of
/// `gap_size_bins`, in the same order. A bucket with no matching rows yields
/// `0.0`.
///
/// # Errors
///
/// Returns `PolarsError::ComputeError` when any of `gap_type`,
/// `gap_size_pct` or `gap_fill_pct` is missing from `df`; downcast errors
/// from `.i32()` / `.f64()` are propagated.
pub fn calculate_gap_fill_probability(
    df: &DataFrame,
    gap_size_bins: &[f64],
) -> PolarsResult<Vec<(f64, f64)>> {
    let has_gap_columns = ["gap_type", "gap_size_pct", "gap_fill_pct"]
        .iter()
        .all(|name| df.schema().contains(*name));
    if !has_gap_columns {
        return Err(PolarsError::ComputeError(
            "Required gap analysis columns not found. Run gap analysis first.".into(),
        ));
    }

    let kinds = df.column("gap_type")?.i32()?;
    let sizes = df.column("gap_size_pct")?.f64()?;
    let fills = df.column("gap_fill_pct")?.f64()?;

    // Gather (|size|, fill) once for the rows that actually carry a gap; the
    // per-bucket passes below then only look at those.
    let gaps: Vec<(f64, f64)> = (0..df.height())
        .filter(|&row| kinds.get(row).unwrap_or(0) != 0)
        .map(|row| {
            (
                sizes.get(row).unwrap_or(0.0).abs(),
                fills.get(row).unwrap_or(0.0),
            )
        })
        .collect();

    let probabilities = gap_size_bins
        .iter()
        .map(|&lower| {
            let upper = lower + 1.0;
            let (matched, filled) = gaps
                .iter()
                .filter(|(magnitude, _)| *magnitude >= lower && *magnitude < upper)
                .fold((0usize, 0usize), |(matched, filled), (_, fill)| {
                    (matched + 1, filled + usize::from(*fill >= 80.0))
                });
            let probability = match matched {
                0 => 0.0,
                _ => filled as f64 / matched as f64 * 100.0,
            };
            (lower, probability)
        })
        .collect();

    Ok(probabilities)
}