from pathlib import Path

import pandas as pd
import seaborn as sns
def numeric_features(frame: pd.DataFrame) -> pd.DataFrame:
    """Return the numeric columns of *frame*, minus ``survived``, as floats."""
    return frame.select_dtypes("number").drop(columns=["survived"]).astype(float)


def flatten_column_major(frame: pd.DataFrame) -> pd.Series:
    """Return every value of *frame* as a single Series, one column after another.

    ``ravel(order="F")`` walks the underlying 2-D array column by column, so
    the result holds all of the first column, then all of the second, etc.
    """
    return pd.Series(frame.to_numpy().ravel(order="F"))


def write_headerless_csv(values: pd.Series, path: str) -> None:
    """Write *values* to *path* as CSV with neither an index nor a header row.

    Missing parent directories are created first so the script also works
    when the output folder does not exist yet.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    values.to_csv(target, index=False, header=False)


def main() -> None:
    """Export the seaborn ``titanic`` dataset as flat CSV fixtures.

    Writes the feature matrix (with and without missing values filled by 0),
    the ``survived`` target, the ``fare`` column, and a 100 000-row seeded
    bootstrap resample of features and target. All paths are relative to the
    current working directory.
    """
    df = sns.load_dataset("titanic")
    features = numeric_features(df)
    target = df["survived"].astype(float)

    write_headerless_csv(
        flatten_column_major(features.fillna(0)),
        "resources/contiguous_no_missing.csv",
    )
    write_headerless_csv(
        flatten_column_major(features),
        "resources/contiguous_with_missing.csv",
    )
    write_headerless_csv(target, "resources/performance.csv")
    write_headerless_csv(features["fare"], "resources/performance-fare.csv")

    # Sampling with replacement and a fixed seed yields a reproducible,
    # larger dataset drawn from the same rows.
    resampled = df.sample(
        100_000,
        random_state=0,
        replace=True,
    ).reset_index(drop=True)
    resampled_features = numeric_features(resampled)
    resampled_target = resampled["survived"].astype(float)

    write_headerless_csv(
        flatten_column_major(resampled_features.fillna(0)),
        "resources/contiguous_no_missing_100k_samp_seed0.csv",
    )
    write_headerless_csv(
        resampled_target,
        "resources/performance_100k_samp_seed0.csv",
    )


if __name__ == "__main__":
    main()