1use super::error::DefaultCodebookError;
3use lace_codebook::Codebook;
4use polars::frame::DataFrame;
5use std::fmt;
6
7#[cfg(feature = "formats")]
8use std::ffi::{OsStr, OsString};
9#[cfg(feature = "formats")]
10use std::path::PathBuf;
11
12#[cfg(not(feature = "formats"))]
14#[derive(Debug, Clone, PartialEq)]
15pub enum DataSource {
16 Polars(DataFrame),
18 Empty,
20}
21
/// Describes where the data come from.
///
/// This definition is compiled when the `formats` feature is enabled and adds
/// one variant per supported file format, each carrying the path to the file.
#[cfg(feature = "formats")]
#[derive(Clone, Debug, PartialEq)]
pub enum DataSource {
    /// Path to a CSV file.
    Csv(PathBuf),
    /// Path to an Arrow IPC file.
    Ipc(PathBuf),
    /// Path to a JSON (or JSON lines) file.
    Json(PathBuf),
    /// Path to a Parquet file.
    Parquet(PathBuf),
    /// Data held in memory as a polars `DataFrame`.
    Polars(DataFrame),
    /// No data.
    Empty,
}
39
/// Error produced when a path's extension does not map to a supported format.
///
/// The payload is the extension taken from the offending path, or `None` when
/// the path had no extension at all.
#[cfg(feature = "formats")]
#[derive(Debug, Clone, PartialEq)]
pub struct UnknownExtension(pub Option<OsString>);
44
#[cfg(feature = "formats")]
impl fmt::Display for UnknownExtension {
    /// Writes `Unknown Extension: ` followed by the `Debug` rendering of the
    /// wrapped `Option<OsString>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(extension) = self;
        f.write_fmt(format_args!("Unknown Extension: {:?}", extension))
    }
}
51
// Marker impl: lets `UnknownExtension` be used wherever a
// `std::error::Error` is expected.
#[cfg(feature = "formats")]
impl std::error::Error for UnknownExtension {
    // No methods overridden; the trait's defaults are used as-is.
}
54
#[cfg(feature = "formats")]
impl TryFrom<PathBuf> for DataSource {
    type Error = UnknownExtension;

    /// Picks the file-backed variant that matches the extension of `value`.
    ///
    /// Extensions are compared after lowercasing:
    ///
    /// - `csv`, and `gz` when the name ends in `.csv.gz` → `Csv`
    /// - `json`, `jsonl` → `Json`
    /// - `parquet` → `Parquet`
    /// - `arrow`, `ipc` → `Ipc`
    ///
    /// # Errors
    ///
    /// Returns [`UnknownExtension`] holding the path's extension exactly as
    /// written (or `None` if there is none) when the extension is missing, is
    /// not valid UTF-8, or is not listed above. A `.gz` file whose inner
    /// extension is not `csv` (for example `data.json.gz`) is rejected.
    fn try_from(value: PathBuf) -> Result<Self, Self::Error> {
        // Lowercased final extension; `None` if absent or not valid UTF-8.
        fn lowercase_extension(path: &std::path::Path) -> Option<String> {
            path.extension()
                .and_then(OsStr::to_str)
                .map(str::to_lowercase)
        }

        // `Path::extension` only yields the text after the last dot, so a
        // gzipped CSV shows up as "gz". Look at the extension of the file
        // stem to tell `x.csv.gz` apart from other gzipped files.
        fn is_gzipped_csv(path: &std::path::Path) -> bool {
            path.file_stem()
                .map(std::path::Path::new)
                .and_then(lowercase_extension)
                .map_or(false, |inner| inner == "csv")
        }

        match lowercase_extension(&value).as_deref() {
            Some("csv") => Ok(Self::Csv(value)),
            Some("gz") if is_gzipped_csv(&value) => Ok(Self::Csv(value)),
            Some("json") | Some("jsonl") => Ok(Self::Json(value)),
            Some("parquet") => Ok(Self::Parquet(value)),
            Some("arrow") | Some("ipc") => Ok(Self::Ipc(value)),
            // Missing, non-UTF-8, or unsupported extension: report it in its
            // original (non-lowercased) form.
            _ => Err(UnknownExtension(
                value.extension().map(OsStr::to_os_string),
            )),
        }
    }
}
80
#[cfg(feature = "formats")]
impl TryFrom<DataSource> for PathBuf {
    type Error = &'static str;

    /// Extracts the path carried by a file-backed `DataSource`.
    ///
    /// # Errors
    ///
    /// `Polars` and `Empty` sources carry no path; each yields a static
    /// message saying so.
    fn try_from(src: DataSource) -> Result<Self, Self::Error> {
        use DataSource::{Csv, Empty, Ipc, Json, Parquet, Polars};

        let path = match src {
            Csv(path) | Ipc(path) | Json(path) | Parquet(path) => path,
            Polars(_) => {
                return Err("DataSource::Polars has no corresponding path");
            }
            Empty => {
                return Err("DataSource::EMPTY has no path information");
            }
        };
        Ok(path)
    }
}
99
100impl From<DataFrame> for DataSource {
101 fn from(value: DataFrame) -> Self {
102 Self::Polars(value)
103 }
104}
105
#[cfg(feature = "formats")]
impl fmt::Display for DataSource {
    /// Formats the source for display.
    ///
    /// - File-backed variants print their path. Paths that are not valid
    ///   UTF-8 are rendered lossily rather than being reported as empty.
    /// - `Polars` prints `polars::DataFrame` followed by the frame's shape,
    ///   matching the output used when the `formats` feature is disabled.
    /// - `Empty` prints `EMPTY`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Csv(path)
            | Self::Ipc(path)
            | Self::Json(path)
            | Self::Parquet(path) => write!(f, "{}", path.display()),
            Self::Polars(df) => {
                write!(f, "polars::DataFrame {:?}", df.shape())
            }
            // `pad` keeps honoring width/alignment flags, as `{}` on a
            // string does.
            Self::Empty => f.pad("EMPTY"),
        }
    }
}
118
119#[cfg(not(feature = "formats"))]
120impl fmt::Display for DataSource {
121 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
122 match self {
123 Self::Polars(df) => {
124 write!(f, "polars::DataFrame {:?}", df.shape())
125 }
126 Self::Empty => {
127 write!(f, "Empty")
128 }
129 }
130 }
131}
132
#[cfg(feature = "formats")]
impl DataSource {
    /// Returns a copy of the source's path as an `OsString`.
    ///
    /// Yields `None` for `Polars` and `Empty`, which carry no path.
    pub fn to_os_string(&self) -> Option<OsString> {
        let path = match self {
            Self::Csv(p) | Self::Ipc(p) | Self::Json(p) | Self::Parquet(p) => {
                p
            }
            Self::Polars(_) | Self::Empty => return None,
        };
        Some(path.as_os_str().to_os_string())
    }

    /// Generates a codebook for this source.
    ///
    /// Each file-backed variant is handed to the matching
    /// `formats::codebook_from_*` helper and `Polars` to
    /// `data::df_to_codebook`, always with `None` for the three optional
    /// arguments and `false` for the trailing flag. `Empty` yields
    /// `Codebook::default()`.
    ///
    /// # Errors
    ///
    /// Any error from the underlying helper, converted into
    /// `DefaultCodebookError`.
    pub fn default_codebook(&self) -> Result<Codebook, DefaultCodebookError> {
        use crate::codebook::{data, formats};

        let built = match self {
            Self::Csv(file) => {
                formats::codebook_from_csv(file, None, None, None, false)
            }
            Self::Ipc(file) => {
                formats::codebook_from_ipc(file, None, None, None, false)
            }
            Self::Json(file) => {
                formats::codebook_from_json(file, None, None, None, false)
            }
            Self::Parquet(file) => {
                formats::codebook_from_parquet(file, None, None, None, false)
            }
            Self::Polars(frame) => {
                data::df_to_codebook(frame, None, None, None, false)
            }
            Self::Empty => Ok(Codebook::default()),
        };
        // `?` performs the conversion into `DefaultCodebookError`.
        Ok(built?)
    }
}
170
171#[cfg(not(feature = "formats"))]
172impl DataSource {
173 pub fn default_codebook(&self) -> Result<Codebook, DefaultCodebookError> {
175 use crate::codebook::data;
176 let codebook = match &self {
177 DataSource::Polars(df) => {
178 data::df_to_codebook(df, None, None, None, false)
179 }
180 DataSource::Empty => Ok(Codebook::default()),
181 }?;
182 Ok(codebook)
183 }
184}