1use anyhow::{Context, Result};
3use regex::Regex;
4use std::fs::File;
5use std::io::Cursor;
6use std::path::Path;
7use std::time::Duration;
8use tracing::{debug, info};
9
10use crate::data::datatable::DataTable;
11use crate::data::stream_loader::{load_csv_from_reader, load_json_from_reader};
12use crate::sql::parser::ast::{DataFormat, WebCTESpec};
13
14pub struct WebDataFetcher {
16 client: reqwest::blocking::Client,
17}
18
19impl WebDataFetcher {
20 pub fn new() -> Result<Self> {
21 let client = reqwest::blocking::Client::builder()
22 .timeout(Duration::from_secs(30))
23 .user_agent("sql-cli/1.0")
24 .build()?;
25
26 Ok(Self { client })
27 }
28
29 pub fn fetch(&self, spec: &WebCTESpec, table_name: &str) -> Result<DataTable> {
31 info!("Fetching data from URL: {}", spec.url);
32
33 if spec.url.starts_with("file://") {
35 return self.fetch_file(spec, table_name);
36 }
37
38 let mut request = self.client.get(&spec.url);
41
42 for (key, value) in &spec.headers {
44 let resolved_value = self.resolve_env_var(value)?;
45 request = request.header(key, resolved_value);
46 }
47
48 let response = request
50 .send()
51 .with_context(|| format!("Failed to fetch from URL: {}", spec.url))?;
52
53 if !response.status().is_success() {
55 return Err(anyhow::anyhow!(
56 "HTTP request failed with status {}: {}",
57 response.status(),
58 spec.url
59 ));
60 }
61
62 let content_type = response
64 .headers()
65 .get("content-type")
66 .and_then(|v| v.to_str().ok())
67 .unwrap_or("")
68 .to_string();
69
70 debug!("Response content-type: {}", content_type);
71
72 let bytes = response.bytes()?;
74
75 let format = match &spec.format {
77 Some(fmt) => fmt.clone(),
78 None => self.detect_format(&spec.url, &content_type),
79 };
80
81 info!("Using format: {:?} for {}", format, spec.url);
82
83 self.parse_data(bytes.to_vec(), format, table_name, "web", &spec.url)
85 }
86
87 fn fetch_file(&self, spec: &WebCTESpec, table_name: &str) -> Result<DataTable> {
89 let file_path = if spec.url.starts_with("file://") {
91 &spec.url[7..] } else {
93 &spec.url
94 };
95
96 info!("Reading local file: {}", file_path);
97
98 let path = Path::new(file_path);
100 if !path.exists() {
101 return Err(anyhow::anyhow!("File not found: {}", file_path));
102 }
103
104 let file =
106 File::open(path).with_context(|| format!("Failed to open file: {}", file_path))?;
107
108 let metadata = file.metadata()?;
110 let file_size = metadata.len();
111 debug!("File size: {} bytes", file_size);
112
113 let format = match &spec.format {
115 Some(fmt) => fmt.clone(),
116 None => self.detect_format(file_path, ""),
117 };
118
119 info!("Using format: {:?} for {}", format, file_path);
120
121 match format {
123 DataFormat::CSV => load_csv_from_reader(file, table_name, "file", file_path)
124 .with_context(|| format!("Failed to parse CSV from {}", file_path)),
125 DataFormat::JSON => load_json_from_reader(file, table_name, "file", file_path)
126 .with_context(|| format!("Failed to parse JSON from {}", file_path)),
127 DataFormat::Auto => {
128 if file_path.ends_with(".json") {
130 let file = File::open(path)?;
131 load_json_from_reader(file, table_name, "file", file_path)
132 .with_context(|| format!("Failed to parse JSON from {}", file_path))
133 } else {
134 let file = File::open(path)?;
136 load_csv_from_reader(file, table_name, "file", file_path)
137 .with_context(|| format!("Failed to parse CSV from {}", file_path))
138 }
139 }
140 }
141 }
142
143 fn parse_data(
145 &self,
146 bytes: Vec<u8>,
147 format: DataFormat,
148 table_name: &str,
149 source_type: &str,
150 source_path: &str,
151 ) -> Result<DataTable> {
152 match format {
153 DataFormat::CSV => {
154 let reader = Cursor::new(bytes);
155 load_csv_from_reader(reader, table_name, source_type, source_path)
156 .with_context(|| format!("Failed to parse CSV from {}", source_path))
157 }
158 DataFormat::JSON => {
159 let reader = Cursor::new(bytes);
160 load_json_from_reader(reader, table_name, source_type, source_path)
161 .with_context(|| format!("Failed to parse JSON from {}", source_path))
162 }
163 DataFormat::Auto => {
164 let reader_csv = Cursor::new(bytes.clone());
166 match load_csv_from_reader(reader_csv, table_name, source_type, source_path) {
167 Ok(table) => Ok(table),
168 Err(_) => {
169 debug!("CSV parsing failed, trying JSON");
170 let reader_json = Cursor::new(bytes);
171 load_json_from_reader(reader_json, table_name, source_type, source_path)
172 .with_context(|| format!("Failed to parse data from {}", source_path))
173 }
174 }
175 }
176 }
177 }
178
179 fn detect_format(&self, url: &str, content_type: &str) -> DataFormat {
181 if content_type.contains("json") {
183 return DataFormat::JSON;
184 }
185 if content_type.contains("csv") || content_type.contains("text/plain") {
186 return DataFormat::CSV;
187 }
188
189 if url.ends_with(".json") {
191 DataFormat::JSON
192 } else if url.ends_with(".csv") {
193 DataFormat::CSV
194 } else {
195 DataFormat::Auto
197 }
198 }
199
200 fn resolve_env_var(&self, value: &str) -> Result<String> {
202 let mut result = value.to_string();
203
204 let re = Regex::new(r"\$\{([^}]+)\}").unwrap();
207 for cap in re.captures_iter(value) {
208 let var_name = &cap[1];
209 match std::env::var(var_name) {
210 Ok(var_value) => {
211 result = result.replace(&cap[0], &var_value);
212 }
213 Err(_) => {
214 debug!(
217 "Environment variable {} not found, keeping placeholder",
218 var_name
219 );
220 }
221 }
222 }
223
224 if result.starts_with('$') && !result.starts_with("${") {
226 let var_name = &result[1..];
227 if let Ok(var_value) = std::env::var(var_name) {
228 return Ok(var_value);
229 }
230 }
231
232 Ok(result)
233 }
234}
235
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns a format into a printable label so expectations can be compared
    /// with `assert_eq!` and show up readably in failure output.
    fn label(format: &DataFormat) -> &'static str {
        match format {
            DataFormat::CSV => "CSV",
            DataFormat::JSON => "JSON",
            DataFormat::Auto => "Auto",
        }
    }

    /// Checks format detection from the URL extension, from the content type,
    /// and the `Auto` fallback when neither gives a hint.
    #[test]
    fn test_detect_format() {
        let fetcher = WebDataFetcher::new().unwrap();

        // (url, content type, expected format label)
        let cases = [
            // Decided by the URL extension alone.
            ("http://example.com/data.csv", "", "CSV"),
            ("http://example.com/data.json", "", "JSON"),
            // Decided by the content type alone.
            ("http://example.com/data", "application/json", "JSON"),
            ("http://example.com/data", "text/csv", "CSV"),
            // No hint from either source.
            ("http://example.com/data", "", "Auto"),
        ];

        for &(url, content_type, expected) in cases.iter() {
            let detected = fetcher.detect_format(url, content_type);
            assert_eq!(
                label(&detected),
                expected,
                "url = {:?}, content type = {:?}",
                url,
                content_type
            );
        }
    }
}