sql_cli/web/
http_fetcher.rs1use anyhow::{Context, Result};
3use regex::Regex;
4use std::io::Cursor;
5use std::time::Duration;
6use tracing::{debug, info};
7
8use crate::data::datatable::DataTable;
9use crate::data::stream_loader::{load_csv_from_reader, load_json_from_reader};
10use crate::sql::parser::ast::{DataFormat, WebCTESpec};
11
12pub struct WebDataFetcher {
14 client: reqwest::blocking::Client,
15}
16
17impl WebDataFetcher {
18 pub fn new() -> Result<Self> {
19 let client = reqwest::blocking::Client::builder()
20 .timeout(Duration::from_secs(30))
21 .user_agent("sql-cli/1.0")
22 .build()?;
23
24 Ok(Self { client })
25 }
26
27 pub fn fetch(&self, spec: &WebCTESpec, table_name: &str) -> Result<DataTable> {
29 info!("Fetching data from URL: {}", spec.url);
30
31 let mut request = self.client.get(&spec.url);
33
34 for (key, value) in &spec.headers {
36 let resolved_value = self.resolve_env_var(value)?;
37 request = request.header(key, resolved_value);
38 }
39
40 let response = request
42 .send()
43 .with_context(|| format!("Failed to fetch from URL: {}", spec.url))?;
44
45 if !response.status().is_success() {
47 return Err(anyhow::anyhow!(
48 "HTTP request failed with status {}: {}",
49 response.status(),
50 spec.url
51 ));
52 }
53
54 let content_type = response
56 .headers()
57 .get("content-type")
58 .and_then(|v| v.to_str().ok())
59 .unwrap_or("")
60 .to_string();
61
62 debug!("Response content-type: {}", content_type);
63
64 let bytes = response.bytes()?;
66
67 let format = match &spec.format {
69 Some(fmt) => fmt.clone(),
70 None => self.detect_format(&spec.url, &content_type),
71 };
72
73 info!("Using format: {:?} for {}", format, spec.url);
74
75 match format {
77 DataFormat::CSV => {
78 let reader = Cursor::new(bytes);
79 load_csv_from_reader(reader, table_name, "web", &spec.url)
80 .with_context(|| format!("Failed to parse CSV from {}", spec.url))
81 }
82 DataFormat::JSON => {
83 let reader = Cursor::new(bytes);
84 load_json_from_reader(reader, table_name, "web", &spec.url)
85 .with_context(|| format!("Failed to parse JSON from {}", spec.url))
86 }
87 DataFormat::Auto => {
88 let reader_csv = Cursor::new(bytes.clone());
90 match load_csv_from_reader(reader_csv, table_name, "web", &spec.url) {
91 Ok(table) => Ok(table),
92 Err(_) => {
93 debug!("CSV parsing failed, trying JSON");
94 let reader_json = Cursor::new(bytes);
95 load_json_from_reader(reader_json, table_name, "web", &spec.url)
96 .with_context(|| format!("Failed to parse data from {}", spec.url))
97 }
98 }
99 }
100 }
101 }
102
103 fn detect_format(&self, url: &str, content_type: &str) -> DataFormat {
105 if content_type.contains("json") {
107 return DataFormat::JSON;
108 }
109 if content_type.contains("csv") || content_type.contains("text/plain") {
110 return DataFormat::CSV;
111 }
112
113 if url.ends_with(".json") {
115 DataFormat::JSON
116 } else if url.ends_with(".csv") {
117 DataFormat::CSV
118 } else {
119 DataFormat::Auto
121 }
122 }
123
124 fn resolve_env_var(&self, value: &str) -> Result<String> {
126 let mut result = value.to_string();
127
128 let re = Regex::new(r"\$\{([^}]+)\}").unwrap();
131 for cap in re.captures_iter(value) {
132 let var_name = &cap[1];
133 match std::env::var(var_name) {
134 Ok(var_value) => {
135 result = result.replace(&cap[0], &var_value);
136 }
137 Err(_) => {
138 debug!(
141 "Environment variable {} not found, keeping placeholder",
142 var_name
143 );
144 }
145 }
146 }
147
148 if result.starts_with('$') && !result.starts_with("${") {
150 let var_name = &result[1..];
151 if let Ok(var_value) = std::env::var(var_name) {
152 return Ok(var_value);
153 }
154 }
155
156 Ok(result)
157 }
158}
159
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks format detection from the URL extension, from the content type,
    /// and the `Auto` fallback when neither gives a hint.
    #[test]
    fn test_detect_format() {
        let fetcher = WebDataFetcher::new().unwrap();

        // (url, content type, expected format)
        let cases = [
            // Decided by the URL extension.
            ("http://example.com/data.csv", "", DataFormat::CSV),
            ("http://example.com/data.json", "", DataFormat::JSON),
            // Decided by the content type.
            ("http://example.com/data", "application/json", DataFormat::JSON),
            ("http://example.com/data", "text/csv", DataFormat::CSV),
            // Nothing to go on.
            ("http://example.com/data", "", DataFormat::Auto),
        ];

        for (url, content_type, expected) in &cases {
            let detected = fetcher.detect_format(url, content_type);
            // Compare variants only, without requiring `PartialEq` on `DataFormat`.
            assert!(
                std::mem::discriminant(&detected) == std::mem::discriminant(expected),
                "detect_format({:?}, {:?}) returned {:?}, expected {:?}",
                url,
                content_type,
                detected,
                expected
            );
        }
    }
}