sql_cli/web/
http_fetcher.rs

1// HTTP fetcher for WEB CTEs
2use anyhow::{Context, Result};
3use regex::Regex;
4use std::io::Cursor;
5use std::time::Duration;
6use tracing::{debug, info};
7
8use crate::data::datatable::DataTable;
9use crate::data::stream_loader::{load_csv_from_reader, load_json_from_reader};
10use crate::sql::parser::ast::{DataFormat, WebCTESpec};
11
12/// Fetches data from a URL and converts it to a DataTable
13pub struct WebDataFetcher {
14    client: reqwest::blocking::Client,
15}
16
17impl WebDataFetcher {
18    pub fn new() -> Result<Self> {
19        let client = reqwest::blocking::Client::builder()
20            .timeout(Duration::from_secs(30))
21            .user_agent("sql-cli/1.0")
22            .build()?;
23
24        Ok(Self { client })
25    }
26
27    /// Fetch data from a WEB CTE specification
28    pub fn fetch(&self, spec: &WebCTESpec, table_name: &str) -> Result<DataTable> {
29        info!("Fetching data from URL: {}", spec.url);
30
31        // Build request
32        let mut request = self.client.get(&spec.url);
33
34        // Add headers if provided
35        for (key, value) in &spec.headers {
36            let resolved_value = self.resolve_env_var(value)?;
37            request = request.header(key, resolved_value);
38        }
39
40        // Execute request
41        let response = request
42            .send()
43            .with_context(|| format!("Failed to fetch from URL: {}", spec.url))?;
44
45        // Check status
46        if !response.status().is_success() {
47            return Err(anyhow::anyhow!(
48                "HTTP request failed with status {}: {}",
49                response.status(),
50                spec.url
51            ));
52        }
53
54        // Get content type for format detection
55        let content_type = response
56            .headers()
57            .get("content-type")
58            .and_then(|v| v.to_str().ok())
59            .unwrap_or("")
60            .to_string();
61
62        debug!("Response content-type: {}", content_type);
63
64        // Read response body
65        let bytes = response.bytes()?;
66
67        // Determine format
68        let format = match &spec.format {
69            Some(fmt) => fmt.clone(),
70            None => self.detect_format(&spec.url, &content_type),
71        };
72
73        info!("Using format: {:?} for {}", format, spec.url);
74
75        // Parse based on format
76        match format {
77            DataFormat::CSV => {
78                let reader = Cursor::new(bytes);
79                load_csv_from_reader(reader, table_name, "web", &spec.url)
80                    .with_context(|| format!("Failed to parse CSV from {}", spec.url))
81            }
82            DataFormat::JSON => {
83                let reader = Cursor::new(bytes);
84                load_json_from_reader(reader, table_name, "web", &spec.url)
85                    .with_context(|| format!("Failed to parse JSON from {}", spec.url))
86            }
87            DataFormat::Auto => {
88                // Try CSV first, then JSON
89                let reader_csv = Cursor::new(bytes.clone());
90                match load_csv_from_reader(reader_csv, table_name, "web", &spec.url) {
91                    Ok(table) => Ok(table),
92                    Err(_) => {
93                        debug!("CSV parsing failed, trying JSON");
94                        let reader_json = Cursor::new(bytes);
95                        load_json_from_reader(reader_json, table_name, "web", &spec.url)
96                            .with_context(|| format!("Failed to parse data from {}", spec.url))
97                    }
98                }
99            }
100        }
101    }
102
103    /// Detect format from URL extension or content type
104    fn detect_format(&self, url: &str, content_type: &str) -> DataFormat {
105        // Check content type first
106        if content_type.contains("json") {
107            return DataFormat::JSON;
108        }
109        if content_type.contains("csv") || content_type.contains("text/plain") {
110            return DataFormat::CSV;
111        }
112
113        // Check URL extension
114        if url.ends_with(".json") {
115            DataFormat::JSON
116        } else if url.ends_with(".csv") {
117            DataFormat::CSV
118        } else {
119            // Default to auto-detect
120            DataFormat::Auto
121        }
122    }
123
124    /// Resolve environment variables in values (${VAR_NAME} or $VAR_NAME syntax)
125    fn resolve_env_var(&self, value: &str) -> Result<String> {
126        let mut result = value.to_string();
127
128        // Handle ${VAR} syntax - can be embedded in strings
129        // Use lazy_static for better performance, but for now just compile inline
130        let re = Regex::new(r"\$\{([^}]+)\}").unwrap();
131        for cap in re.captures_iter(value) {
132            let var_name = &cap[1];
133            match std::env::var(var_name) {
134                Ok(var_value) => {
135                    result = result.replace(&cap[0], &var_value);
136                }
137                Err(_) => {
138                    // For security, don't expose which env vars exist
139                    // Just log a debug message and keep the placeholder
140                    debug!(
141                        "Environment variable {} not found, keeping placeholder",
142                        var_name
143                    );
144                }
145            }
146        }
147
148        // Also handle simple $VAR syntax at the start of the string
149        if result.starts_with('$') && !result.starts_with("${") {
150            let var_name = &result[1..];
151            if let Ok(var_value) = std::env::var(var_name) {
152                return Ok(var_value);
153            }
154        }
155
156        Ok(result)
157    }
158}
159
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `detect_format` for URL-extension, content-type and
    /// fallback inputs.
    #[test]
    fn test_detect_format() {
        let fetcher = WebDataFetcher::new().unwrap();
        let detect = |url: &str, content_type: &str| fetcher.detect_format(url, content_type);

        // With no content type, the URL extension decides.
        let csv_by_extension = detect("http://example.com/data.csv", "");
        assert!(matches!(csv_by_extension, DataFormat::CSV));

        let json_by_extension = detect("http://example.com/data.json", "");
        assert!(matches!(json_by_extension, DataFormat::JSON));

        // With no extension, the content type decides.
        let json_by_content_type = detect("http://example.com/data", "application/json");
        assert!(matches!(json_by_content_type, DataFormat::JSON));

        let csv_by_content_type = detect("http://example.com/data", "text/csv");
        assert!(matches!(csv_by_content_type, DataFormat::CSV));

        // With neither hint, detection is deferred to auto mode.
        let undetermined = detect("http://example.com/data", "");
        assert!(matches!(undetermined, DataFormat::Auto));
    }
}