Skip to main content

sql_cli/sql/parser/
web_cte_parser.rs

1/// Web CTE Parser Module
2/// Handles parsing of WEB CTEs for HTTP data fetching with custom selectors
3use super::ast::{DataFormat, HttpMethod, WebCTESpec};
4use super::lexer::Token;
5
6pub struct WebCteParser<'a> {
7    _tokens: &'a mut dyn Iterator<Item = Token>,
8    _current_token: Token,
9}
10
11impl<'a> WebCteParser<'a> {
12    pub fn new(tokens: &'a mut dyn Iterator<Item = Token>, current_token: Token) -> Self {
13        Self {
14            _tokens: tokens,
15            _current_token: current_token,
16        }
17    }
18
19    /// Main entry point to parse WEB CTE specification
20    /// Expects: URL 'url' [METHOD method] [FORMAT format] [HEADERS (...)] [CACHE n] [BODY 'body'] [FORM_FILE 'field' 'path'] [FORM_FIELD 'field' 'value']
21    pub fn parse(parser: &mut crate::sql::recursive_parser::Parser) -> Result<WebCTESpec, String> {
22        // Expect URL keyword
23        if let Token::Identifier(id) = &parser.current_token {
24            if id.to_uppercase() != "URL" {
25                return Err("Expected URL keyword in WEB CTE".to_string());
26            }
27        } else {
28            return Err("Expected URL keyword in WEB CTE".to_string());
29        }
30        parser.advance();
31
32        // Parse URL string
33        let url = match &parser.current_token {
34            Token::StringLiteral(url) => url.clone(),
35            _ => return Err("Expected URL string after URL keyword".to_string()),
36        };
37        parser.advance();
38
39        // Initialize optional fields
40        let mut format = None;
41        let mut headers = Vec::new();
42        let mut cache_seconds = None;
43        let mut method = None;
44        let mut body = None;
45        let mut json_path = None;
46        let mut form_files = Vec::new();
47        let mut form_fields = Vec::new();
48        let mut delimiter: Option<u8> = None;
49
50        // Parse optional clauses until we hit the closing parenthesis
51        while !matches!(parser.current_token, Token::RightParen)
52            && !matches!(parser.current_token, Token::Eof)
53        {
54            if let Token::Identifier(id) = &parser.current_token {
55                match id.to_uppercase().as_str() {
56                    "FORMAT" => {
57                        parser.advance();
58                        format = Some(Self::parse_data_format(parser)?);
59                    }
60                    "CACHE" => {
61                        parser.advance();
62                        cache_seconds = Some(Self::parse_cache_duration(parser)?);
63                    }
64                    "HEADERS" => {
65                        parser.advance();
66                        headers = Self::parse_headers(parser)?;
67                    }
68                    "METHOD" => {
69                        parser.advance();
70                        method = Some(Self::parse_http_method(parser)?);
71                    }
72                    "BODY" => {
73                        parser.advance();
74                        body = Some(Self::parse_body(parser)?);
75                    }
76                    "JSON_PATH" => {
77                        parser.advance();
78                        json_path = Some(Self::parse_json_path(parser)?);
79                    }
80                    "FORM_FILE" => {
81                        parser.advance();
82                        let (field_name, file_path) = Self::parse_form_file(parser)?;
83                        form_files.push((field_name, file_path));
84                    }
85                    "FORM_FIELD" => {
86                        parser.advance();
87                        let (field_name, value) = Self::parse_form_field(parser)?;
88                        form_fields.push((field_name, value));
89                    }
90                    "DELIMITER" => {
91                        parser.advance();
92                        delimiter = Some(Self::parse_delimiter(parser)?);
93                    }
94                    _ => {
95                        return Err(format!(
96                            "Unexpected keyword '{}' in WEB CTE specification",
97                            id
98                        ));
99                    }
100                }
101            } else {
102                break;
103            }
104        }
105
106        Ok(WebCTESpec {
107            url,
108            format,
109            headers,
110            cache_seconds,
111            method,
112            body,
113            json_path,
114            form_files,
115            form_fields,
116            template_vars: Vec::new(), // Will be populated by template expander
117            delimiter,
118        })
119    }
120
121    /// Parse a DELIMITER clause value: a single ASCII char or backslash escape
122    /// (`\t`, `\n`, `\r`). Reuses the shared parser from stream_loader so the
123    /// rules match `--delimiter` and `READ_CSV(_, 'x')` exactly.
124    fn parse_delimiter(parser: &mut crate::sql::recursive_parser::Parser) -> Result<u8, String> {
125        let raw = match &parser.current_token {
126            Token::StringLiteral(s) => s.clone(),
127            _ => return Err("Expected string literal after DELIMITER".to_string()),
128        };
129        let byte = crate::data::stream_loader::parse_delimiter_arg(&raw)
130            .map_err(|e| format!("Invalid DELIMITER: {}", e))?;
131        parser.advance();
132        Ok(byte)
133    }
134
135    fn parse_data_format(
136        parser: &mut crate::sql::recursive_parser::Parser,
137    ) -> Result<DataFormat, String> {
138        if let Token::Identifier(id) = &parser.current_token {
139            let format = match id.to_uppercase().as_str() {
140                "CSV" => DataFormat::CSV,
141                "JSON" => DataFormat::JSON,
142                "AUTO" => DataFormat::Auto,
143                _ => return Err(format!("Unknown data format: {}", id)),
144            };
145            parser.advance();
146            Ok(format)
147        } else {
148            Err("Expected data format (CSV, JSON, or AUTO)".to_string())
149        }
150    }
151
152    fn parse_cache_duration(
153        parser: &mut crate::sql::recursive_parser::Parser,
154    ) -> Result<u64, String> {
155        match &parser.current_token {
156            Token::NumberLiteral(n) => {
157                let duration = n
158                    .parse::<u64>()
159                    .map_err(|_| format!("Invalid cache duration: {}", n))?;
160                parser.advance();
161                Ok(duration)
162            }
163            _ => Err("Expected number for cache duration".to_string()),
164        }
165    }
166
167    fn parse_http_method(
168        parser: &mut crate::sql::recursive_parser::Parser,
169    ) -> Result<HttpMethod, String> {
170        if let Token::Identifier(id) = &parser.current_token {
171            let method = match id.to_uppercase().as_str() {
172                "GET" => HttpMethod::GET,
173                "POST" => HttpMethod::POST,
174                "PUT" => HttpMethod::PUT,
175                "DELETE" => HttpMethod::DELETE,
176                "PATCH" => HttpMethod::PATCH,
177                _ => return Err(format!("Unknown HTTP method: {}", id)),
178            };
179            parser.advance();
180            Ok(method)
181        } else {
182            Err("Expected HTTP method (GET, POST, PUT, DELETE, PATCH)".to_string())
183        }
184    }
185
186    fn parse_body(parser: &mut crate::sql::recursive_parser::Parser) -> Result<String, String> {
187        match &parser.current_token {
188            Token::StringLiteral(body) | Token::JsonBlock(body) => {
189                let body = body.clone();
190                parser.advance();
191                Ok(body)
192            }
193            _ => Err("Expected string literal or $JSON$ block for BODY clause".to_string()),
194        }
195    }
196
197    fn parse_json_path(
198        parser: &mut crate::sql::recursive_parser::Parser,
199    ) -> Result<String, String> {
200        match &parser.current_token {
201            Token::StringLiteral(path) => {
202                let path = path.clone();
203                parser.advance();
204                Ok(path)
205            }
206            _ => Err("Expected string literal for JSON_PATH clause".to_string()),
207        }
208    }
209
210    fn parse_form_file(
211        parser: &mut crate::sql::recursive_parser::Parser,
212    ) -> Result<(String, String), String> {
213        // Parse field name
214        let field_name = match &parser.current_token {
215            Token::StringLiteral(name) => name.clone(),
216            _ => return Err("Expected field name string after FORM_FILE".to_string()),
217        };
218        parser.advance();
219
220        // Parse file path
221        let file_path = match &parser.current_token {
222            Token::StringLiteral(path) => path.clone(),
223            _ => return Err("Expected file path string after field name".to_string()),
224        };
225        parser.advance();
226
227        Ok((field_name, file_path))
228    }
229
230    fn parse_form_field(
231        parser: &mut crate::sql::recursive_parser::Parser,
232    ) -> Result<(String, String), String> {
233        // Parse field name
234        let field_name = match &parser.current_token {
235            Token::StringLiteral(name) | Token::JsonBlock(name) => name.clone(),
236            _ => return Err("Expected field name string after FORM_FIELD".to_string()),
237        };
238        parser.advance();
239
240        // Parse field value (can be regular string or JSON block)
241        let value = match &parser.current_token {
242            Token::StringLiteral(val) | Token::JsonBlock(val) => val.clone(),
243            _ => {
244                return Err(
245                    "Expected field value string or $JSON$ block after field name".to_string(),
246                )
247            }
248        };
249        parser.advance();
250
251        Ok((field_name, value))
252    }
253
254    fn parse_headers(
255        parser: &mut crate::sql::recursive_parser::Parser,
256    ) -> Result<Vec<(String, String)>, String> {
257        parser.consume(Token::LeftParen)?;
258        let mut headers = Vec::new();
259
260        loop {
261            // Parse header name
262            let key = match &parser.current_token {
263                Token::Identifier(id) => id.clone(),
264                Token::StringLiteral(s) => s.clone(),
265                _ => return Err("Expected header name".to_string()),
266            };
267            parser.advance();
268
269            // Expect : (colon) for header key-value separator
270            if !matches!(parser.current_token, Token::Colon) {
271                // For backwards compatibility, also accept =
272                if matches!(parser.current_token, Token::Equal) {
273                    parser.advance();
274                } else {
275                    return Err("Expected ':' or '=' after header name".to_string());
276                }
277            } else {
278                parser.advance(); // consume the colon
279            }
280
281            // Parse header value
282            let value = match &parser.current_token {
283                Token::StringLiteral(s) => s.clone(),
284                _ => return Err("Expected header value as string".to_string()),
285            };
286            parser.advance();
287
288            headers.push((key, value));
289
290            // Check for comma (more headers) or closing paren (end)
291            if matches!(parser.current_token, Token::Comma) {
292                parser.advance();
293            } else if matches!(parser.current_token, Token::RightParen) {
294                parser.advance();
295                break;
296            } else {
297                return Err("Expected ',' or ')' after header value".to_string());
298            }
299        }
300
301        Ok(headers)
302    }
303}