sql_cli/sql/parser/
web_cte_parser.rs

//! Web CTE Parser Module
//! Handles parsing of WEB CTEs for HTTP data fetching with custom selectors
3use super::ast::{DataFormat, HttpMethod, WebCTESpec};
4use super::lexer::Token;
5
/// Type that groups the WEB CTE parsing routines.
///
/// Both fields are filled in by `new`, but none of the methods in this impl
/// block reads them: the actual parsing is done by the associated function
/// `parse`, which operates on the recursive parser passed to it.
pub struct WebCteParser<'a> {
    /// Token stream supplied to `new`; only stored (the leading underscore marks it as unused).
    _tokens: &'a mut dyn Iterator<Item = Token>,
    /// Token supplied to `new` together with the stream; likewise only stored.
    _current_token: Token,
}
10
11impl<'a> WebCteParser<'a> {
12    pub fn new(tokens: &'a mut dyn Iterator<Item = Token>, current_token: Token) -> Self {
13        Self {
14            _tokens: tokens,
15            _current_token: current_token,
16        }
17    }
18
19    /// Main entry point to parse WEB CTE specification
20    /// Expects: URL 'url' [METHOD method] [FORMAT format] [HEADERS (...)] [CACHE n] [BODY 'body'] [FORM_FILE 'field' 'path'] [FORM_FIELD 'field' 'value']
21    pub fn parse(parser: &mut crate::sql::recursive_parser::Parser) -> Result<WebCTESpec, String> {
22        // Expect URL keyword
23        if let Token::Identifier(id) = &parser.current_token {
24            if id.to_uppercase() != "URL" {
25                return Err("Expected URL keyword in WEB CTE".to_string());
26            }
27        } else {
28            return Err("Expected URL keyword in WEB CTE".to_string());
29        }
30        parser.advance();
31
32        // Parse URL string
33        let url = match &parser.current_token {
34            Token::StringLiteral(url) => url.clone(),
35            _ => return Err("Expected URL string after URL keyword".to_string()),
36        };
37        parser.advance();
38
39        // Initialize optional fields
40        let mut format = None;
41        let mut headers = Vec::new();
42        let mut cache_seconds = None;
43        let mut method = None;
44        let mut body = None;
45        let mut json_path = None;
46        let mut form_files = Vec::new();
47        let mut form_fields = Vec::new();
48
49        // Parse optional clauses until we hit the closing parenthesis
50        while !matches!(parser.current_token, Token::RightParen)
51            && !matches!(parser.current_token, Token::Eof)
52        {
53            if let Token::Identifier(id) = &parser.current_token {
54                match id.to_uppercase().as_str() {
55                    "FORMAT" => {
56                        parser.advance();
57                        format = Some(Self::parse_data_format(parser)?);
58                    }
59                    "CACHE" => {
60                        parser.advance();
61                        cache_seconds = Some(Self::parse_cache_duration(parser)?);
62                    }
63                    "HEADERS" => {
64                        parser.advance();
65                        headers = Self::parse_headers(parser)?;
66                    }
67                    "METHOD" => {
68                        parser.advance();
69                        method = Some(Self::parse_http_method(parser)?);
70                    }
71                    "BODY" => {
72                        parser.advance();
73                        body = Some(Self::parse_body(parser)?);
74                    }
75                    "JSON_PATH" => {
76                        parser.advance();
77                        json_path = Some(Self::parse_json_path(parser)?);
78                    }
79                    "FORM_FILE" => {
80                        parser.advance();
81                        let (field_name, file_path) = Self::parse_form_file(parser)?;
82                        form_files.push((field_name, file_path));
83                    }
84                    "FORM_FIELD" => {
85                        parser.advance();
86                        let (field_name, value) = Self::parse_form_field(parser)?;
87                        form_fields.push((field_name, value));
88                    }
89                    _ => {
90                        return Err(format!(
91                            "Unexpected keyword '{}' in WEB CTE specification",
92                            id
93                        ));
94                    }
95                }
96            } else {
97                break;
98            }
99        }
100
101        Ok(WebCTESpec {
102            url,
103            format,
104            headers,
105            cache_seconds,
106            method,
107            body,
108            json_path,
109            form_files,
110            form_fields,
111        })
112    }
113
114    fn parse_data_format(
115        parser: &mut crate::sql::recursive_parser::Parser,
116    ) -> Result<DataFormat, String> {
117        if let Token::Identifier(id) = &parser.current_token {
118            let format = match id.to_uppercase().as_str() {
119                "CSV" => DataFormat::CSV,
120                "JSON" => DataFormat::JSON,
121                "AUTO" => DataFormat::Auto,
122                _ => return Err(format!("Unknown data format: {}", id)),
123            };
124            parser.advance();
125            Ok(format)
126        } else {
127            Err("Expected data format (CSV, JSON, or AUTO)".to_string())
128        }
129    }
130
131    fn parse_cache_duration(
132        parser: &mut crate::sql::recursive_parser::Parser,
133    ) -> Result<u64, String> {
134        match &parser.current_token {
135            Token::NumberLiteral(n) => {
136                let duration = n
137                    .parse::<u64>()
138                    .map_err(|_| format!("Invalid cache duration: {}", n))?;
139                parser.advance();
140                Ok(duration)
141            }
142            _ => Err("Expected number for cache duration".to_string()),
143        }
144    }
145
146    fn parse_http_method(
147        parser: &mut crate::sql::recursive_parser::Parser,
148    ) -> Result<HttpMethod, String> {
149        if let Token::Identifier(id) = &parser.current_token {
150            let method = match id.to_uppercase().as_str() {
151                "GET" => HttpMethod::GET,
152                "POST" => HttpMethod::POST,
153                "PUT" => HttpMethod::PUT,
154                "DELETE" => HttpMethod::DELETE,
155                "PATCH" => HttpMethod::PATCH,
156                _ => return Err(format!("Unknown HTTP method: {}", id)),
157            };
158            parser.advance();
159            Ok(method)
160        } else {
161            Err("Expected HTTP method (GET, POST, PUT, DELETE, PATCH)".to_string())
162        }
163    }
164
165    fn parse_body(parser: &mut crate::sql::recursive_parser::Parser) -> Result<String, String> {
166        match &parser.current_token {
167            Token::StringLiteral(body) | Token::JsonBlock(body) => {
168                let body = body.clone();
169                parser.advance();
170                Ok(body)
171            }
172            _ => Err("Expected string literal or $JSON$ block for BODY clause".to_string()),
173        }
174    }
175
176    fn parse_json_path(
177        parser: &mut crate::sql::recursive_parser::Parser,
178    ) -> Result<String, String> {
179        match &parser.current_token {
180            Token::StringLiteral(path) => {
181                let path = path.clone();
182                parser.advance();
183                Ok(path)
184            }
185            _ => Err("Expected string literal for JSON_PATH clause".to_string()),
186        }
187    }
188
189    fn parse_form_file(
190        parser: &mut crate::sql::recursive_parser::Parser,
191    ) -> Result<(String, String), String> {
192        // Parse field name
193        let field_name = match &parser.current_token {
194            Token::StringLiteral(name) => name.clone(),
195            _ => return Err("Expected field name string after FORM_FILE".to_string()),
196        };
197        parser.advance();
198
199        // Parse file path
200        let file_path = match &parser.current_token {
201            Token::StringLiteral(path) => path.clone(),
202            _ => return Err("Expected file path string after field name".to_string()),
203        };
204        parser.advance();
205
206        Ok((field_name, file_path))
207    }
208
209    fn parse_form_field(
210        parser: &mut crate::sql::recursive_parser::Parser,
211    ) -> Result<(String, String), String> {
212        // Parse field name
213        let field_name = match &parser.current_token {
214            Token::StringLiteral(name) | Token::JsonBlock(name) => name.clone(),
215            _ => return Err("Expected field name string after FORM_FIELD".to_string()),
216        };
217        parser.advance();
218
219        // Parse field value (can be regular string or JSON block)
220        let value = match &parser.current_token {
221            Token::StringLiteral(val) | Token::JsonBlock(val) => val.clone(),
222            _ => {
223                return Err(
224                    "Expected field value string or $JSON$ block after field name".to_string(),
225                )
226            }
227        };
228        parser.advance();
229
230        Ok((field_name, value))
231    }
232
233    fn parse_headers(
234        parser: &mut crate::sql::recursive_parser::Parser,
235    ) -> Result<Vec<(String, String)>, String> {
236        parser.consume(Token::LeftParen)?;
237        let mut headers = Vec::new();
238
239        loop {
240            // Parse header name
241            let key = match &parser.current_token {
242                Token::Identifier(id) => id.clone(),
243                Token::StringLiteral(s) => s.clone(),
244                _ => return Err("Expected header name".to_string()),
245            };
246            parser.advance();
247
248            // Expect : (colon) for header key-value separator
249            if !matches!(parser.current_token, Token::Colon) {
250                // For backwards compatibility, also accept =
251                if matches!(parser.current_token, Token::Equal) {
252                    parser.advance();
253                } else {
254                    return Err("Expected ':' or '=' after header name".to_string());
255                }
256            } else {
257                parser.advance(); // consume the colon
258            }
259
260            // Parse header value
261            let value = match &parser.current_token {
262                Token::StringLiteral(s) => s.clone(),
263                _ => return Err("Expected header value as string".to_string()),
264            };
265            parser.advance();
266
267            headers.push((key, value));
268
269            // Check for comma (more headers) or closing paren (end)
270            if matches!(parser.current_token, Token::Comma) {
271                parser.advance();
272            } else if matches!(parser.current_token, Token::RightParen) {
273                parser.advance();
274                break;
275            } else {
276                return Err("Expected ',' or ')' after header value".to_string());
277            }
278        }
279
280        Ok(headers)
281    }
282}