Skip to main content

fraiseql_cli/schema/
sql_templates.rs

1//! SQL template extraction from database handlers.
2//!
3//! This module extracts SQL generation patterns from the database handlers
4//! and stores them as metadata in the compiled schema.
5//!
6//! For each rich scalar type and its operators, we:
7//! 1. Call the database handler's generate_extended_sql() with mock data
8//! 2. Normalize the generated SQL to a template format
9//! 3. Store the templates for runtime SQL generation
10//!
11//! # Template Format
12//!
13//! Templates use placeholders:
14//! - `$field` - The JSONB field reference (e.g., `data->>'email'`)
15//! - `$1`, `$2`, etc. - Parameter placeholders (database-specific)
16//!
17//! # Example
18//!
19//! For EmailDomainEq on PostgreSQL:
20//! - Input: field_sql = "data->>'email'", domain = "example.com"
21//! - Handler output: "SPLIT_PART(data->>'email', '@', 2) = $1"
22//! - Template: "SPLIT_PART($field, '@', 2) = $1"
23
24use std::collections::HashMap;
25
26use serde_json::json;
27
28/// Extract SQL template for an operator from a specific database handler.
29///
30/// Maps operator names to SQL templates for all 4 databases.
31/// Organizes templates by operation type for maintainability.
32///
33/// # Lookup-Based Operators
34///
35/// Some operators use lookup data stored in the compiled schema:
36/// - Country operators: continent, region, EU/Schengen membership
37/// - Currency operators: currency code, symbol, decimal places
38/// - Timezone operators: UTC offset, daylight saving time
39/// - Language operators: language family, script
40///
41/// These templates use a special `$lookup` placeholder that's replaced
42/// at runtime with actual lookup value parameters.
43fn extract_template_for_operator(db_name: &str, operator_name: &str) -> Option<String> {
44    match (db_name, operator_name) {
45        // ========================================================================
46        // EMAIL OPERATORS
47        // ========================================================================
48        ("postgres", "domainEq") => Some("SPLIT_PART($field, '@', 2) = $1".to_string()),
49        ("mysql", "domainEq") => Some("SUBSTRING_INDEX($field, '@', -1) = ?".to_string()),
50        ("sqlite", "domainEq") => Some("SUBSTR($field, INSTR($field, '@') + 1) = ?".to_string()),
51        ("sqlserver", "domainEq") => Some("SUBSTRING($field, CHARINDEX('@', $field) + 1, LEN($field)) = ?".to_string()),
52
53        ("postgres", "domainIn") => Some("SPLIT_PART($field, '@', 2) IN ($params)".to_string()),
54        ("mysql", "domainIn") => Some("SUBSTRING_INDEX($field, '@', -1) IN ($params)".to_string()),
55        ("sqlite", "domainIn") => Some("SUBSTR($field, INSTR($field, '@') + 1) IN ($params)".to_string()),
56        ("sqlserver", "domainIn") => Some("SUBSTRING($field, CHARINDEX('@', $field) + 1, LEN($field)) IN ($params)".to_string()),
57
58        ("postgres", "domainEndswith") => Some("SPLIT_PART($field, '@', 2) LIKE '%' || $1".to_string()),
59        ("mysql", "domainEndswith") => Some("SUBSTRING_INDEX($field, '@', -1) LIKE CONCAT('%', ?)".to_string()),
60        ("sqlite", "domainEndswith") => Some("SUBSTR($field, INSTR($field, '@') + 1) LIKE '%' || ?".to_string()),
61        ("sqlserver", "domainEndswith") => Some("SUBSTRING($field, CHARINDEX('@', $field) + 1, LEN($field)) LIKE '%' + ?".to_string()),
62
63        ("postgres", "localPartStartswith") => Some("SPLIT_PART($field, '@', 1) LIKE $1 || '%'".to_string()),
64        ("mysql", "localPartStartswith") => Some("SUBSTRING_INDEX($field, '@', 1) LIKE CONCAT(?, '%')".to_string()),
65        ("sqlite", "localPartStartswith") => Some("SUBSTR($field, 1, INSTR($field, '@') - 1) LIKE ? || '%'".to_string()),
66        ("sqlserver", "localPartStartswith") => Some("SUBSTRING($field, 1, CHARINDEX('@', $field) - 1) LIKE ? + '%'".to_string()),
67
68        // ========================================================================
69        // VIN OPERATORS
70        // ========================================================================
71        ("postgres", "wmiEq") => Some("SUBSTRING($field FROM 1 FOR 3) = $1".to_string()),
72        ("mysql", "wmiEq") => Some("SUBSTRING($field, 1, 3) = ?".to_string()),
73        ("sqlite", "wmiEq") => Some("SUBSTR($field, 1, 3) = ?".to_string()),
74        ("sqlserver", "wmiEq") => Some("SUBSTRING($field, 1, 3) = ?".to_string()),
75
76        // ========================================================================
77        // IBAN OPERATORS
78        // ========================================================================
79        ("postgres", "countryEq") => Some("SUBSTRING($field FROM 1 FOR 2) = $1".to_string()),
80        ("mysql", "countryEq") => Some("SUBSTRING($field, 1, 2) = ?".to_string()),
81        ("sqlite", "countryEq") => Some("SUBSTR($field, 1, 2) = ?".to_string()),
82        ("sqlserver", "countryEq") => Some("SUBSTRING($field, 1, 2) = ?".to_string()),
83
84        // ========================================================================
85        // URL OPERATORS
86        // ========================================================================
87        // protocolEq: extract protocol before ://
88        ("postgres", "protocolEq") => Some("SPLIT_PART($field, '://', 1) = $1".to_string()),
89        ("mysql", "protocolEq") => Some("SUBSTRING($field, 1, LOCATE('://', $field) - 1) = ?".to_string()),
90        ("sqlite", "protocolEq") => Some("SUBSTR($field, 1, INSTR($field, '://') - 1) = ?".to_string()),
91        ("sqlserver", "protocolEq") => Some("SUBSTRING($field, 1, CHARINDEX('://', $field) - 1) = ?".to_string()),
92
93        // hostEq: extract host part
94        ("postgres", "hostEq") => Some("SPLIT_PART(SPLIT_PART($field, '://', 2), '/', 1) = $1".to_string()),
95        ("mysql", "hostEq") => Some("SUBSTRING(SUBSTRING($field, LOCATE('://', $field) + 3), 1, LOCATE('/', SUBSTRING($field, LOCATE('://', $field) + 3)) - 1) = ?".to_string()),
96        ("sqlite", "hostEq") => Some("SUBSTR($field, INSTR($field, '://') + 3, INSTR(SUBSTR($field, INSTR($field, '://') + 3), '/') - 1) = ?".to_string()),
97        ("sqlserver", "hostEq") => Some("SUBSTRING(SUBSTRING($field, CHARINDEX('://', $field) + 3), 1, CHARINDEX('/', SUBSTRING($field, CHARINDEX('://', $field) + 3)) - 1) = ?".to_string()),
98
99        // pathStartswith: extract path part
100        ("postgres", "pathStartswith") => Some("SPLIT_PART(SPLIT_PART($field, '://', 2), '?', 1) LIKE $1 || '%'".to_string()),
101        ("mysql", "pathStartswith") => Some("SUBSTRING(SUBSTRING($field, LOCATE('://', $field) + 3), LOCATE('/', SUBSTRING($field, LOCATE('://', $field) + 3)), LOCATE('?', SUBSTRING($field, LOCATE('://', $field) + 3)) - LOCATE('/', SUBSTRING($field, LOCATE('://', $field) + 3))) LIKE CONCAT(?, '%')".to_string()),
102        ("sqlite", "pathStartswith") => Some("SUBSTR(SUBSTR($field, INSTR($field, '://') + 3), INSTR(SUBSTR($field, INSTR($field, '://') + 3), '/')) LIKE ? || '%'".to_string()),
103        ("sqlserver", "pathStartswith") => Some("SUBSTRING($field, CHARINDEX('/', $field, CHARINDEX('://', $field) + 3), CHARINDEX('?', $field) - CHARINDEX('/', $field, CHARINDEX('://', $field) + 3)) LIKE ? + '%'".to_string()),
104
105        // ========================================================================
106        // DOMAIN NAME OPERATORS
107        // ========================================================================
108        // tldEq: extract TLD (rightmost part after last dot)
109        ("postgres", "tldEq") => Some("RIGHT($field, LENGTH($field) - STRPOS($field, '.') + 1) = $1".to_string()),
110        ("mysql", "tldEq") => Some("SUBSTRING($field, LOCATE('.', REVERSE($field)) + 1) = ?".to_string()),
111        ("sqlite", "tldEq") => Some("SUBSTR($field, INSTR($field, '.') + 1) = ?".to_string()),
112        ("sqlserver", "tldEq") => Some("SUBSTRING($field, CHARINDEX('.', REVERSE($field)) + 1, LEN($field)) = ?".to_string()),
113
114        // tldIn: extract TLD and check in list
115        ("postgres", "tldIn") => Some("RIGHT($field, LENGTH($field) - STRPOS($field, '.') + 1) IN ($params)".to_string()),
116        ("mysql", "tldIn") => Some("SUBSTRING($field, LOCATE('.', REVERSE($field)) + 1) IN ($params)".to_string()),
117        ("sqlite", "tldIn") => Some("SUBSTR($field, INSTR($field, '.') + 1) IN ($params)".to_string()),
118        ("sqlserver", "tldIn") => Some("SUBSTRING($field, CHARINDEX('.', REVERSE($field)) + 1, LEN($field)) IN ($params)".to_string()),
119
120        // ========================================================================
121        // HOSTNAME OPERATORS
122        // ========================================================================
123        // isFqdn: check if contains at least one dot
124        ("postgres", "isFqdn") => Some("CASE WHEN POSITION('.' IN $field) > 0 THEN true ELSE false END = $1".to_string()),
125        ("mysql", "isFqdn") => Some("CASE WHEN LOCATE('.', $field) > 0 THEN 1 ELSE 0 END = ?".to_string()),
126        ("sqlite", "isFqdn") => Some("CASE WHEN INSTR($field, '.') > 0 THEN 1 ELSE 0 END = ?".to_string()),
127        ("sqlserver", "isFqdn") => Some("CASE WHEN CHARINDEX('.', $field) > 0 THEN 1 ELSE 0 END = ?".to_string()),
128
129        // depthEq: count labels (dots + 1)
130        ("postgres", "depthEq") => Some("(LENGTH($field) - LENGTH(REPLACE($field, '.', '')) + 1) = $1".to_string()),
131        ("mysql", "depthEq") => Some("(LENGTH($field) - LENGTH(REPLACE($field, '.', '')) + 1) = ?".to_string()),
132        ("sqlite", "depthEq") => Some("(LENGTH($field) - LENGTH(REPLACE($field, '.', '')) + 1) = ?".to_string()),
133        ("sqlserver", "depthEq") => Some("(LEN($field) - LEN(REPLACE($field, '.', '')) + 1) = ?".to_string()),
134
135        // ========================================================================
136        // STANDARD STRING OPERATORS (apply to multiple types)
137        // ========================================================================
138        // Generic equals (when no extraction needed)
139        ("postgres", "eq") => Some("$field = $1".to_string()),
140        ("mysql", "eq") => Some("$field = ?".to_string()),
141        ("sqlite", "eq") => Some("$field = ?".to_string()),
142        ("sqlserver", "eq") => Some("$field = ?".to_string()),
143
144        // Generic contains
145        ("postgres", "contains") => Some("$field LIKE '%' || $1 || '%'".to_string()),
146        ("mysql", "contains") => Some("$field LIKE CONCAT('%', ?, '%')".to_string()),
147        ("sqlite", "contains") => Some("$field LIKE '%' || ? || '%'".to_string()),
148        ("sqlserver", "contains") => Some("$field LIKE '%' + ? + '%'".to_string()),
149
150        // Generic startswith
151        ("postgres", "startswith") => Some("$field LIKE $1 || '%'".to_string()),
152        ("mysql", "startswith") => Some("$field LIKE CONCAT(?, '%')".to_string()),
153        ("sqlite", "startswith") => Some("$field LIKE ? || '%'".to_string()),
154        ("sqlserver", "startswith") => Some("$field LIKE ? + '%'".to_string()),
155
156        // Generic endswith
157        ("postgres", "endswith") => Some("$field LIKE '%' || $1".to_string()),
158        ("mysql", "endswith") => Some("$field LIKE CONCAT('%', ?)".to_string()),
159        ("sqlite", "endswith") => Some("$field LIKE '%' || ?".to_string()),
160        ("sqlserver", "endswith") => Some("$field LIKE '%' + ?".to_string()),
161
162        // ========================================================================
163        // NUMERIC RANGE OPERATORS
164        // ========================================================================
165        // withinRange: numeric comparison between two values
166        ("postgres", "withinRange") => Some("$field BETWEEN $1 AND $2".to_string()),
167        ("mysql", "withinRange") => Some("$field BETWEEN ? AND ?".to_string()),
168        ("sqlite", "withinRange") => Some("$field BETWEEN ? AND ?".to_string()),
169        ("sqlserver", "withinRange") => Some("$field BETWEEN ? AND ?".to_string()),
170
171        // hemisphereEq: simple string match for hemisphere
172        ("postgres", "hemisphereEq") => Some("$field LIKE $1 || '%'".to_string()),
173        ("mysql", "hemisphereEq") => Some("$field LIKE CONCAT(?, '%')".to_string()),
174        ("sqlite", "hemisphereEq") => Some("$field LIKE ? || '%'".to_string()),
175        ("sqlserver", "hemisphereEq") => Some("$field LIKE ? + '%'".to_string()),
176
177        // ========================================================================
178        // POSTAL CODE OPERATORS
179        // ========================================================================
180        // Uses countryEq but needs to extract country code from postal code
181        // This is type-specific and handled in handlers
182        ("postgres", "postalCodeCountryEq") => Some("LEFT($field, 2) = $1".to_string()),
183        ("mysql", "postalCodeCountryEq") => Some("LEFT($field, 2) = ?".to_string()),
184        ("sqlite", "postalCodeCountryEq") => Some("SUBSTR($field, 1, 2) = ?".to_string()),
185        ("sqlserver", "postalCodeCountryEq") => Some("LEFT($field, 2) = ?".to_string()),
186
187        // ========================================================================
188        // SIMPLE TYPES (STRING EQUALITY)
189        // ========================================================================
190        // These types just use simple string comparison
191        ("postgres", "timeZoneEq") => Some("$field = $1".to_string()),
192        ("mysql", "timeZoneEq") => Some("$field = ?".to_string()),
193        ("sqlite", "timeZoneEq") => Some("$field = ?".to_string()),
194        ("sqlserver", "timeZoneEq") => Some("$field = ?".to_string()),
195
196        // Phone country code
197        ("postgres", "countryCodeEq") => Some("SPLIT_PART($field, '-', 1) = $1".to_string()),
198        ("mysql", "countryCodeEq") => Some("SUBSTRING_INDEX($field, '-', 1) = ?".to_string()),
199        ("sqlite", "countryCodeEq") => Some("SUBSTR($field, 1, INSTR($field, '-') - 1) = ?".to_string()),
200        ("sqlserver", "countryCodeEq") => Some("SUBSTRING($field, 1, CHARINDEX('-', $field) - 1) = ?".to_string()),
201
202        ("postgres", "countryCodeIn") => Some("SPLIT_PART($field, '-', 1) IN ($params)".to_string()),
203        ("mysql", "countryCodeIn") => Some("SUBSTRING_INDEX($field, '-', 1) IN ($params)".to_string()),
204        ("sqlite", "countryCodeIn") => Some("SUBSTR($field, 1, INSTR($field, '-') - 1) IN ($params)".to_string()),
205        ("sqlserver", "countryCodeIn") => Some("SUBSTRING($field, 1, CHARINDEX('-', $field) - 1) IN ($params)".to_string()),
206
207        // ========================================================================
208        // FINANCIAL IDENTIFIERS (CUSIP, ISIN, SEDOL, etc.)
209        // ========================================================================
210        // For most financial identifiers, use simple string operations
211        ("postgres", "cusipFormatValid") => Some("LENGTH($field) = 9".to_string()),
212        ("mysql", "cusipFormatValid") => Some("LENGTH($field) = 9".to_string()),
213        ("sqlite", "cusipFormatValid") => Some("LENGTH($field) = 9".to_string()),
214        ("sqlserver", "cusipFormatValid") => Some("LEN($field) = 9".to_string()),
215
216        ("postgres", "isinFormatValid") => Some("LENGTH($field) = 12".to_string()),
217        ("mysql", "isinFormatValid") => Some("LENGTH($field) = 12".to_string()),
218        ("sqlite", "isinFormatValid") => Some("LENGTH($field) = 12".to_string()),
219        ("sqlserver", "isinFormatValid") => Some("LEN($field) = 12".to_string()),
220
221        ("postgres", "sedolFormatValid") => Some("LENGTH($field) = 7".to_string()),
222        ("mysql", "sedolFormatValid") => Some("LENGTH($field) = 7".to_string()),
223        ("sqlite", "sedolFormatValid") => Some("LENGTH($field) = 7".to_string()),
224        ("sqlserver", "sedolFormatValid") => Some("LEN($field) = 7".to_string()),
225
226        // Stock symbol equals
227        ("postgres", "symbolEq") => Some("$field = $1".to_string()),
228        ("mysql", "symbolEq") => Some("$field = ?".to_string()),
229        ("sqlite", "symbolEq") => Some("$field = ?".to_string()),
230        ("sqlserver", "symbolEq") => Some("$field = ?".to_string()),
231
232        // Exchange code equals
233        ("postgres", "exchangeCodeEq") => Some("$field = $1".to_string()),
234        ("mysql", "exchangeCodeEq") => Some("$field = ?".to_string()),
235        ("sqlite", "exchangeCodeEq") => Some("$field = ?".to_string()),
236        ("sqlserver", "exchangeCodeEq") => Some("$field = ?".to_string()),
237
238        // Currency code equals
239        ("postgres", "currencyCodeEq") => Some("$field = $1".to_string()),
240        ("mysql", "currencyCodeEq") => Some("$field = ?".to_string()),
241        ("sqlite", "currencyCodeEq") => Some("$field = ?".to_string()),
242        ("sqlserver", "currencyCodeEq") => Some("$field = ?".to_string()),
243
244        // ========================================================================
245        // IDENTIFIER TYPES (Slug, SemanticVersion, HashSHA256, APIKey)
246        // ========================================================================
247        // Slug: alphanumeric + hyphens
248        ("postgres", "slugFormatValid") => Some("$field ~ '^[a-z0-9-]+$'".to_string()),
249        ("mysql", "slugFormatValid") => Some("$field REGEXP '^[a-z0-9-]+$'".to_string()),
250        ("sqlite", "slugFormatValid") => Some("$field GLOB '[a-z0-9-]*'".to_string()),
251        ("sqlserver", "slugFormatValid") => Some("$field LIKE '[a-z0-9-]*'".to_string()),
252
253        // Semantic version: matches X.Y.Z pattern
254        ("postgres", "semverFormatValid") => Some("$field ~ '^[0-9]+\\.[0-9]+\\.[0-9]+.*$'".to_string()),
255        ("mysql", "semverFormatValid") => Some("$field REGEXP '^[0-9]+\\.[0-9]+\\.[0-9]+.*$'".to_string()),
256        ("sqlite", "semverFormatValid") => Some("$field GLOB '[0-9]*.[0-9]*.[0-9]*'".to_string()),
257        ("sqlserver", "semverFormatValid") => Some("$field LIKE '[0-9]%.[0-9]%.[0-9]%'".to_string()),
258
259        // SHA256 hash: 64 hex characters
260        ("postgres", "hashFormatValid") => Some("LENGTH($field) = 64 AND $field ~ '^[a-f0-9]+$'".to_string()),
261        ("mysql", "hashFormatValid") => Some("LENGTH($field) = 64 AND $field REGEXP '^[a-f0-9]+$'".to_string()),
262        ("sqlite", "hashFormatValid") => Some("LENGTH($field) = 64 AND $field GLOB '[a-f0-9]*'".to_string()),
263        ("sqlserver", "hashFormatValid") => Some("LEN($field) = 64 AND $field LIKE '[a-f0-9]%'".to_string()),
264
265        // API Key: usually alphanumeric with underscores
266        ("postgres", "apikeyFormatValid") => Some("$field ~ '^[a-zA-Z0-9_-]+$'".to_string()),
267        ("mysql", "apikeyFormatValid") => Some("$field REGEXP '^[a-zA-Z0-9_-]+$'".to_string()),
268        ("sqlite", "apikeyFormatValid") => Some("$field GLOB '[a-zA-Z0-9_-]*'".to_string()),
269        ("sqlserver", "apikeyFormatValid") => Some("$field LIKE '[a-zA-Z0-9_-]*'".to_string()),
270
271        // ========================================================================
272        // CONTENT TYPES (Markdown, HTML, MimeType, Color)
273        // ========================================================================
274        // MIME type equals
275        ("postgres", "mimetypeEq") => Some("$field = $1".to_string()),
276        ("mysql", "mimetypeEq") => Some("$field = ?".to_string()),
277        ("sqlite", "mimetypeEq") => Some("$field = ?".to_string()),
278        ("sqlserver", "mimetypeEq") => Some("$field = ?".to_string()),
279
280        // MIME type starts with (e.g., "image/")
281        ("postgres", "mimetypeStartswith") => Some("$field LIKE $1 || '%'".to_string()),
282        ("mysql", "mimetypeStartswith") => Some("$field LIKE CONCAT(?, '%')".to_string()),
283        ("sqlite", "mimetypeStartswith") => Some("$field LIKE ? || '%'".to_string()),
284        ("sqlserver", "mimetypeStartswith") => Some("$field LIKE ? + '%'".to_string()),
285
286        // Color format validation (hex color)
287        ("postgres", "colorHexFormatValid") => Some("$field ~ '^#[a-f0-9]{6}$'".to_string()),
288        ("mysql", "colorHexFormatValid") => Some("$field REGEXP '^#[a-f0-9]{6}$'".to_string()),
289        ("sqlite", "colorHexFormatValid") => Some("$field GLOB '#[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]'".to_string()),
290        ("sqlserver", "colorHexFormatValid") => Some("$field LIKE '#[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]'".to_string()),
291
292        // ========================================================================
293        // NETWORK TYPES (IPAddress, IPv4, IPv6, CIDR, Port)
294        // ========================================================================
295        // IPv4 format: 4 octets separated by dots
296        ("postgres", "ipv4FormatValid") => Some("$field ~ '^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$'".to_string()),
297        ("mysql", "ipv4FormatValid") => Some("$field REGEXP '^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$'".to_string()),
298        ("sqlite", "ipv4FormatValid") => Some("CAST(CAST(CAST(CAST(SUBSTR($field,1,INSTR($field,'.')-1) AS INT) AS INT) > 0 AS INT) || CAST(CAST(SUBSTR($field,1,INSTR($field,'.')-1) AS INT) <= 255 AS INT) AS INT) = 1".to_string()),
299        ("sqlserver", "ipv4FormatValid") => Some("CONVERT(BIT, CASE WHEN $field LIKE '[0-9].*.[0-9].*.[0-9].*.[0-9]' THEN 1 ELSE 0 END) = 1".to_string()),
300
301        // Port number: between 0 and 65535
302        ("postgres", "portValid") => Some("CAST($field AS INTEGER) BETWEEN 0 AND 65535".to_string()),
303        ("mysql", "portValid") => Some("CAST($field AS UNSIGNED) BETWEEN 0 AND 65535".to_string()),
304        ("sqlite", "portValid") => Some("CAST($field AS INTEGER) BETWEEN 0 AND 65535".to_string()),
305        ("sqlserver", "portValid") => Some("CAST($field AS INT) BETWEEN 0 AND 65535".to_string()),
306
307        // ========================================================================
308        // MEASUREMENT/RANGE TYPES
309        // ========================================================================
310        // Percentage: 0-100
311        ("postgres", "percentageValid") => Some("CAST($field AS DECIMAL) BETWEEN 0 AND 100".to_string()),
312        ("mysql", "percentageValid") => Some("CAST($field AS DECIMAL) BETWEEN 0 AND 100".to_string()),
313        ("sqlite", "percentageValid") => Some("CAST($field AS REAL) BETWEEN 0 AND 100".to_string()),
314        ("sqlserver", "percentageValid") => Some("CAST($field AS DECIMAL) BETWEEN 0 AND 100".to_string()),
315
316        // ========================================================================
317        // LOOKUP-BASED OPERATORS
318        // ========================================================================
319        // These operators use external lookup data embedded in the schema.
320        // Templates use $lookup placeholder for the lookup field name.
321
322        // Country: continent membership
323        ("postgres", "continentEq") => Some("$lookup ->> 'continent' = $1".to_string()),
324        ("mysql", "continentEq") => Some("JSON_EXTRACT($lookup, '$.continent') = ?".to_string()),
325        ("sqlite", "continentEq") => Some("json_extract($lookup, '$.continent') = ?".to_string()),
326        ("sqlserver", "continentEq") => Some("JSON_VALUE($lookup, '$.continent') = ?".to_string()),
327
328        // Country: region membership
329        ("postgres", "regionEq") => Some("$lookup ->> 'region' = $1".to_string()),
330        ("mysql", "regionEq") => Some("JSON_EXTRACT($lookup, '$.region') = ?".to_string()),
331        ("sqlite", "regionEq") => Some("json_extract($lookup, '$.region') = ?".to_string()),
332        ("sqlserver", "regionEq") => Some("JSON_VALUE($lookup, '$.region') = ?".to_string()),
333
334        // Country: EU membership
335        ("postgres", "inEu") => Some("($lookup ->> 'in_eu')::boolean = $1".to_string()),
336        ("mysql", "inEu") => Some("JSON_EXTRACT($lookup, '$.in_eu') = ?".to_string()),
337        ("sqlite", "inEu") => Some("json_extract($lookup, '$.in_eu') = ?".to_string()),
338        ("sqlserver", "inEu") => Some("JSON_VALUE($lookup, '$.in_eu') = ?".to_string()),
339
340        // Country: Schengen membership
341        ("postgres", "inSchengen") => Some("($lookup ->> 'in_schengen')::boolean = $1".to_string()),
342        ("mysql", "inSchengen") => Some("JSON_EXTRACT($lookup, '$.in_schengen') = ?".to_string()),
343        ("sqlite", "inSchengen") => Some("json_extract($lookup, '$.in_schengen') = ?".to_string()),
344        ("sqlserver", "inSchengen") => Some("JSON_VALUE($lookup, '$.in_schengen') = ?".to_string()),
345
346        // Currency: decimal places (for Money type)
347        ("postgres", "currencyDecimalEq") => Some("($lookup ->> 'decimal_places')::integer = $1".to_string()),
348        ("mysql", "currencyDecimalEq") => Some("JSON_EXTRACT($lookup, '$.decimal_places') = ?".to_string()),
349        ("sqlite", "currencyDecimalEq") => Some("json_extract($lookup, '$.decimal_places') = ?".to_string()),
350        ("sqlserver", "currencyDecimalEq") => Some("JSON_VALUE($lookup, '$.decimal_places') = ?".to_string()),
351
352        // Timezone: offset in minutes from UTC
353        ("postgres", "timezoneOffsetEq") => Some("($lookup ->> 'offset_minutes')::integer = $1".to_string()),
354        ("mysql", "timezoneOffsetEq") => Some("JSON_EXTRACT($lookup, '$.offset_minutes') = ?".to_string()),
355        ("sqlite", "timezoneOffsetEq") => Some("json_extract($lookup, '$.offset_minutes') = ?".to_string()),
356        ("sqlserver", "timezoneOffsetEq") => Some("JSON_VALUE($lookup, '$.offset_minutes') = ?".to_string()),
357
358        // Timezone: daylight saving time support
359        ("postgres", "timezoneDst") => Some("($lookup ->> 'has_dst')::boolean = $1".to_string()),
360        ("mysql", "timezoneDst") => Some("JSON_EXTRACT($lookup, '$.has_dst') = ?".to_string()),
361        ("sqlite", "timezoneDst") => Some("json_extract($lookup, '$.has_dst') = ?".to_string()),
362        ("sqlserver", "timezoneDst") => Some("JSON_VALUE($lookup, '$.has_dst') = ?".to_string()),
363
364        // Timezone: region (Americas, Europe, Asia, Oceania)
365        ("postgres", "timezoneRegionEq") => Some("$lookup ->> 'region' = $1".to_string()),
366        ("mysql", "timezoneRegionEq") => Some("JSON_EXTRACT($lookup, '$.region') = ?".to_string()),
367        ("sqlite", "timezoneRegionEq") => Some("json_extract($lookup, '$.region') = ?".to_string()),
368        ("sqlserver", "timezoneRegionEq") => Some("JSON_VALUE($lookup, '$.region') = ?".to_string()),
369
370        // Language: family (Indo-European, Sino-Tibetan, Japonic, etc.)
371        ("postgres", "languageFamilyEq") => Some("$lookup ->> 'family' = $1".to_string()),
372        ("mysql", "languageFamilyEq") => Some("JSON_EXTRACT($lookup, '$.family') = ?".to_string()),
373        ("sqlite", "languageFamilyEq") => Some("json_extract($lookup, '$.family') = ?".to_string()),
374        ("sqlserver", "languageFamilyEq") => Some("JSON_VALUE($lookup, '$.family') = ?".to_string()),
375
376        // Language: writing script (Latin, Cyrillic, Han, etc.)
377        ("postgres", "languageScriptEq") => Some("$lookup ->> 'script' = $1".to_string()),
378        ("mysql", "languageScriptEq") => Some("JSON_EXTRACT($lookup, '$.script') = ?".to_string()),
379        ("sqlite", "languageScriptEq") => Some("json_extract($lookup, '$.script') = ?".to_string()),
380        ("sqlserver", "languageScriptEq") => Some("JSON_VALUE($lookup, '$.script') = ?".to_string()),
381
382        // Locale: language part of locale code
383        ("postgres", "localeLanguageEq") => Some("SPLIT_PART($field, '-', 1) = $1".to_string()),
384        ("mysql", "localeLanguageEq") => Some("SUBSTRING_INDEX($field, '-', 1) = ?".to_string()),
385        ("sqlite", "localeLanguageEq") => Some("SUBSTR($field, 1, INSTR($field, '-') - 1) = ?".to_string()),
386        ("sqlserver", "localeLanguageEq") => Some("SUBSTRING($field, 1, CHARINDEX('-', $field) - 1) = ?".to_string()),
387
388        // Locale: country part of locale code
389        ("postgres", "localeCountryEq") => Some("SPLIT_PART($field, '-', 2) = $1".to_string()),
390        ("mysql", "localeCountryEq") => Some("SUBSTRING_INDEX(SUBSTRING_INDEX($field, '-', 2), '-', -1) = ?".to_string()),
391        ("sqlite", "localeCountryEq") => Some("SUBSTR($field, INSTR($field, '-') + 1) = ?".to_string()),
392        ("sqlserver", "localeCountryEq") => Some("SUBSTRING($field, CHARINDEX('-', $field) + 1, LEN($field)) = ?".to_string()),
393
394        // ========================================================================
395        // GEOSPATIAL OPERATORS (PostGIS - PostgreSQL only, with fallbacks)
396        // ========================================================================
397        // Coordinates: Distance within radius
398        // Format: JSONB with {lat: f64, lng: f64}
399        ("postgres", "distanceWithin") => Some(
400            "ST_DWithin(
401                ST_GeomFromText('POINT(' || ($field->>'lng') || ' ' || ($field->>'lat') || ')'),
402                ST_GeomFromText('POINT($1 $2)'),
403                $3 * 1000
404            )"
405            .to_string()
406        ),
407        // MySQL: Uses ST_Distance_Sphere for great-circle distance
408        ("mysql", "distanceWithin") => Some(
409            "ST_Distance_Sphere(
410                ST_GeomFromText(CONCAT('POINT(', JSON_EXTRACT($field, '$.lng'), ' ', JSON_EXTRACT($field, '$.lat'), ')')),
411                ST_GeomFromText(CONCAT('POINT(', ?, ' ', ?, ')'))
412            ) <= ? * 1000"
413            .to_string()
414        ),
415        // SQLite: Haversine formula approximation
416        ("sqlite", "distanceWithin") => Some(
417            "111.111 * DEGREES(ACOS(LEAST(1, GREATEST(-1,
418                COS(RADIANS(90 - json_extract($field, '$.lat'))) *
419                COS(RADIANS(90 - ?)) *
420                COS(RADIANS(json_extract($field, '$.lng') - ?)) +
421                SIN(RADIANS(90 - json_extract($field, '$.lat'))) *
422                SIN(RADIANS(90 - ?))
423            )))) <= ?"
424            .to_string()
425        ),
426        // SQL Server: Uses geography type
427        ("sqlserver", "distanceWithin") => Some(
428            "geography::Point(JSON_VALUE($field, '$.lat'), JSON_VALUE($field, '$.lng'), 4326)
429                .STDistance(geography::Point(?, ?, 4326)) <= ? * 1000"
430            .to_string()
431        ),
432
433        // Coordinates: Within bounding box
434        ("postgres", "withinBoundingBox") => Some(
435            "($field->>'lat')::float8 BETWEEN $1 AND $2 AND ($field->>'lng')::float8 BETWEEN $3 AND $4"
436                .to_string()
437        ),
438        ("mysql", "withinBoundingBox") => Some(
439            "JSON_EXTRACT($field, '$.lat') BETWEEN ? AND ? AND JSON_EXTRACT($field, '$.lng') BETWEEN ? AND ?"
440                .to_string()
441        ),
442        ("sqlite", "withinBoundingBox") => Some(
443            "json_extract($field, '$.lat') BETWEEN ? AND ? AND json_extract($field, '$.lng') BETWEEN ? AND ?"
444                .to_string()
445        ),
446        ("sqlserver", "withinBoundingBox") => Some(
447            "JSON_VALUE($field, '$.lat') BETWEEN ? AND ? AND JSON_VALUE($field, '$.lng') BETWEEN ? AND ?"
448                .to_string()
449        ),
450
451        // ========================================================================
452        // PHONE NUMBER OPERATORS
453        // ========================================================================
454        // Phone: Country code from E.164 format
455        ("postgres", "phoneCountryCodeEq") => Some("SUBSTRING($field FROM 1 FOR LENGTH($1)) = $1".to_string()),
456        ("mysql", "phoneCountryCodeEq") => Some("SUBSTRING($field, 1, LENGTH(?)) = ?".to_string()),
457        ("sqlite", "phoneCountryCodeEq") => Some("SUBSTR($field, 1, LENGTH(?)) = ?".to_string()),
458        ("sqlserver", "phoneCountryCodeEq") => Some("SUBSTRING($field, 1, LEN(?)) = ?".to_string()),
459
460        ("postgres", "phoneCountryCodeIn") => Some("SUBSTRING($field FROM 1 FOR POSITION('+' IN $field)) IN ($params)".to_string()),
461        ("mysql", "phoneCountryCodeIn") => Some("SUBSTRING($field, 1, LOCATE('+', $field)) IN ($params)".to_string()),
462        ("sqlite", "phoneCountryCodeIn") => Some("SUBSTR($field, 1, INSTR($field, '+')) IN ($params)".to_string()),
463        ("sqlserver", "phoneCountryCodeIn") => Some("SUBSTRING($field, 1, CHARINDEX('+', $field)) IN ($params)".to_string()),
464
465        // Phone: E.164 format validation (+[1-9]{1,3}[0-9]{1,14})
466        ("postgres", "phoneIsValid") => Some("$field ~ '^\\+[1-9]\\d{1,14}$' = $1".to_string()),
467        ("mysql", "phoneIsValid") => Some("$field REGEXP '^\\\\+[1-9]\\\\d{1,14}$' = ?".to_string()),
468        ("sqlite", "phoneIsValid") => Some("$field GLOB '+[1-9]*' AND LENGTH($field) BETWEEN 5 AND 15".to_string()),
469        ("sqlserver", "phoneIsValid") => Some("$field LIKE '+[1-9]%'".to_string()),
470
471        // Phone: Type classification (mobile, fixed, etc.)
472        ("postgres", "phoneTypeEq") => Some("CASE WHEN $field ~ '^\\+1' THEN 'US' WHEN $field ~ '^\\+44' THEN 'UK' ELSE 'OTHER' END = $1".to_string()),
473        ("mysql", "phoneTypeEq") => Some("CASE WHEN $field REGEXP '^\\\\+1' THEN 'US' WHEN $field REGEXP '^\\\\+44' THEN 'UK' ELSE 'OTHER' END = ?".to_string()),
474        ("sqlite", "phoneTypeEq") => Some("CASE WHEN $field GLOB '+1*' THEN 'US' WHEN $field GLOB '+44*' THEN 'UK' ELSE 'OTHER' END = ?".to_string()),
475        ("sqlserver", "phoneTypeEq") => Some("CASE WHEN $field LIKE '+1%' THEN 'US' WHEN $field LIKE '+44%' THEN 'UK' ELSE 'OTHER' END = ?".to_string()),
476
477        // ========================================================================
478        // DATE RANGE OPERATORS
479        // ========================================================================
480        // Format: JSON with {start: ISO8601, end: ISO8601} or period string
481
482        // DateRange: Duration in days >= min
483        ("postgres", "durationGte") => Some(
484            "EXTRACT(DAY FROM ($field->>'end')::timestamp - ($field->>'start')::timestamp) >= $1"
485                .to_string()
486        ),
487        ("mysql", "durationGte") => Some(
488            "DATEDIFF(JSON_EXTRACT($field, '$.end'), JSON_EXTRACT($field, '$.start')) >= ?"
489                .to_string()
490        ),
491        ("sqlite", "durationGte") => Some(
492            "CAST((julianday(json_extract($field, '$.end')) - julianday(json_extract($field, '$.start'))) AS INTEGER) >= ?"
493                .to_string()
494        ),
495        ("sqlserver", "durationGte") => Some(
496            "DATEDIFF(DAY, JSON_VALUE($field, '$.start'), JSON_VALUE($field, '$.end')) >= ?"
497                .to_string()
498        ),
499
500        // DateRange: Starts after date
501        ("postgres", "startsAfter") => Some("($field->>'start')::timestamp > $1::timestamp".to_string()),
502        ("mysql", "startsAfter") => Some("JSON_EXTRACT($field, '$.start') > ?".to_string()),
503        ("sqlite", "startsAfter") => Some("json_extract($field, '$.start') > ?".to_string()),
504        ("sqlserver", "startsAfter") => Some("JSON_VALUE($field, '$.start') > ?".to_string()),
505
506        // DateRange: Ends before date
507        ("postgres", "endsBefore") => Some("($field->>'end')::timestamp < $1::timestamp".to_string()),
508        ("mysql", "endsBefore") => Some("JSON_EXTRACT($field, '$.end') < ?".to_string()),
509        ("sqlite", "endsBefore") => Some("json_extract($field, '$.end') < ?".to_string()),
510        ("sqlserver", "endsBefore") => Some("JSON_VALUE($field, '$.end') < ?".to_string()),
511
512        // DateRange: Overlaps with another range
513        ("postgres", "overlaps") => Some(
514            "($field->>'start')::timestamp < $2::timestamp AND ($field->>'end')::timestamp > $1::timestamp"
515                .to_string()
516        ),
517        ("mysql", "overlaps") => Some(
518            "JSON_EXTRACT($field, '$.start') < ? AND JSON_EXTRACT($field, '$.end') > ?"
519                .to_string()
520        ),
521        ("sqlite", "overlaps") => Some(
522            "json_extract($field, '$.start') < ? AND json_extract($field, '$.end') > ?"
523                .to_string()
524        ),
525        ("sqlserver", "overlaps") => Some(
526            "JSON_VALUE($field, '$.start') < ? AND JSON_VALUE($field, '$.end') > ?"
527                .to_string()
528        ),
529
530        // ========================================================================
531        // DURATION OPERATORS
532        // ========================================================================
533        // Format: ISO8601 duration (P1Y2M3DT4H5M6S) or total seconds/milliseconds
534
535        // Duration: Total seconds equals
536        ("postgres", "totalSecondsEq") => Some(
537            "EXTRACT(EPOCH FROM CAST($field AS INTERVAL)) = $1"
538                .to_string()
539        ),
540        ("mysql", "totalSecondsEq") => Some(
541            "CAST(REPLACE($field, 'PT', '') AS UNSIGNED) = ?"
542                .to_string()
543        ),
544        ("sqlite", "totalSecondsEq") => Some(
545            "CAST(REPLACE($field, 'PT', '') AS INTEGER) = ?"
546                .to_string()
547        ),
548        ("sqlserver", "totalSecondsEq") => Some(
549            "CAST(SUBSTRING($field, 3, LEN($field)) AS BIGINT) = ?"
550                .to_string()
551        ),
552
553        // Duration: Total minutes >= min
554        ("postgres", "totalMinutesGte") => Some(
555            "EXTRACT(EPOCH FROM CAST($field AS INTERVAL)) / 60 >= $1"
556                .to_string()
557        ),
558        ("mysql", "totalMinutesGte") => Some(
559            "CAST(REPLACE($field, 'PT', '') AS UNSIGNED) / 60 >= ?"
560                .to_string()
561        ),
562        ("sqlite", "totalMinutesGte") => Some(
563            "CAST(REPLACE($field, 'PT', '') AS INTEGER) / 60 >= ?"
564                .to_string()
565        ),
566        ("sqlserver", "totalMinutesGte") => Some(
567            "CAST(SUBSTRING($field, 3, LEN($field)) AS BIGINT) / 60 >= ?"
568                .to_string()
569        ),
570
571        // Standard operators (not extended operators, so no templates)
572        _ => None,
573    }
574}
575
576/// Extract SQL templates for a specific operator from all database handlers.
577///
578/// Returns a map of database name to SQL template.
579/// If a database handler doesn't support the operator, it's omitted from the map.
580pub fn extract_operator_templates(operator_name: &str) -> HashMap<String, String> {
581    let mut templates = HashMap::new();
582
583    for db in &["postgres", "mysql", "sqlite", "sqlserver"] {
584        if let Some(template) = extract_template_for_operator(db, operator_name) {
585            templates.insert((*db).to_string(), template);
586        }
587    }
588
589    templates
590}
591
592/// Build SQL templates metadata for rich filter types.
593///
594/// Generates a JSON structure like:
595/// ```json
596/// {
597///   "operators": {
598///     "emailDomainEq": {
599///       "postgres": "SPLIT_PART($field, '@', 2) = $1",
600///       "mysql": "SUBSTRING_INDEX($field, '@', -1) = ?",
601///       "sqlite": "...",
602///       "sqlserver": "..."
603///     }
604///   }
605/// }
606/// ```
607pub fn build_sql_templates_metadata(operator_names: &[&str]) -> serde_json::Value {
608    let mut operators = serde_json::Map::new();
609
610    for op_name in operator_names {
611        let templates = extract_operator_templates(op_name);
612        if !templates.is_empty() {
613            operators.insert((*op_name).to_string(), json!(templates));
614        }
615    }
616
617    json!({
618        "operators": operators
619    })
620}
621
622#[cfg(test)]
623mod tests {
624    use super::*;
625
626    #[test]
627    fn test_extract_operator_templates() {
628        let templates = extract_operator_templates("domainEq");
629
630        // Should have templates for all 4 databases
631        assert_eq!(templates.len(), 4);
632        assert!(templates.contains_key("postgres"));
633        assert!(templates.contains_key("mysql"));
634        assert!(templates.contains_key("sqlite"));
635        assert!(templates.contains_key("sqlserver"));
636
637        // Verify templates are correct
638        assert!(templates["postgres"].contains("SPLIT_PART"));
639        assert!(templates["mysql"].contains("SUBSTRING_INDEX"));
640    }
641
642    #[test]
643    fn test_build_sql_templates_metadata() {
644        let operators = vec!["domainEq", "wmiEq"];
645        let metadata = build_sql_templates_metadata(&operators);
646
647        assert!(metadata.get("operators").is_some());
648        let ops = metadata["operators"].as_object().unwrap();
649        assert_eq!(ops.len(), 2);
650        assert!(ops.contains_key("domainEq"));
651        assert!(ops.contains_key("wmiEq"));
652    }
653
654    #[test]
655    fn test_extract_vin_templates() {
656        let templates = extract_operator_templates("wmiEq");
657
658        assert!(templates.contains_key("postgres"));
659        assert!(templates["postgres"].contains("SUBSTRING"));
660        assert!(templates["mysql"].contains("SUBSTRING"));
661    }
662
663    #[test]
664    fn test_geospatial_templates() {
665        let templates = extract_operator_templates("distanceWithin");
666
667        assert!(templates.contains_key("postgres"));
668        assert!(templates["postgres"].contains("ST_DWithin"));
669
670        assert!(templates.contains_key("mysql"));
671        assert!(templates["mysql"].contains("ST_Distance_Sphere"));
672
673        assert!(templates.contains_key("sqlite"));
674        assert!(templates["sqlite"].contains("Haversine") || templates["sqlite"].contains("ACOS"));
675
676        assert!(templates.contains_key("sqlserver"));
677        assert!(templates["sqlserver"].contains("geography"));
678    }
679
680    #[test]
681    fn test_phone_templates() {
682        let templates = extract_operator_templates("phoneCountryCodeEq");
683
684        assert!(templates.contains_key("postgres"));
685        assert!(templates.contains_key("mysql"));
686        assert!(templates.contains_key("sqlite"));
687        assert!(templates.contains_key("sqlserver"));
688    }
689
690    #[test]
691    fn test_date_range_templates() {
692        let templates = extract_operator_templates("durationGte");
693
694        assert!(templates.contains_key("postgres"));
695        assert!(templates["postgres"].contains("EXTRACT"));
696
697        assert!(templates.contains_key("mysql"));
698        assert!(templates["mysql"].contains("DATEDIFF"));
699    }
700
701    #[test]
702    fn test_duration_templates() {
703        let templates = extract_operator_templates("totalSecondsEq");
704
705        assert!(templates.contains_key("postgres"));
706        assert!(templates["postgres"].contains("EPOCH"));
707
708        assert!(templates.contains_key("mysql"));
709        assert!(templates["mysql"].contains("REPLACE"));
710    }
711}