Skip to main content

tck_extractor/
tck_extractor.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4//! Extract all TCK queries from Cucumber .feature files with placeholders properly substituted.
5
6use std::collections::HashMap;
7use std::fs;
8use std::io::Write;
9use std::path::Path;
10
/// A single Cypher query pulled out of a TCK `.feature` file.
#[derive(Clone, Debug)]
struct Query {
    /// Path of the feature file the query was read from.
    feature_file: String,
    /// Scenario name; rows expanded from an outline carry an ` (example N)` suffix.
    /// Underscore-prefixed because no code in this file reads it back.
    _scenario_name: String,
    /// Query text with docstring indentation removed and placeholders substituted.
    query_text: String,
    /// Error type named after the last colon of a "should be raised" step, when captured.
    expected_error: Option<String>,
}
18
19fn main() {
20    let tck_root = "../../cypher-tck/tck-M23/tck/features";
21    let valid_output = "VALID_TCK_QUERIES.md";
22    let invalid_output = "INVALID_TCK_QUERIES.md";
23
24    println!("Extracting TCK queries from: {}", tck_root);
25
26    let (valid_queries, invalid_queries) = extract_all_queries(tck_root);
27
28    println!("Extracted {} valid queries", valid_queries.len());
29    println!(
30        "Extracted {} invalid queries (expected to fail)",
31        invalid_queries.len()
32    );
33
34    write_queries_file(valid_output, &valid_queries, false);
35    write_queries_file(invalid_output, &invalid_queries, true);
36
37    println!("Wrote valid queries to: {}", valid_output);
38    println!("Wrote invalid queries to: {}", invalid_output);
39}
40
41fn extract_all_queries(root: &str) -> (Vec<Query>, Vec<Query>) {
42    let mut queries = Vec::new();
43
44    visit_features(Path::new(root), &mut queries);
45
46    // Separate valid and invalid queries
47    let mut valid = Vec::new();
48    let mut invalid = Vec::new();
49
50    for query in queries {
51        if query.expected_error.is_some() {
52            invalid.push(query);
53        } else {
54            valid.push(query);
55        }
56    }
57
58    valid.sort_by(|a, b| a.feature_file.cmp(&b.feature_file));
59    invalid.sort_by(|a, b| a.feature_file.cmp(&b.feature_file));
60
61    (valid, invalid)
62}
63
64fn visit_features(dir: &Path, queries: &mut Vec<Query>) {
65    if !dir.is_dir() {
66        return;
67    }
68
69    let mut entries: Vec<_> = fs::read_dir(dir).unwrap().filter_map(|e| e.ok()).collect();
70
71    entries.sort_by_key(|e| e.path());
72
73    for entry in entries {
74        let path = entry.path();
75        if path.is_dir() {
76            visit_features(&path, queries);
77        } else if path.extension().and_then(|s| s.to_str()) == Some("feature") {
78            extract_from_feature(&path, queries);
79        }
80    }
81}
82
83fn extract_from_feature(path: &Path, queries: &mut Vec<Query>) {
84    let content = fs::read_to_string(path).unwrap();
85    let feature_path = path.to_string_lossy().to_string();
86
87    let lines: Vec<&str> = content.lines().collect();
88    let mut i = 0;
89
90    while i < lines.len() {
91        let line = lines[i].trim_start();
92
93        // Check for Scenario or Scenario Outline
94        if line.starts_with("Scenario:") || line.starts_with("Scenario Outline:") {
95            let is_outline = line.starts_with("Scenario Outline:");
96
97            // Check if previous line has @skipGrammarCheck tag
98            let has_skip_tag = i > 0 && lines[i - 1].trim().contains("@skipGrammarCheck");
99
100            // Extract scenario name (format: "Scenario: [N] Name")
101            let scenario_name = line.split("] ").nth(1).unwrap_or("unnamed").trim();
102
103            // Find the end of this scenario (next Scenario or end of file)
104            let mut end = i + 1;
105            while end < lines.len() {
106                let next_line = lines[end].trim_start();
107                if next_line.starts_with("Scenario:") || next_line.starts_with("Scenario Outline:")
108                {
109                    break;
110                }
111                end += 1;
112            }
113
114            let scenario_lines = &lines[i..end];
115            let scenario_text = scenario_lines.join("\n");
116
117            if is_outline {
118                extract_scenario_outline(
119                    &feature_path,
120                    scenario_name,
121                    &scenario_text,
122                    has_skip_tag,
123                    queries,
124                );
125            } else {
126                extract_scenario(
127                    &feature_path,
128                    scenario_name,
129                    &scenario_text,
130                    has_skip_tag,
131                    queries,
132                );
133            }
134
135            i = end;
136        } else {
137            i += 1;
138        }
139    }
140}
141
142fn extract_scenario(
143    feature_path: &str,
144    scenario_name: &str,
145    scenario_text: &str,
146    has_skip_tag: bool,
147    queries: &mut Vec<Query>,
148) {
149    if let Some(query) = extract_query_text(scenario_text) {
150        let expected_error = if has_skip_tag {
151            extract_expected_error(scenario_text)
152        } else {
153            None
154        };
155
156        queries.push(Query {
157            feature_file: feature_path.to_string(),
158            _scenario_name: scenario_name.to_string(),
159            query_text: query,
160            expected_error,
161        });
162    }
163}
164
165fn extract_scenario_outline(
166    feature_path: &str,
167    scenario_name: &str,
168    scenario_text: &str,
169    has_skip_tag: bool,
170    queries: &mut Vec<Query>,
171) {
172    let template = match extract_query_text(scenario_text) {
173        Some(t) => t,
174        None => return,
175    };
176
177    let expected_error = if has_skip_tag {
178        extract_expected_error(scenario_text)
179    } else {
180        None
181    };
182
183    let examples = extract_examples(scenario_text);
184
185    if examples.is_empty() {
186        // No examples, just add the template
187        queries.push(Query {
188            feature_file: feature_path.to_string(),
189            _scenario_name: scenario_name.to_string(),
190            query_text: template,
191            expected_error,
192        });
193        return;
194    }
195
196    // Generate one query per example row
197    for (idx, example) in examples.iter().enumerate() {
198        let substituted = substitute_placeholders(&template, example);
199        queries.push(Query {
200            feature_file: feature_path.to_string(),
201            _scenario_name: format!("{} (example {})", scenario_name, idx + 1),
202            query_text: substituted,
203            expected_error: expected_error.clone(),
204        });
205    }
206}
207
/// Returns the error type named by the first "... should be raised ...: <ErrorType>"
/// step in `text`.
///
/// The error type is whatever follows the last `:` on the step line, trimmed.
/// Matching steps with nothing after the colon (or no colon at all) are passed
/// over and the search continues with later lines. Returns `None` when no step
/// yields a non-empty error type.
fn extract_expected_error(text: &str) -> Option<String> {
    // The SyntaxError prefix is already covered by the generic "Then a ... should be
    // raised" form; both checks are kept, with the precedence made explicit.
    let is_error_step = |step: &str| {
        step.starts_with("Then a SyntaxError should be raised")
            || (step.starts_with("Then a") && step.contains("should be raised"))
    };

    text.lines()
        .map(str::trim)
        .filter(|step| is_error_step(step))
        .filter_map(|step| step.rsplit_once(':'))
        .map(|(_, error_type)| error_type.trim())
        .find(|error_type| !error_type.is_empty())
        .map(str::to_string)
}
226
/// Extracts the Cypher query from the docstring of the first
/// `When executing query:` step in `text`.
///
/// The step line must be directly followed by a line starting (after indentation)
/// with `"""`; the query is every line up to the next line consisting solely of
/// `"""`. Parsing is line-based, so it works for any indentation depth and for
/// both `\n` and `\r\n` line endings.
///
/// Each query line has the opening delimiter's indentation removed, which keeps
/// relative indentation inside the query. Lines indented less than the delimiter
/// are left-trimmed instead, and lines that end up empty are dropped.
///
/// Returns `None` when there is no such step or the docstring is never closed.
fn extract_query_text(text: &str) -> Option<String> {
    const STEP: &str = "When executing query:";
    const DELIMITER: &str = "\"\"\"";

    let lines: Vec<&str> = text.lines().collect();

    // Index of the opening delimiter: the line right after the step line.
    let opening_idx = 1 + lines.windows(2).position(|pair| {
        pair[0].trim_end().ends_with(STEP) && pair[1].trim_start().starts_with(DELIMITER)
    })?;

    // The delimiter's own leading whitespace defines how much to strip from the body.
    let opening = lines[opening_idx];
    let indent = &opening[..opening.len() - opening.trim_start().len()];

    let body = &lines[opening_idx + 1..];
    let closing_idx = body.iter().position(|line| line.trim() == DELIMITER)?;

    let cleaned: Vec<&str> = body[..closing_idx]
        .iter()
        .map(|line| {
            line.strip_prefix(indent)
                .unwrap_or_else(|| line.trim_start())
        })
        .filter(|line| !line.is_empty())
        .collect();

    Some(cleaned.join("\n"))
}
256
/// Parses the first `Examples:` table in `text` into one map per data row, keyed
/// by the header cell names.
///
/// The table is the first run of consecutive lines starting with `|` after the
/// `Examples:` marker; its first line is the header. Cells are trimmed, and empty
/// cells are kept as empty strings so that rows with blank values are not lost.
/// Rows whose cell count differs from the header's are skipped.
///
/// Returns an empty vector when there is no `Examples:` section or no table.
/// Escaped pipes (`\|`) inside cells are not interpreted.
fn extract_examples(text: &str) -> Vec<HashMap<String, String>> {
    let Some(start) = text.find("Examples:") else {
        return Vec::new();
    };

    let mut rows = text[start..]
        .lines()
        .map(str::trim)
        .skip_while(|line| !line.starts_with('|'))
        .take_while(|line| line.starts_with('|'))
        .map(split_table_row);

    let Some(headers) = rows.next() else {
        return Vec::new();
    };

    rows.filter(|cells| cells.len() == headers.len())
        .map(|cells| {
            headers
                .iter()
                .copied()
                .zip(cells)
                .map(|(header, value)| (header.to_owned(), value.to_owned()))
                .collect()
        })
        .collect()
}

/// Splits one trimmed `| a | b |` table line into trimmed cells, preserving empty ones.
fn split_table_row(row: &str) -> Vec<&str> {
    // Only the outer pipes are delimiters of the row itself; everything between
    // them is cell content, including empty cells.
    let inner = row.strip_prefix('|').unwrap_or(row);
    let inner = inner.strip_suffix('|').unwrap_or(inner);
    inner.split('|').map(str::trim).collect()
}
310
/// Replaces every `<key>` placeholder in `template` with the matching entry of
/// `values`.
///
/// The template is scanned once from left to right, so substituted text is never
/// rescanned: a value that itself looks like `<other>` is emitted literally, and
/// the result does not depend on the map's iteration order. A `<` that does not
/// start a known placeholder (for example a Cypher `<` comparison or `<-` arrow)
/// is copied through unchanged.
///
/// A placeholder name ends at the first `>` after its `<`, so keys containing `>`
/// can never match.
fn substitute_placeholders(template: &str, values: &HashMap<String, String>) -> String {
    let mut result = String::with_capacity(template.len());
    let mut rest = template;

    while let Some(open) = rest.find('<') {
        result.push_str(&rest[..open]);
        let after_open = &rest[open + 1..];

        let replacement = after_open.find('>').and_then(|close| {
            values
                .get(&after_open[..close])
                .map(|value| (close, value))
        });

        match replacement {
            Some((close, value)) => {
                result.push_str(value);
                rest = &after_open[close + 1..];
            }
            None => {
                // Not a placeholder: keep the '<' and resume right after it so a
                // placeholder starting later on the line is still found.
                result.push('<');
                rest = after_open;
            }
        }
    }

    result.push_str(rest);
    result
}
326
327fn write_queries_file(path: &str, queries: &Vec<Query>, include_expected_errors: bool) {
328    let mut file = fs::File::create(path).unwrap();
329
330    for query in queries {
331        writeln!(file, "// {}", query.feature_file).unwrap();
332        if include_expected_errors && let Some(ref error) = query.expected_error {
333            writeln!(file, "// Expected error: {}", error).unwrap();
334        }
335        writeln!(file, "{}", query.query_text).unwrap();
336        writeln!(file).unwrap();
337    }
338}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a placeholder map from `(name, value)` pairs.
    fn values_of(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(name, value)| (name.to_string(), value.to_string()))
            .collect()
    }

    /// A single placeholder is replaced; identical plain text elsewhere is untouched.
    #[test]
    fn test_substitute_placeholders() {
        let values = values_of(&[("yield", "a, b")]);

        let result =
            substitute_placeholders("CALL test.my.proc(null) YIELD <yield>\nRETURN a, b", &values);

        assert_eq!(result, "CALL test.my.proc(null) YIELD a, b\nRETURN a, b");
    }

    /// Several distinct placeholders in one template are all replaced.
    #[test]
    fn test_substitute_multiple() {
        let values = values_of(&[("a", "123"), ("b", "true")]);

        let result = substitute_placeholders("RETURN <a> AND <b>", &values);

        assert_eq!(result, "RETURN 123 AND true");
    }
}
364}