import subprocess
import csv
from io import StringIO
import sys
import os
import tempfile
SQL_CLI = os.path.join(os.path.dirname(__file__), "../../target/release/sql-cli")
def run_query(query, data_file=None):
cmd = [SQL_CLI]
if data_file:
cmd.append(data_file)
cmd.extend(["-q", query, "-o", "csv"])
result = subprocess.run(
cmd,
capture_output=True,
text=True,
)
if result.returncode != 0:
raise Exception(f"Query failed: {result.stderr}")
reader = csv.DictReader(StringIO(result.stdout))
return list(reader)
def create_test_file():
content = """id,name,category,value,status
1,Alice,A,100,active
2,Bob,B,200,active
3,Charlie,A,100,inactive
4,David,C,300,active
5,Eve,B,200,inactive
6,Frank,A,100,active
7,Grace,C,300,active
8,Henry,B,200,active
9,Ivy,D,400,active
10,Jack,A,100,inactive"""
with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
f.write(content)
return f.name
def test_distinct_single_column():
test_file = create_test_file()
try:
query = "SELECT DISTINCT category FROM test ORDER BY category"
result = run_query(query, test_file)
categories = [row['category'] for row in result]
assert categories == ['A', 'B', 'C', 'D'], f"Expected ['A', 'B', 'C', 'D'], got {categories}"
query = "SELECT DISTINCT value FROM test ORDER BY value"
result = run_query(query, test_file)
values = [int(row['value']) for row in result]
assert values == [100, 200, 300, 400], f"Expected [100, 200, 300, 400], got {values}"
print("✓ DISTINCT single column test passed")
finally:
os.unlink(test_file)
def test_distinct_multiple_columns():
test_file = create_test_file()
try:
query = "SELECT DISTINCT category, value FROM test ORDER BY category, value"
result = run_query(query, test_file)
expected = [
('A', 100),
('B', 200),
('C', 300),
('D', 400)
]
actual = [(row['category'], int(row['value'])) for row in result]
assert actual == expected, f"Expected {expected}, got {actual}"
print("✓ DISTINCT multiple columns test passed")
finally:
os.unlink(test_file)
def test_distinct_with_status():
test_file = create_test_file()
try:
query = "SELECT DISTINCT category, status FROM test ORDER BY category, status"
result = run_query(query, test_file)
combinations = [(row['category'], row['status']) for row in result]
assert ('A', 'active') in combinations
assert ('A', 'inactive') in combinations
assert ('B', 'active') in combinations
assert ('B', 'inactive') in combinations
print("✓ DISTINCT with status test passed")
finally:
os.unlink(test_file)
def test_distinct_all_columns():
test_file = create_test_file()
try:
query = "SELECT COUNT(*) AS total FROM test"
result = run_query(query, test_file)
total = int(result[0]['total'])
assert total == 10, f"Expected 10 total rows, got {total}"
query = "SELECT DISTINCT * FROM test"
result = run_query(query, test_file)
assert len(result) == 10, f"Expected 10 distinct rows, got {len(result)}"
print("✓ DISTINCT all columns test passed")
finally:
os.unlink(test_file)
def test_distinct_with_where():
test_file = create_test_file()
try:
query = "SELECT DISTINCT category FROM test WHERE status = 'active' ORDER BY category"
result = run_query(query, test_file)
categories = [row['category'] for row in result]
assert categories == ['A', 'B', 'C', 'D'], f"Expected all categories for active status"
query = "SELECT DISTINCT category FROM test WHERE value = 100 ORDER BY category"
result = run_query(query, test_file)
categories = [row['category'] for row in result]
assert categories == ['A'], f"Expected only 'A' for value 100"
print("✓ DISTINCT with WHERE test passed")
finally:
os.unlink(test_file)
def test_distinct_with_expressions():
query = "WITH nums AS (SELECT value % 5 AS mod5 FROM RANGE(1, 25)) SELECT DISTINCT mod5 FROM nums ORDER BY mod5"
result = run_query(query)
mod_values = [int(row['mod5']) for row in result]
assert mod_values == [0, 1, 2, 3, 4], f"Expected [0,1,2,3,4], got {mod_values}"
query = "WITH nums AS (SELECT FLOOR(value / 10) AS tens FROM RANGE(1, 35)) SELECT DISTINCT tens FROM nums ORDER BY tens"
result = run_query(query)
tens_values = [int(float(row['tens'])) for row in result]
assert tens_values == [0, 1, 2, 3], f"Expected [0,1,2,3], got {tens_values}"
print("✓ DISTINCT with expressions test passed")
def test_distinct_with_cte():
test_file = create_test_file()
try:
query = """
WITH unique_categories AS (
SELECT DISTINCT category FROM test
)
SELECT COUNT(*) AS category_count FROM unique_categories
"""
result = run_query(query, test_file)
count = int(result[0]['category_count'])
assert count == 4, f"Expected 4 unique categories, got {count}"
print("✓ DISTINCT with CTE test passed")
finally:
os.unlink(test_file)
def test_distinct_with_limit():
test_file = create_test_file()
try:
query = "SELECT DISTINCT category FROM test ORDER BY category LIMIT 2"
result = run_query(query, test_file)
categories = [row['category'] for row in result]
assert categories == ['A', 'B'], f"Expected ['A', 'B'] with LIMIT 2"
print("✓ DISTINCT with LIMIT test passed")
finally:
os.unlink(test_file)
def test_distinct_count():
test_file = create_test_file()
try:
query = """
WITH dist AS (SELECT DISTINCT category FROM test)
SELECT COUNT(*) AS distinct_count FROM dist
"""
result = run_query(query, test_file)
count = int(result[0]['distinct_count'])
assert count == 4, f"Expected 4 distinct categories, got {count}"
query = """
WITH dist AS (SELECT DISTINCT value FROM test)
SELECT COUNT(*) AS distinct_count FROM dist
"""
result = run_query(query, test_file)
count = int(result[0]['distinct_count'])
assert count == 4, f"Expected 4 distinct values, got {count}"
print("✓ DISTINCT count test passed")
finally:
os.unlink(test_file)
def test_distinct_with_null_values():
content = """id,name,category
1,Alice,A
2,Bob,C
3,Charlie,A
4,David,B
5,Eve,B
6,Frank,C"""
with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
f.write(content)
test_file = f.name
try:
query = "SELECT DISTINCT category FROM test ORDER BY category"
result = run_query(query, test_file)
categories = [row['category'] for row in result]
assert len(result) == 3, f"Expected 3 distinct values, got {len(result)}: {categories}"
assert 'A' in categories
assert 'B' in categories
assert 'C' in categories
print("✓ DISTINCT with duplicates test passed")
finally:
os.unlink(test_file)
def test_distinct_case_sensitivity():
content = """id,name
1,Alice
2,alice
3,ALICE
4,Bob
5,bob"""
with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
f.write(content)
test_file = f.name
try:
query = "SELECT DISTINCT name FROM test ORDER BY name"
result = run_query(query, test_file)
names = [row['name'] for row in result]
assert len(names) == 5, f"Expected 5 distinct names (case sensitive)"
print("✓ DISTINCT case sensitivity test passed")
finally:
os.unlink(test_file)
def main():
tests = [
test_distinct_single_column,
test_distinct_multiple_columns,
test_distinct_with_status,
test_distinct_all_columns,
test_distinct_with_where,
test_distinct_with_expressions,
test_distinct_with_cte,
test_distinct_with_limit,
test_distinct_count,
test_distinct_with_null_values,
test_distinct_case_sensitivity
]
passed = 0
failed = 0
print("Running DISTINCT tests...")
print("=" * 60)
for test in tests:
try:
test()
passed += 1
except Exception as e:
print(f"✗ {test.__name__}: {str(e)}")
failed += 1
print("=" * 60)
print(f"Results: {passed} passed, {failed} failed")
if failed > 0:
sys.exit(1)
if __name__ == "__main__":
main()