sql-cli 1.71.2 - Docs.rs

"""Test HAVING clause functionality with GROUP BY."""
import subprocess
import json
import os
import tempfile
import pytest
from pathlib import Path

SQL_CLI_PATH = Path(__file__).parent.parent.parent / "target" / "release" / "sql-cli"


def run_sql_query(csv_file, query):
    """Execute SQL query and return results as list of dictionaries."""
    result = subprocess.run(
        [str(SQL_CLI_PATH), csv_file, "-q", query, "-o", "json"],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise Exception(f"Query failed: {result.stderr}")
    
    # Parse JSON output
    output = result.stdout.strip()
    
    # Remove comment lines that start with #
    lines = output.split('\n')
    json_lines = []
    for line in lines:
        if line and not line.startswith('#'):
            json_lines.append(line)
    
    if not json_lines:
        return []
    
    # Join back to get the full JSON
    json_str = '\n'.join(json_lines)
    
    try:
        # Parse as a JSON array
        results = json.loads(json_str)
        if isinstance(results, list):
            return results
        else:
            return [results]
    except json.JSONDecodeError:
        # If not valid JSON, return empty list
        return []


class TestBasicHaving:
    """Test basic HAVING clause functionality."""
    
    @pytest.fixture
    def sales_data(self):
        """Create test sales data."""
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
            f.write("salesperson,region,product,quantity,revenue\n")
            f.write("Alice,North,Widget,100,1000\n")
            f.write("Alice,North,Gadget,50,750\n")
            f.write("Alice,South,Widget,75,750\n")
            f.write("Bob,North,Widget,200,2000\n")
            f.write("Bob,North,Gadget,25,375\n")
            f.write("Charlie,South,Widget,150,1500\n")
            f.write("Charlie,South,Gadget,100,1500\n")
            f.write("Charlie,South,Tool,50,500\n")
            f.write("David,North,Widget,50,500\n")
            csv_file = f.name
        
        yield csv_file
        os.unlink(csv_file)
    
    def test_having_with_count(self, sales_data):
        """Test HAVING with COUNT aggregate."""
        query = """SELECT salesperson, COUNT(*) as sale_count 
                   FROM test 
                   GROUP BY salesperson 
                   HAVING sale_count > 2
                   ORDER BY salesperson"""
        results = run_sql_query(sales_data, query)
        
        expected = [
            {"salesperson": "Alice", "sale_count": 3},
            {"salesperson": "Charlie", "sale_count": 3},
        ]
        
        assert len(results) == len(expected)
        for actual, exp in zip(results, expected):
            assert actual["salesperson"] == exp["salesperson"]
            assert actual["sale_count"] == exp["sale_count"]
    
    def test_having_with_sum(self, sales_data):
        """Test HAVING with SUM aggregate."""
        query = """SELECT salesperson, SUM(revenue) as total_revenue 
                   FROM test 
                   GROUP BY salesperson 
                   HAVING total_revenue >= 2500
                   ORDER BY salesperson"""
        results = run_sql_query(sales_data, query)
        
        expected = [
            {"salesperson": "Alice", "total_revenue": 2500},
            {"salesperson": "Charlie", "total_revenue": 3500},
        ]
        
        assert len(results) == len(expected)
        for actual, exp in zip(results, expected):
            assert actual["salesperson"] == exp["salesperson"]
            assert actual["total_revenue"] == exp["total_revenue"]
    
    def test_having_with_avg(self, sales_data):
        """Test HAVING with AVG aggregate."""
        query = """SELECT region, AVG(revenue) as avg_revenue 
                   FROM test 
                   GROUP BY region 
                   HAVING avg_revenue > 1000
                   ORDER BY region"""
        results = run_sql_query(sales_data, query)
        
        # South region has avg revenue > 1000
        assert len(results) == 1
        assert results[0]["region"] == "South"
        assert results[0]["avg_revenue"] > 1000
    
    def test_having_with_min_max(self, sales_data):
        """Test HAVING with MIN and MAX aggregates."""
        query = """SELECT product, 
                          MIN(quantity) as min_qty, 
                          MAX(quantity) as max_qty 
                   FROM test 
                   GROUP BY product 
                   HAVING max_qty > 100
                   ORDER BY product"""
        results = run_sql_query(sales_data, query)
        
        # Widget has max_qty = 200, Gadget has max_qty = 100 (excluded)
        expected = [
            {"product": "Widget", "min_qty": 50, "max_qty": 200}
        ]
        
        assert len(results) == len(expected)
        for actual, exp in zip(results, expected):
            assert actual["product"] == exp["product"]
            assert actual["min_qty"] == exp["min_qty"]
            assert actual["max_qty"] == exp["max_qty"]


class TestComplexHaving:
    """Test complex HAVING expressions."""
    
    @pytest.fixture
    def transaction_data(self):
        """Create transaction test data."""
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
            f.write("user_id,category,amount,fee\n")
            f.write("1,Food,100,5\n")
            f.write("1,Food,50,2.5\n")
            f.write("1,Transport,30,1.5\n")
            f.write("2,Food,200,10\n")
            f.write("2,Entertainment,150,7.5\n")
            f.write("3,Food,75,3.75\n")
            f.write("3,Food,125,6.25\n")
            f.write("3,Transport,40,2\n")
            f.write("3,Entertainment,100,5\n")
            csv_file = f.name
        
        yield csv_file
        os.unlink(csv_file)
    
    def test_having_with_multiple_conditions(self, transaction_data):
        """Test HAVING with AND/OR conditions."""
        query = """SELECT user_id, 
                          COUNT(*) as transaction_count,
                          SUM(amount) as total_amount 
                   FROM test 
                   GROUP BY user_id 
                   HAVING transaction_count >= 4 AND total_amount > 300
                   ORDER BY user_id"""
        results = run_sql_query(transaction_data, query)
        
        # Only user_id 3 has >= 4 transactions AND total > 300
        expected = [
            {"user_id": 3, "transaction_count": 4, "total_amount": 340}
        ]
        
        assert len(results) == len(expected)
        for actual, exp in zip(results, expected):
            assert actual["user_id"] == exp["user_id"]
            assert actual["transaction_count"] == exp["transaction_count"]
            assert actual["total_amount"] == exp["total_amount"]
    
    def test_having_with_arithmetic(self, transaction_data):
        """Test HAVING with arithmetic expressions."""
        query = """SELECT category, 
                          SUM(amount) as total,
                          SUM(fee) as total_fee
                   FROM test 
                   GROUP BY category 
                   HAVING total_fee > total * 0.04
                   ORDER BY category"""
        results = run_sql_query(transaction_data, query)
        
        # Food has fee/amount ratio of exactly 5%, Entertainment ~5%
        # We're looking for > 4%, so all categories should qualify
        assert len(results) >= 2  # At least Food and Entertainment
    
    def test_having_with_comparison_operators(self, transaction_data):
        """Test HAVING with various comparison operators."""
        # Test HAVING with >=
        query = """SELECT category, AVG(amount) as avg_amount 
                   FROM test 
                   GROUP BY category 
                   HAVING avg_amount >= 100
                   ORDER BY category"""
        results = run_sql_query(transaction_data, query)
        
        # Food avg = ~112.5, Entertainment avg = 125
        assert len(results) == 2
        categories = [r["category"] for r in results]
        assert "Food" in categories
        assert "Entertainment" in categories
    
    def test_having_between(self, transaction_data):
        """Test HAVING with simple comparison."""
        # Test with a simple < comparison instead of BETWEEN
        query = """SELECT user_id, SUM(amount) as total 
                   FROM test 
                   GROUP BY user_id 
                   HAVING total < 200
                   ORDER BY user_id"""
        results = run_sql_query(transaction_data, query)
        
        # User 1: 180 (included), User 2: 350 (excluded), User 3: 340 (excluded)
        expected = [
            {"user_id": 1, "total": 180}
        ]
        
        assert len(results) == len(expected)
        for actual, exp in zip(results, expected):
            assert actual["user_id"] == exp["user_id"]
            assert actual["total"] == exp["total"]


class TestHavingWithWhere:
    """Test HAVING combined with WHERE clauses."""
    
    @pytest.fixture
    def order_data(self):
        """Create order test data."""
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
            f.write("customer,status,amount,discount\n")
            f.write("Alice,completed,100,10\n")
            f.write("Alice,completed,200,20\n")
            f.write("Alice,cancelled,150,0\n")
            f.write("Bob,completed,300,30\n")
            f.write("Bob,pending,100,0\n")
            f.write("Charlie,completed,50,5\n")
            f.write("Charlie,completed,75,7\n")
            f.write("Charlie,completed,125,12\n")
            csv_file = f.name
        
        yield csv_file
        os.unlink(csv_file)
    
    def test_where_then_having(self, order_data):
        """Test WHERE clause filtering before GROUP BY and HAVING."""
        query = """SELECT customer, 
                          COUNT(*) as order_count,
                          SUM(amount) as total 
                   FROM test 
                   WHERE status = 'completed'
                   GROUP BY customer 
                   HAVING order_count >= 2
                   ORDER BY customer"""
        results = run_sql_query(order_data, query)
        
        # Only Alice and Charlie have >= 2 completed orders
        expected = [
            {"customer": "Alice", "order_count": 2, "total": 300},
            {"customer": "Charlie", "order_count": 3, "total": 250},
        ]
        
        assert len(results) == len(expected)
        for actual, exp in zip(results, expected):
            assert actual["customer"] == exp["customer"]
            assert actual["order_count"] == exp["order_count"]
            assert actual["total"] == exp["total"]
    
    def test_having_with_filtered_aggregates(self, order_data):
        """Test HAVING on aggregates of filtered data."""
        query = """SELECT customer, 
                          AVG(amount - discount) as avg_net 
                   FROM test 
                   WHERE status = 'completed'
                   GROUP BY customer 
                   HAVING avg_net > 80
                   ORDER BY customer"""
        results = run_sql_query(order_data, query)
        
        # Calculate expected net averages for completed orders
        # Alice: (90 + 180) / 2 = 135
        # Bob: 270 / 1 = 270
        # Charlie: (45 + 68 + 113) / 3 = 75.33
        
        assert len(results) == 2  # Alice and Bob
        customers = [r["customer"] for r in results]
        assert "Alice" in customers
        assert "Bob" in customers


class TestHavingEdgeCases:
    """Test edge cases for HAVING clause."""
    
    @pytest.fixture
    def simple_data(self):
        """Create simple test data."""
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
            f.write("group_col,value\n")
            f.write("A,10\n")
            f.write("A,20\n")
            f.write("B,30\n")
            f.write("B,40\n")
            f.write("C,50\n")
            csv_file = f.name
        
        yield csv_file
        os.unlink(csv_file)
    
    def test_having_filters_all_groups(self, simple_data):
        """Test HAVING that filters out all groups."""
        query = """SELECT group_col, SUM(value) as total 
                   FROM test 
                   GROUP BY group_col 
                   HAVING total > 1000"""
        results = run_sql_query(simple_data, query)
        
        # No group has sum > 1000
        assert len(results) == 0
    
    def test_having_with_single_group(self, simple_data):
        """Test HAVING with only one qualifying group."""
        query = """SELECT group_col, COUNT(*) as count 
                   FROM test 
                   GROUP BY group_col 
                   HAVING count = 1"""
        results = run_sql_query(simple_data, query)
        
        # Only group C has count = 1
        expected = [{"group_col": "C", "count": 1}]
        
        assert len(results) == len(expected)
        assert results[0]["group_col"] == expected[0]["group_col"]
        assert results[0]["count"] == expected[0]["count"]
    
    def test_having_without_alias_not_supported(self, simple_data):
        """Test that HAVING without alias (using raw aggregate) might not work."""
        # This test documents current behavior - using COUNT(*) directly in HAVING
        # may not work, needs to use alias
        query = """SELECT group_col, COUNT(*) as cnt 
                   FROM test 
                   GROUP BY group_col 
                   HAVING cnt > 1"""
        results = run_sql_query(simple_data, query)
        
        # Groups A and B have count > 1
        assert len(results) == 2
        groups = [r["group_col"] for r in results]
        assert "A" in groups
        assert "B" in groups