sql-cli 1.67.1 - Docs.rs

#!/usr/bin/env python3
"""
Comprehensive tests for SQL string methods
"""

import subprocess
import pytest
from pathlib import Path
from io import StringIO
import pandas as pd

class TestStringMethods:
    """Comprehensive test suite for string method functionality"""
    
    @classmethod
    def setup_class(cls):
        """Setup test environment"""
        cls.project_root = Path(__file__).parent.parent.parent
        cls.sql_cli = str(cls.project_root / "target" / "release" / "sql-cli")
        
        # Build if needed
        if not Path(cls.sql_cli).exists():
            subprocess.run(["cargo", "build", "--release"], 
                          cwd=cls.project_root, check=True)
    
    def run_query(self, csv_file: str, query: str):
        """Helper to run a SQL query"""
        cmd = [
            self.sql_cli, 
            str(self.project_root / "data" / csv_file), 
            "-q", query, 
            "-o", "csv"
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
        
        if result.returncode != 0:
            # Return error for testing error cases
            return None, result.stderr
            
        if result.stdout.strip():
            return pd.read_csv(StringIO(result.stdout.strip())), None
        return pd.DataFrame(), None

    # TRIM METHODS
    def test_trim_removes_both_spaces(self):
        """Test Trim() removes leading and trailing spaces"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, name.Trim() as trimmed FROM test_simple_strings WHERE id IN (4, 7, 8)")
        assert len(df) == 3
        assert df[df['id'] == 4]['trimmed'].iloc[0] == 'David'
        assert df[df['id'] == 7]['trimmed'].iloc[0] == 'Grace'
        assert df[df['id'] == 8]['trimmed'].iloc[0] == 'Henry'
    
    def test_trimstart_removes_leading_spaces(self):
        """Test TrimStart() removes only leading spaces"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, name.TrimStart() as trimmed FROM test_simple_strings WHERE id = 7")
        assert len(df) == 1
        assert df.iloc[0]['trimmed'] == 'Grace'
    
    def test_trimend_removes_trailing_spaces(self):
        """Test TrimEnd() removes only trailing spaces"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, name.TrimEnd() as trimmed FROM test_simple_strings WHERE id = 8")
        assert len(df) == 1
        assert df.iloc[0]['trimmed'] == 'Henry'
    
    # LENGTH METHOD
    def test_length_counts_characters(self):
        """Test Length() counts all characters including spaces"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, name, name.Length() as len FROM test_simple_strings WHERE id IN (1, 4)")
        assert len(df) == 2
        assert df[df['id'] == 1]['len'].iloc[0] == 5  # "Alice"
        assert df[df['id'] == 4]['len'].iloc[0] == 9  # "  David  "
    
    def test_length_in_where(self):
        """Test Length() can be used in WHERE clause"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, name FROM test_simple_strings WHERE name.Length() > 6")
        assert len(df) == 4  # "  David  ", "Charlie", "  Grace", "Henry  "
        assert set(df['id'].tolist()) == {3, 4, 7, 8}
    
    # INDEXOF METHOD
    def test_indexof_finds_substring(self):
        """Test IndexOf() finds position of substring"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, email, email.IndexOf('@') as at_pos FROM test_simple_strings WHERE id = 1")
        assert len(df) == 1
        assert df.iloc[0]['at_pos'] == 5  # "alice@example.com"
    
    def test_indexof_returns_minus_one(self):
        """Test IndexOf() returns -1 when substring not found"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, name, name.IndexOf('x') as pos FROM test_simple_strings WHERE id = 1")
        assert len(df) == 1
        assert df.iloc[0]['pos'] == -1
    
    def test_indexof_in_where(self):
        """Test IndexOf() in WHERE clause to filter results"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE email.IndexOf('gmail') > 0")
        assert len(df) == 1
        assert df.iloc[0]['id'] == 5
    
    # CONTAINS METHOD
    def test_contains_finds_substring(self):
        """Test Contains() checks for substring presence"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE email.Contains('example')")
        assert len(df) == 4  # example.com and example.org
        assert set(df['id'].tolist()) == {1, 4, 7, 10}
    
    def test_contains_case_sensitive(self):
        """Test Contains() - appears to be case-insensitive based on logs"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE status.Contains('active')")
        # Contains appears to be case-insensitive, so it matches Active and Inactive
        assert len(df) == 7  # Active (1,3,5,7,9) + Inactive (2,8)
        assert set(df['id'].tolist()) == {1, 2, 3, 5, 7, 8, 9}
    
    # STARTSWITH METHOD
    def test_startswith_prefix_check(self):
        """Test StartsWith() checks string prefix"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE code.StartsWith('ABC')")
        assert len(df) == 1
        assert df.iloc[0]['id'] == 1
    
    def test_startswith_case_sensitive(self):
        """Test StartsWith() - appears to be case-insensitive"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE status.StartsWith('a')")
        # StartsWith appears to be case-insensitive like Contains
        assert len(df) == 6  # Active and Archived both start with 'a' or 'A'
        assert set(df['id'].tolist()) == {1, 3, 5, 6, 7, 9}
    
    # ENDSWITH METHOD
    def test_endswith_suffix_check(self):
        """Test EndsWith() checks string suffix"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE email.EndsWith('.org')")
        assert len(df) == 2
        assert set(df['id'].tolist()) == {2, 7}
    
    def test_endswith_with_spaces(self):
        """Test EndsWith() on strings with trailing spaces"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE name.EndsWith('  ')")
        assert len(df) == 2  # "  David  " and "Henry  "
        assert set(df['id'].tolist()) == {4, 8}
    
    # COMBINED OPERATIONS
    def test_trim_then_length(self):
        """Test combining Trim() with Length()"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, name.Trim().Length() as trimmed_len FROM test_simple_strings WHERE id = 4")
        assert len(df) == 1
        assert df.iloc[0]['trimmed_len'] == 5  # "David" after trim
    
    def test_multiple_string_methods_in_select(self):
        """Test multiple string methods in single SELECT"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, name.Trim() as trimmed, email.IndexOf('@') as at_pos, code.Length() as code_len FROM test_simple_strings WHERE id = 1")
        assert len(df) == 1
        assert df.iloc[0]['trimmed'] == 'Alice'
        assert df.iloc[0]['at_pos'] == 5
        assert df.iloc[0]['code_len'] == 6
    
    def test_string_method_with_arithmetic(self):
        """Test string methods combined with arithmetic operations"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, name.Length() + email.Length() as total_len FROM test_simple_strings WHERE id = 1")
        assert len(df) == 1
        assert df.iloc[0]['total_len'] == 22  # 5 + 17
    
    # EDGE CASES
    def test_method_on_empty_string(self):
        """Test string methods on empty strings if they exist"""
        # This test would need data with empty strings
        pass
    
    def test_method_with_special_characters(self):
        """Test string methods with special characters in arguments"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE email.Contains('.')")
        assert len(df) == 10  # All emails contain '.'
    
    # METHOD CHAINING
    def test_simple_method_chaining(self):
        """Test chaining two string methods"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id, name.Trim().Length() as result FROM test_simple_strings WHERE id = 4")
        assert len(df) == 1
        assert df.iloc[0]['result'] == 5  # "  David  " -> "David" -> 5
    
    def test_triple_method_chaining(self):
        """Test chaining three methods"""
        # Note: This might not be supported yet
        df, err = self.run_query("test_simple_strings.csv",
                                "SELECT id, name.Trim().TrimStart().Length() as result FROM test_simple_strings WHERE id = 4")
        if err:
            pytest.skip("Triple method chaining not yet supported")
        else:
            assert len(df) == 1
            assert df.iloc[0]['result'] == 5
    
    # COMPLEX WHERE CONDITIONS
    def test_string_methods_with_and(self):
        """Test string methods in WHERE with AND"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE name.StartsWith('A') AND email.EndsWith('.com')")
        assert len(df) == 1
        assert df.iloc[0]['id'] == 1
    
    def test_string_methods_with_or(self):
        """Test string methods in WHERE with OR"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE name.StartsWith('A') OR name.StartsWith('B')")
        assert len(df) == 2
        assert set(df['id'].tolist()) == {1, 2}
    
    def test_string_method_with_not_equal(self):
        """Test string methods with != operator"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT id FROM test_simple_strings WHERE name.Length() != 5")
        # Names with length != 5: Bob(3), Charlie(7), "  David  "(9), Eve(3), "  Grace"(7), "Henry  "(7), Iris(4), Jack(4)
        assert len(df) == 8  # All except Alice(5) and Frank(5)
        assert set(df['id'].tolist()) == {2, 3, 4, 5, 7, 8, 9, 10}
    
    # PERFORMANCE TESTS (with larger datasets if available)
    def test_string_method_on_all_rows(self):
        """Test string method performance on all rows"""
        df, _ = self.run_query("test_simple_strings.csv",
                               "SELECT name.Trim() as trimmed FROM test_simple_strings")
        assert len(df) == 10
        # Check a few known values
        assert 'David' in df['trimmed'].values
        assert 'Grace' in df['trimmed'].values

if __name__ == "__main__":
    pytest.main([__file__, "-v"])