# briefcase-python 2.4.1
#
# Python bindings for Briefcase AI
# Documentation
"""Tests for Python bindings - data sanitization functionality."""

from __future__ import annotations

import briefcase_ai
import pytest


class TestSanitizer:
    """Unit tests exercising the ``briefcase_ai.Sanitizer`` binding."""

    def test_sanitizer_creation(self):
        """A default-constructed sanitizer is not ``None``."""
        instance = briefcase_ai.Sanitizer()
        assert instance is not None

    def test_disabled_sanitizer(self):
        """``Sanitizer.disabled()`` returns the input unchanged, no redactions."""
        passthrough = briefcase_ai.Sanitizer.disabled()

        outcome = passthrough.sanitize("test@email.com")

        assert outcome.sanitized == "test@email.com"
        assert len(outcome.redactions) == 0

    def test_email_sanitization(self):
        """One e-mail address yields one ``email`` redaction and a placeholder."""
        instance = briefcase_ai.Sanitizer()

        outcome = instance.sanitize(
            "Contact me at john.doe@example.com for details."
        )

        assert "[REDACTED_EMAIL]" in outcome.sanitized
        assert len(outcome.redactions) == 1
        assert outcome.redactions[0].pii_type == "email"

    def test_multiple_pii_sanitization(self):
        """E-mail, phone and IP in one string are each replaced by a placeholder."""
        instance = briefcase_ai.Sanitizer()

        outcome = instance.sanitize(
            "Contact john@example.com at 555-123-4567 or visit 192.168.1.100"
        )

        # Checked in the same order as the values appear in the input text.
        for placeholder in ("[REDACTED_EMAIL]", "[REDACTED_PHONE]", "[REDACTED_IP]"):
            assert placeholder in outcome.sanitized
        assert len(outcome.redactions) == 3

    def test_custom_pattern_add_and_remove(self):
        """A custom pattern redacts while registered and stops once removed."""
        instance = briefcase_ai.Sanitizer()
        sample = "Employee ID: EMP-123456"
        placeholder = "[REDACTED_EMPLOYEE_ID]"

        instance.add_pattern("employee_id", r"\bEMP-\d{6}\b")
        with_pattern = instance.sanitize(sample)
        assert placeholder in with_pattern.sanitized

        was_removed = instance.remove_pattern("employee_id")
        assert was_removed is True

        without_pattern = instance.sanitize(sample)
        assert placeholder not in without_pattern.sanitized

    def test_remove_builtin_pattern_returns_false(self):
        """Removing ``email`` from a fresh sanitizer reports ``False``."""
        instance = briefcase_ai.Sanitizer()
        was_removed = instance.remove_pattern("email")
        assert was_removed is False

    def test_enable_disable(self):
        """``set_enabled`` toggles redaction off and back on for one instance."""
        instance = briefcase_ai.Sanitizer()
        sample = "Email: test@example.com"

        instance.set_enabled(False)
        while_off = instance.sanitize(sample)
        assert while_off.sanitized == sample

        instance.set_enabled(True)
        while_on = instance.sanitize(sample)
        assert "[REDACTED_EMAIL]" in while_on.sanitized

    def test_sanitize_json(self):
        """``sanitize_json`` redacts nested strings and keeps the integer value."""
        instance = briefcase_ai.Sanitizer()

        user_section = {
            "email": "john@example.com",
            "phone": "555-123-4567",
        }
        config_section = {
            "api_key": "sk-1234567890abcdef1234567890abcdef",
            "timeout": 30,
        }
        payload = {"user": user_section, "config": config_section}

        outcome = instance.sanitize_json(payload)

        assert outcome.sanitized["user"]["email"] == "[REDACTED_EMAIL]"
        assert outcome.sanitized["user"]["phone"] == "[REDACTED_PHONE]"
        assert outcome.sanitized["config"]["api_key"] == "[REDACTED_API_KEY]"
        assert outcome.sanitized["config"]["timeout"] == 30
        assert outcome.redaction_count == 3

    def test_contains_pii(self):
        """``contains_pii`` is ``True`` for an e-mail and ``False`` for plain text."""
        instance = briefcase_ai.Sanitizer()

        expectations = (
            ("Email: john@example.com", True),
            ("No pii here", False),
        )
        for text, expected in expectations:
            assert instance.contains_pii(text) is expected

    def test_analyze_pii(self):
        """``analyze_pii`` reports match totals and the distinct detected types."""
        instance = briefcase_ai.Sanitizer()

        report = instance.analyze_pii(
            "Contact john@example.com or jane@test.org at 555-123-4567"
        )

        assert report["has_pii"] is True
        assert report["total_matches"] == 3
        assert report["unique_types"] == 2
        assert set(report["detected_types"]) == {"email", "phone"}

    def test_invalid_pattern(self):
        """Registering the malformed regex ``[`` raises an exception."""
        instance = briefcase_ai.Sanitizer()

        # Only the add_pattern call is expected to raise, so construction
        # stays outside the context manager.
        with pytest.raises(Exception):
            instance.add_pattern("invalid", "[")

    def test_result_to_dict(self):
        """``to_dict`` on a text result exposes the expected keys."""
        instance = briefcase_ai.Sanitizer()

        outcome = instance.sanitize("Email: test@example.com")
        as_dict = outcome.to_dict()

        for key in ("sanitized", "redaction_count", "has_redactions", "redactions"):
            assert key in as_dict
        assert as_dict["sanitized"] == outcome.sanitized

    def test_json_result_to_dict(self):
        """``to_dict`` on a JSON result has ``sanitized`` and ``redaction_count``."""
        instance = briefcase_ai.Sanitizer()

        outcome = instance.sanitize_json({"email": "test@example.com", "count": 5})
        as_dict = outcome.to_dict()

        for key in ("sanitized", "redaction_count"):
            assert key in as_dict

    def test_redaction_to_dict(self):
        """A redaction's dict form carries its type and an ordered position span."""
        instance = briefcase_ai.Sanitizer()
        outcome = instance.sanitize("Email: test@example.com")

        first = outcome.redactions[0]
        as_dict = first.to_dict()

        assert as_dict["pii_type"] == "email"
        assert as_dict["start_position"] < as_dict["end_position"]


class TestSanitizationIntegration:
    """Scenario-style tests running the sanitizer over realistic payloads."""

    def test_training_data_redaction(self):
        """Sanitizing the sample lines yields at least one redaction in total."""
        instance = briefcase_ai.Sanitizer()

        samples = [
            "Customer john.doe@company.com reported issue with order #12345",
            "Support ticket: Call Sarah at (555) 123-4567 regarding account 123-45-6789",
            "API logs show requests from 192.168.1.100 using key sk-abc123def4567890123456789012",
        ]

        # Total the redactions over all samples; the test only needs a
        # non-zero aggregate, not a per-line count.
        total = sum(len(instance.sanitize(line).redactions) for line in samples)

        assert total > 0

    def test_configuration_data_sanitization(self):
        """Sensitive config strings are redacted; integer settings are unchanged."""
        instance = briefcase_ai.Sanitizer()

        settings = {
            "database": {"host": "192.168.1.50", "port": 5432},
            "api": {
                "openai_key": "sk-1234567890abcdefghijklmnop",
                "rate_limit": 1000,
            },
            "notifications": {"admin_email": "admin@company.com"},
        }

        outcome = instance.sanitize_json(settings)

        # (section, key, expected value) — redacted entries first, then the
        # integer values that must come back unchanged.
        expectations = (
            ("database", "host", "[REDACTED_IP]"),
            ("api", "openai_key", "[REDACTED_API_KEY]"),
            ("notifications", "admin_email", "[REDACTED_EMAIL]"),
            ("database", "port", 5432),
            ("api", "rate_limit", 1000),
        )
        for section, key, expected in expectations:
            assert outcome.sanitized[section][key] == expected


if __name__ == "__main__":
    # Run this file's tests when executed as a script. pytest.main returns
    # the session's exit code; pass it on as the process exit status so a
    # failing run is not reported as success (exit 0) to the shell or CI.
    raise SystemExit(pytest.main([__file__]))