import json
from typing import List, Optional
# OpenTelemetry is an optional dependency: when it is missing we degrade to
# no-op tracing (HAS_OTEL=False, tracer=None) instead of failing the import.
try:
    from opentelemetry import trace
    tracer = trace.get_tracer(__name__)
    HAS_OTEL = True
except ImportError:
    tracer = None
    HAS_OTEL = False
from briefcase.validation.extractors import Reference
from briefcase.validation.errors import ValidationError, ValidationErrorCode
class SemanticValidator:
    """Semantic validation layer: checks prompt references against lakeFS content.

    For each extracted ``Reference`` the current document is loaded from lakeFS
    and an LLM is asked whether the referenced content still exists in the same
    form. Confident "content changed" verdicts become warning-level
    ``ValidationError`` objects; everything else (missing documents, LLM
    failures, unparseable responses, low confidence) is skipped, because this
    layer is advisory and must never abort validation.
    """

    # LLM verdicts at or below this confidence are ignored as too uncertain.
    DEFAULT_CONFIDENCE_THRESHOLD = 0.7

    # Benign fallback verdict used whenever the LLM reply cannot be trusted.
    _DEFAULT_RESULT = {'exists': True, 'changes': None, 'confidence': 0.0}

    def __init__(
        self,
        llm_client,
        lakefs_client,
        repository: str,
        branch: str = "main",
        confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD
    ):
        """
        Args:
            llm_client: Client exposing ``complete(prompt) -> str``.
            lakefs_client: Client exposing
                ``read_object(repository=..., branch=..., path=...) -> str``.
            repository: lakeFS repository to read referenced documents from.
            branch: lakeFS branch to read from (defaults to ``"main"``).
            confidence_threshold: Minimum LLM confidence required before a
                "content changed" verdict is reported. Defaults to 0.7,
                preserving the previously hard-coded behaviour.
        """
        self.llm = llm_client
        self.lakefs = lakefs_client
        self.repository = repository
        self.branch = branch
        self.confidence_threshold = confidence_threshold

    def validate_semantic(
        self,
        prompt: str,
        references: List[Reference]
    ) -> List[ValidationError]:
        """Validate all references; returns a (possibly empty) list of errors.

        Wraps the check in an OpenTelemetry span when tracing is available.
        """
        if HAS_OTEL and tracer:
            with tracer.start_as_current_span("validation.semantic_check") as span:
                return self._validate_semantic_with_telemetry(prompt, references, span)
        return self._validate_semantic_internal(prompt, references)

    def _validate_semantic_with_telemetry(
        self,
        prompt: str,
        references: List[Reference],
        span
    ) -> List[ValidationError]:
        """Run validation while annotating the active OTel span."""
        span.set_attribute("validation.semantic.enabled", True)
        # Record which model produced the verdicts, when the client exposes it.
        if hasattr(self.llm, 'model_name'):
            span.set_attribute("validation.semantic.model", self.llm.model_name)
        return self._validate_semantic_internal(prompt, references)

    def _validate_semantic_internal(
        self,
        prompt: str,
        references: List[Reference]
    ) -> List[ValidationError]:
        """Check each reference against its current document content."""
        errors = []
        for ref in references:
            content = self._load_reference_content(ref)
            if content is None:
                # Document unavailable -> nothing to judge; skip, don't error.
                continue
            error = self._check_structure(prompt, ref, content)
            if error:
                errors.append(error)
        return errors

    def _load_reference_content(self, ref: Reference) -> Optional[str]:
        """Read the referenced object from lakeFS; ``None`` on any read failure.

        Best-effort by design: a missing or unreadable object is not itself a
        semantic validation error.
        """
        try:
            return self.lakefs.read_object(
                repository=self.repository,
                branch=self.branch,
                path=ref.path
            )
        except Exception:
            return None

    def _check_structure(
        self,
        prompt: str,
        ref: Reference,
        content: str
    ) -> Optional[ValidationError]:
        """Ask the LLM whether the reference still matches the document."""
        if HAS_OTEL and tracer:
            with tracer.start_as_current_span("validation.semantic.structure_check") as span:
                return self._check_structure_with_telemetry(prompt, ref, content, span)
        return self._check_structure_internal(prompt, ref, content)

    def _check_structure_with_telemetry(
        self,
        prompt: str,
        ref: Reference,
        content: str,
        span
    ) -> Optional[ValidationError]:
        """Structure check that records the LLM's confidence on the span."""
        error = self._check_structure_internal(prompt, ref, content)
        if error and error.metadata and 'confidence' in error.metadata:
            span.set_attribute("validation.semantic.confidence", error.metadata['confidence'])
        return error

    def _check_structure_internal(
        self,
        prompt: str,
        ref: Reference,
        content: str
    ) -> Optional[ValidationError]:
        """Return a warning-level error when the LLM confidently reports drift.

        Returns ``None`` when the content still exists, the LLM is not
        confident enough, or the LLM call/parse fails (advisory layer:
        failures must never abort validation).
        """
        validation_prompt = f"""
A prompt references: "{ref.text}"
Current document content (first 2000 characters):
{content[:2000]}...
Question: Does the referenced content still exist in the same form?
Are there any structural changes (e.g., section renumbered, content moved)?
Answer with JSON:
{{
"exists": true/false,
"changes": "description of changes" or null,
"confidence": 0.0-1.0
}}
"""
        try:
            response = self.llm.complete(validation_prompt)
            result = self._parse_llm_response(response)
            # Only report when the model says the content is gone AND is more
            # confident than the configured threshold.
            if not result['exists'] and result['confidence'] > self.confidence_threshold:
                return ValidationError(
                    code=ValidationErrorCode.REFERENCE_GONE,
                    message=f"Referenced content may have changed: {result['changes']}",
                    reference=ref.text,
                    severity="warning",
                    layer="semantic",
                    remediation="Review document structure changes",
                    metadata={'confidence': result['confidence']}
                )
        except Exception:
            # Best-effort: LLM/transport failures must not abort validation.
            pass
        return None

    def _parse_llm_response(self, response: str) -> dict:
        """Parse the LLM's JSON verdict into a normalized dict.

        Always returns a dict containing exactly the keys ``exists`` (bool),
        ``changes`` and ``confidence`` (float), so callers can index it
        without KeyError. Anything unparseable — invalid JSON, a non-object
        JSON value, a non-numeric confidence — degrades to the benign default
        (exists=True, confidence=0.0). Markdown code fences around the JSON
        (a common LLM quirk) are stripped before parsing.
        """
        text = response.strip()
        if text.startswith("```"):
            # Strip ```json ... ``` or ``` ... ``` fencing.
            text = text[3:]
            if text.startswith("json"):
                text = text[4:]
            if text.endswith("```"):
                text = text[:-3]
            text = text.strip()
        try:
            parsed = json.loads(text)
        except Exception:
            return dict(self._DEFAULT_RESULT)
        if not isinstance(parsed, dict):
            return dict(self._DEFAULT_RESULT)
        result = dict(self._DEFAULT_RESULT)
        result['exists'] = bool(parsed.get('exists', True))
        result['changes'] = parsed.get('changes')
        try:
            result['confidence'] = float(parsed.get('confidence', 0.0))
        except (TypeError, ValueError):
            result['confidence'] = 0.0
        return result