# briefcase-python 2.4.1
#
# Python bindings for Briefcase AI
# Documentation
"""DVC client for data versioning and artifact tracking."""

from typing import Optional, Dict, Any
import logging
import os

from briefcase.integrations.vcs.base import VcsClientBase

logger = logging.getLogger(__name__)


class DvcClient(VcsClientBase):
    """
    DVC (Data Version Control) client for managing versioned data.

    DVC enables data versioning using Git and supports remote storage
    backends for large files and datasets.

    Configuration priority (highest to lowest):
        1. Explicit parameters
        2. Environment variables (DVC_REMOTE, DVC_REPO_PATH)
        3. Git repository root with dvc.yaml
        4. Current directory

    Usage:
        client = DvcClient(
            repository="my-dvc-repo",
            branch="main",
            briefcase_client=briefcase_client,
            repo_path="/path/to/repo"
        )
        data = client.read_object("data/train.csv")
        client.create_version("Updated training dataset")
    """

    def __init__(
        self,
        repository: str,
        branch: str = "main",
        briefcase_client=None,
        repo_path: Optional[str] = None,
        remote: Optional[str] = None,
        **extra
    ):
        """
        Initialize DVC client.

        If the ``dvc`` package cannot be imported, or ``dvc.repo.Repo`` fails
        to open ``repo_path``, the failure is logged as a warning and the
        client switches to mock mode (``self._has_provider = False``) instead
        of raising.

        Args:
            repository: Repository name
            branch: Git branch name (default: "main")
            briefcase_client: Optional BriefcaseClient for instrumentation
            repo_path: Path to DVC repository root. When empty or omitted,
                the DVC_REPO_PATH environment variable is used, then ".".
            remote: DVC remote name or URL. When empty or omitted, the
                DVC_REMOTE environment variable is used (may remain None).
            **extra: Additional DVC configuration options, forwarded to the
                base class constructor
        """
        # The base class receives only the explicit `remote` argument; the
        # DVC_REMOTE fallback below applies to `self.remote` alone.
        super().__init__(
            provider_type="dvc",
            repository=repository,
            branch=branch,
            briefcase_client=briefcase_client,
            endpoint=remote,
            **extra
        )

        # DVC-specific configuration.
        # An empty DVC_REPO_PATH is treated as unset: an empty repo_path would
        # make f"{repo_path}/{path}" resolve against the filesystem root.
        self.repo_path = repo_path or os.getenv("DVC_REPO_PATH") or "."
        self.remote = remote or os.getenv("DVC_REMOTE")

        # Initialize DVC client. The import is local so that a missing `dvc`
        # package degrades to mock mode instead of breaking module import.
        try:
            import dvc.repo
            self._provider_client = dvc.repo.Repo(self.repo_path)
            self._has_provider = True
        except Exception as e:  # ImportError is already an Exception subclass
            logger.warning("DVC not available: %s. Using mock mode.", e)
            self._has_provider = False

    def _read_object_impl(self, path: str) -> bytes:
        """Read object from DVC-tracked file."""
        if not self._has_provider:
            # Mock mode
            return b"Mock DVC content: " + path.encode()

        try:
            full_path = f"{self.repo_path}/{path}"
            with open(full_path, "rb") as f:
                return f.read()
        except FileNotFoundError:
            raise FileNotFoundError(f"DVC object not found: {path}")

    def _write_object_impl(
        self,
        path: str,
        data: bytes,
        content_type: str
    ) -> None:
        """Write object to DVC-tracked file."""
        if not self._has_provider:
            logger.info(f"Mock DVC: Would write {len(data)} bytes to {path}")
            return

        try:
            full_path = f"{self.repo_path}/{path}"
            os.makedirs(os.path.dirname(full_path), exist_ok=True)
            with open(full_path, "wb") as f:
                f.write(data)
            logger.info(f"Wrote {len(data)} bytes to {path}")
        except Exception as e:
            logger.error(f"Failed to write DVC object: {e}")
            raise

    def _create_version_impl(
        self,
        message: str,
        metadata: Optional[Dict[str, str]]
    ) -> str:
        """Create DVC version via Git commit."""
        if not self._has_provider:
            # Mock mode
            return f"dvc-{self.branch}-mock-version"

        try:
            import subprocess
            # Add to git and commit
            subprocess.run(
                ["git", "add", "-A"],
                cwd=self.repo_path,
                check=True,
                capture_output=True
            )
            result = subprocess.run(
                ["git", "commit", "-m", message],
                cwd=self.repo_path,
                capture_output=True,
                text=True
            )

            # Get current commit SHA
            sha_result = subprocess.run(
                ["git", "rev-parse", "HEAD"],
                cwd=self.repo_path,
                capture_output=True,
                text=True,
                check=True
            )
            return sha_result.stdout.strip()
        except Exception as e:
            logger.error(f"Failed to create DVC version: {e}")
            raise