// genomicframe-core 0.2.0
//
// High-performance genomics I/O and interoperability layer
// Documentation
//! I/O abstractions for local, async, and cloud storage
//!
//! This module provides unified interfaces for reading and writing genomic data
//! from various sources: local filesystem, cloud storage (S3, GCS, Azure), etc.

use crate::error::{Error, Result};
use std::path::Path;

/// Supported compression formats
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Compression {
    /// No compression
    None,
    /// Gzip compression (`.gz`)
    Gzip,
    /// Bgzip compression (block gzip; `.bgz` or `.bgzf`)
    Bgzip,
    /// Zstd compression (`.zst`)
    Zstd,
}

impl Compression {
    /// Detect compression from the final extension of `path`.
    ///
    /// Only the last extension is considered (`file.vcf.gz` -> `gz`) and the
    /// comparison ignores ASCII case, so `FILE.VCF.GZ` is treated the same as
    /// `file.vcf.gz`. File contents are never inspected: a block-gzip file
    /// that is named `*.gz` is therefore reported as [`Compression::Gzip`].
    ///
    /// Returns [`Compression::None`] when the path has no extension, the
    /// extension is not valid UTF-8, or the extension is not recognised.
    pub fn from_path<P: AsRef<Path>>(path: P) -> Self {
        // Normalise once so the match below can stay a plain literal match.
        let ext = path
            .as_ref()
            .extension()
            .and_then(|e| e.to_str())
            .map(str::to_ascii_lowercase);

        match ext.as_deref() {
            Some("gz") => Compression::Gzip,
            Some("bgz") | Some("bgzf") => Compression::Bgzip,
            Some("zst") => Compression::Zstd,
            _ => Compression::None,
        }
    }
}

/// File location (local or remote)
///
/// Each variant stores the location string exactly as it was given to
/// [`FileLocation::parse`], including any scheme prefix.
#[derive(Debug, Clone)]
pub enum FileLocation {
    /// Local filesystem path
    Local(String),
    /// S3 URI (`s3://bucket/key`)
    S3(String),
    /// GCS URI (`gs://bucket/key`)
    Gcs(String),
    /// Azure Blob Storage URI (`az://...` or `azure://...`)
    Azure(String),
    /// HTTP/HTTPS URL
    Http(String),
}

impl FileLocation {
    /// Parse a file location from a string.
    ///
    /// The variant is chosen from the scheme prefix of `s`:
    /// `s3://` -> `S3`, `gs://` -> `Gcs`, `http://` / `https://` -> `Http`,
    /// `az://` / `azure://` -> `Azure`. Anything else, including the empty
    /// string, is treated as a `Local` path.
    ///
    /// Scheme matching ignores ASCII case (URI schemes are case-insensitive),
    /// so `S3://bucket/key` is recognised as S3. The input is stored
    /// unmodified in the returned variant; no validation of the remainder of
    /// the string is performed.
    pub fn parse(s: &str) -> Self {
        // Byte-wise comparison: every scheme prefix is pure ASCII, so this
        // avoids slicing `s` at a position that may not be a char boundary.
        let has_scheme = |prefix: &str| {
            s.as_bytes()
                .get(..prefix.len())
                .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
        };

        let location = s.to_string();
        if has_scheme("s3://") {
            FileLocation::S3(location)
        } else if has_scheme("gs://") {
            FileLocation::Gcs(location)
        } else if has_scheme("http://") || has_scheme("https://") {
            FileLocation::Http(location)
        } else if has_scheme("az://") || has_scheme("azure://") {
            FileLocation::Azure(location)
        } else {
            FileLocation::Local(location)
        }
    }

    /// Check if this is a local file (the `Local` variant).
    pub fn is_local(&self) -> bool {
        matches!(self, FileLocation::Local(_))
    }

    /// Check if this is a remote location (any variant other than `Local`).
    pub fn is_remote(&self) -> bool {
        !self.is_local()
    }
}

/// Cloud storage integration (placeholder for Phase 1b)
///
/// This is currently a stateless unit struct; no reader can actually be
/// constructed through [`CloudReader::open`] yet.
// `Debug` is derived so that `Result<CloudReader>` works with
// `unwrap_err()` / `expect_err()`, which require the `Ok` type to be `Debug`.
#[derive(Debug)]
pub struct CloudReader;

impl CloudReader {
    /// Open a cloud file (not yet implemented).
    ///
    /// The `_location` argument is accepted but ignored.
    ///
    /// # Errors
    ///
    /// Always returns `Error::NotImplemented` with a message stating that
    /// cloud storage support is planned for Phase 1b.
    pub fn open(_location: FileLocation) -> Result<Self> {
        Err(Error::NotImplemented(
            "Cloud storage support will be implemented in Phase 1b".to_string(),
        ))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Extension-based detection for a gzip, a bgzip and an uncompressed name.
    #[test]
    fn test_compression_detection() {
        // (path, expected compression) pairs checked in order.
        let cases = [
            ("file.vcf.gz", Compression::Gzip),
            ("file.vcf.bgz", Compression::Bgzip),
            ("file.vcf", Compression::None),
        ];

        for (path, expected) in cases.iter() {
            assert_eq!(Compression::from_path(path), *expected);
        }
    }

    /// Scheme-based classification for a local path, an S3 URI and a GCS URI.
    #[test]
    fn test_file_location_parsing() {
        let local = FileLocation::parse("/local/path/file.vcf");
        assert!(matches!(local, FileLocation::Local(_)));

        let s3 = FileLocation::parse("s3://bucket/file.vcf");
        assert!(matches!(s3, FileLocation::S3(_)));

        let gcs = FileLocation::parse("gs://bucket/file.vcf");
        assert!(matches!(gcs, FileLocation::Gcs(_)));
    }
}