genome-sh 0.1.0

The jq of genomics. Fast, local, human-readable variant analysis.
use anyhow::Result;
use rusqlite::Connection;

/// Create all database tables and indices.
pub fn create_tables(conn: &Connection) -> Result<()> {
    conn.execute_batch(
        "
        CREATE TABLE IF NOT EXISTS variants (
            id INTEGER PRIMARY KEY,
            chrom TEXT NOT NULL,
            pos INTEGER NOT NULL,
            ref TEXT NOT NULL,
            alt TEXT NOT NULL,
            rsid TEXT,
            gene TEXT,
            assembly TEXT DEFAULT 'GRCh38'
        );

        CREATE UNIQUE INDEX IF NOT EXISTS idx_variant_pos
            ON variants(chrom, pos, ref, alt);

        CREATE INDEX IF NOT EXISTS idx_rsid
            ON variants(rsid) WHERE rsid IS NOT NULL;

        CREATE INDEX IF NOT EXISTS idx_gene
            ON variants(gene) WHERE gene IS NOT NULL;

        CREATE TABLE IF NOT EXISTS clinvar (
            variant_id INTEGER PRIMARY KEY REFERENCES variants(id),
            significance TEXT,
            review_stars INTEGER,
            conditions TEXT,
            last_reviewed TEXT,
            submission_count INTEGER
        );

        CREATE TABLE IF NOT EXISTS gnomad_cache (
            variant_id INTEGER PRIMARY KEY REFERENCES variants(id),
            af_global REAL,
            af_afr REAL,
            af_amr REAL,
            af_asj REAL,
            af_eas REAL,
            af_fin REAL,
            af_nfe REAL,
            af_sas REAL,
            an INTEGER,
            ac INTEGER,
            hom INTEGER,
            cached_at TEXT
        );

        CREATE TABLE IF NOT EXISTS alphamissense (
            variant_id INTEGER PRIMARY KEY REFERENCES variants(id),
            am_pathogenicity REAL,
            am_class TEXT
        );

        CREATE TABLE IF NOT EXISTS clingen (
            gene TEXT PRIMARY KEY,
            disease TEXT,
            classification TEXT,
            haploinsufficiency TEXT,
            triplosensitivity TEXT
        );

        CREATE TABLE IF NOT EXISTS pharmgkb (
            variant_id INTEGER NOT NULL REFERENCES variants(id),
            drug TEXT NOT NULL,
            phenotype TEXT,
            evidence_level TEXT,
            PRIMARY KEY (variant_id, drug)
        );

        CREATE TABLE IF NOT EXISTS uniprot (
            variant_id INTEGER NOT NULL REFERENCES variants(id),
            protein_name TEXT,
            aa_change TEXT,
            consequence TEXT,
            disease TEXT,
            PRIMARY KEY (variant_id, protein_name)
        );

        CREATE TABLE IF NOT EXISTS db_meta (
            source TEXT PRIMARY KEY,
            version TEXT,
            updated_at TEXT,
            variant_count INTEGER
        );
        ",
    )?;

    Ok(())
}