gsem 0.1.0 - Docs.rs

use std::collections::HashMap;
use std::path::{Path, PathBuf};

use anyhow::{Context, Result};
use clap::{Args, Parser, Subcommand};
use faer::Mat;

use gsem::io::gwas_reader;
use gsem::munge;

#[derive(Parser)]
#[command(
    name = "gsem",
    about = "Genomic Structural Equation Modeling",
    long_about = "\
Genomic Structural Equation Modeling from GWAS summary statistics.

Pipeline: `munge` → `ldsc` (or `hdl`) → `commonfactor` / `usermodel` / `userGWAS` / ...

Use `gsem <subcommand> --help` to see a worked example for each subcommand
(use long `--help`, not short `-h`, to get the extended help with examples)."
)]
struct Cli {
    #[command(subcommand)]
    command: Commands,
}

#[derive(Subcommand)]
enum Commands {
    /// QC and munge raw GWAS summary statistics
    #[command(long_about = "\
QC and munge raw GWAS summary statistics against an HM3 SNPlist.

Writes one `<trait>.sumstats.gz` per input file, harmonising allele and
effect columns. Output feeds `gsem ldsc` and `gsem sumstats`.

EXAMPLE:
    gsem munge \\
      --files raw_T1.txt.gz raw_T2.txt.gz \\
      --hm3 w_hm3.snplist \\
      --trait-names T1 T2 \\
      --N 1e5 1e5")]
    Munge(MungeArgs),
    /// Run multivariate LD Score Regression
    #[command(long_about = "\
Estimate a genetic covariance matrix (and sampling-covariance matrix)
across munged GWAS summary statistics via LD score regression with a
block jackknife. Writes `ldsc.json` consumable by downstream subcommands
(`commonfactor`, `usermodel`, `userGWAS`, `commonfactorGWAS`, ...).

EXAMPLE:
    gsem ldsc \\
      --traits T1.sumstats.gz T2.sumstats.gz T3.sumstats.gz \\
      --ld eur_w_ld_chr/ \\
      --out ldsc.json")]
    Ldsc(LdscArgs),
    /// Fit structural equation model
    #[command(
        name = "usermodel",
        long_about = "\
Fit a user-specified SEM to the genetic covariance structure from `gsem
ldsc`. Writes parameter estimates and fit indices to a TSV.

EXAMPLE:
    gsem ldsc --traits T1.sumstats.gz T2.sumstats.gz T3.sumstats.gz \\
              --ld eur_w_ld_chr/ --out ldsc.json
    gsem usermodel \\
      --covstruc ldsc.json \\
      --model 'F1 =~ NA*V1 + V2 + V3\\nF1 ~~ 1*F1' \\
      --out fit.tsv"
    )]
    Sem(SemArgs),
    /// Merge GWAS summary statistics for multivariate GWAS
    #[command(long_about = "\
Merge per-trait sumstats into a single SNP x trait TSV for `userGWAS` /
`commonfactorGWAS`. Aligns alleles to a reference panel and applies QC
filters.

EXAMPLE:
    gsem sumstats \\
      --files T1.sumstats.gz T2.sumstats.gz \\
      --ref eur_w_ld_chr/ \\
      --trait-names T1 T2 \\
      --out merged_sumstats.tsv")]
    Sumstats(SumstatsArgs),
    /// Fit common factor model (auto-generated 1-factor CFA)
    #[command(
        name = "commonfactor",
        long_about = "\
Auto-generate and fit a 1-factor CFA model to the genetic covariance
structure from `gsem ldsc`. Writes parameter estimates and fit indices
(chisq, df, p_chisq, AIC, CFI, SRMR) to a TSV.

EXAMPLE:
    gsem ldsc --traits T1.sumstats.gz T2.sumstats.gz T3.sumstats.gz \\
              --ld eur_w_ld_chr/ --out ldsc.json
    gsem commonfactor --covstruc ldsc.json --out cf.tsv"
    )]
    CommonFactor(CommonFactorArgs),
    /// Run multivariate GWAS
    #[command(
        name = "userGWAS",
        long_about = "\
Fit a user-specified SEM at every SNP in a merged sumstats file. Writes
two TSVs: a per-SNP table (metadata + fit stats) and a flat parameter
table (join on SNP).

EXAMPLE:
    gsem sumstats --files T1.sumstats.gz T2.sumstats.gz T3.sumstats.gz \\
                  --ref eur_w_ld_chr/ --out merged_sumstats.tsv
    gsem userGWAS \\
      --covstruc ldsc.json \\
      --snps merged_sumstats.tsv \\
      --model 'F1 =~ NA*V1 + V2 + V3\\nF1 ~ SNP\\nF1 ~~ 1*F1' \\
      --out-snps snps.tsv --out-params params.tsv"
    )]
    Gwas(GwasArgs),
    /// Run common factor GWAS (auto-generated 1-factor model per SNP)
    #[command(
        name = "commonfactorGWAS",
        long_about = "\
Auto-generate a 1-factor model with SNP → F1 and fit it at every SNP.

WARNING: this subcommand uses an auto-generated fixed-variance
parameterisation that differs from R's `GenomicSEM::commonfactorGWAS`,
which uses a marker-indicator approach. Results are not bit-identical.
Set GSEMR_COMMONFACTOR_GWAS_QUIET=1 to suppress the first-use warning.

EXAMPLE:
    gsem commonfactorGWAS \\
      --covstruc ldsc.json \\
      --snps merged_sumstats.tsv \\
      --out-snps snps.tsv --out-params params.tsv"
    )]
    CommonfactorGwas(CommonfactorGwasArgs),
    /// Auto-generate model syntax from factor loadings
    #[command(
        name = "write.model",
        long_about = "\
Convert a factor loading matrix (CSV/TSV) into lavaan-style model syntax.
Loadings below `--cutoff` are dropped; residual variances can be fixed
positive with `--fix-resid`.

EXAMPLE:
    gsem write.model --loadings loadings.csv --cutoff 0.3"
    )]
    WriteModel(WriteModelArgs),
    /// Parallel analysis to determine number of factors
    #[command(
        name = "paLDSC",
        long_about = "\
Compare observed eigenvalues of the genetic covariance matrix to the
95th percentile of eigenvalues simulated from sampling covariances, and
report the recommended number of non-spurious factors.

EXAMPLE:
    gsem paLDSC --covstruc ldsc.json --r 500 --out pa.tsv"
    )]
    ParallelAnalysis(ParallelAnalysisArgs),
    /// Enrichment analysis using stratified LDSC results
    #[command(long_about = "\
Enrichment analysis stacking `gsem s_ldsc` output with a SEM model.
Returns per-annotation enrichment and SE.

EXAMPLE:
    gsem s_ldsc --traits T1.sumstats.gz --ld baselineLD_v2.2/ \\
                --wld weights_hm3_no_hla/ --frq 1000G_Phase3_frq/ \\
                --out sldsc.json
    gsem enrich --sldsc sldsc.json --model 'F1 =~ NA*V1' --out enr.tsv")]
    Enrich(EnrichArgs),
    /// Run stratified (partitioned) LD Score Regression
    #[command(
        name = "s_ldsc",
        long_about = "\
Partition heritability across functional annotations.  Writes the per-
annotation S / V matrices and the `tau` table to a JSON bundle
consumable by `gsem enrich`.

EXAMPLE:
    gsem s_ldsc \\
      --traits T1.sumstats.gz \\
      --ld baselineLD_v2.2/ \\
      --wld weights_hm3_no_hla/ \\
      --frq 1000G_Phase3_frq/ \\
      --out sldsc.json"
    )]
    SLdsc(SLdscArgs),
    /// Compute model-implied genetic correlation matrix
    #[command(long_about = "\
Fit a SEM and return the model-implied genetic correlation matrix plus
its sampling covariance. DWLS is the default estimator; pass `--ml` to
use ML instead.

EXAMPLE:
    gsem rgmodel --covstruc ldsc.json --out rg.tsv")]
    Rgmodel(RgmodelArgs),
    /// Joint analysis of multiple SNPs with LD
    #[command(
        name = "multiSNP",
        long_about = "\
Fit a SEM that regresses a common factor on multiple SNPs
simultaneously, accounting for LD between them.

EXAMPLE:
    gsem multiSNP \\
      --covstruc ldsc.json \\
      --snps merged_sumstats.tsv \\
      --snp-list rs1 rs2 rs3 \\
      --model 'F1 =~ NA*V1 + V2\\nF1 ~ rs1 + rs2 + rs3\\nF1 ~~ 1*F1' \\
      --out multi.tsv"
    )]
    MultiSnp(MultiSnpArgs),
    /// Simulate GWAS summary statistics
    #[command(
        name = "simLDSC",
        long_about = "\
Simulate multivariate GWAS summary statistics given a target genetic
covariance matrix.  Writes `iter<r>GWAS<i>.sumstats.gz` for each
simulated trait (matching R GenomicSEM::simLDSC's file layout).

EXAMPLE:
    gsem simLDSC \\
      --covmat covmat.tsv \\
      --N 1e5 1e5 \\
      --ld eur_w_ld_chr/"
    )]
    Simulate(SimulateArgs),
    /// High-Definition Likelihood estimation of genetic covariance
    #[command(long_about = "\
Run HDL (High-Definition Likelihood) estimation.  Output shape is
identical to `gsem ldsc`, so downstream subcommands consume it the same
way.

EXAMPLE:
    gsem hdl \\
      --traits T1.sumstats.gz T2.sumstats.gz \\
      --ld UKB_imputed_SVD_eigen99_extraction/ \\
      --n-ref 335265 \\
      --out hdl.json")]
    Hdl(HdlArgs),
    /// Generalized Least Squares regression on genetic parameters
    #[command(
        name = "summaryGLS",
        long_about = "\
GLS regression of a response vector on a predictor matrix given a
response covariance.

EXAMPLE:
    gsem summaryGLS \\
      --y y.tsv --v-y vy.tsv --predictors x.tsv --intercept \\
      --out gls.tsv"
    )]
    SummaryGls(SummaryGlsArgs),
}

#[derive(Args)]
struct MungeArgs {
    #[arg(short, long, num_args = 1..)]
    files: Vec<PathBuf>,
    #[arg(long)]
    hm3: PathBuf,
    #[arg(long, num_args = 1..)]
    trait_names: Option<Vec<String>>,
    #[arg(long, default_value = "0.9")]
    info_filter: f64,
    #[arg(long, default_value = "0.01")]
    maf_filter: f64,
    #[arg(short, long)]
    n: Option<f64>,
    #[arg(long)]
    column_names: Option<String>,
    #[arg(short, long, default_value = ".")]
    out: PathBuf,
}

#[derive(Args)]
struct LdscArgs {
    #[arg(short, long, num_args = 1..)]
    traits: Vec<PathBuf>,
    #[arg(long)]
    sample_prev: Option<String>,
    #[arg(long)]
    pop_prev: Option<String>,
    #[arg(long)]
    ld: PathBuf,
    #[arg(long)]
    wld: Option<PathBuf>,
    #[arg(long, default_value = "200")]
    n_blocks: usize,
    #[arg(long)]
    chisq_max: Option<f64>,
    #[arg(long, default_value = "22")]
    chr: usize,
    #[arg(long, default_value = "all")]
    select: String,
    #[arg(long)]
    stand: bool,
    #[arg(short, long, default_value = "ldsc_result.json")]
    out: PathBuf,
}

#[derive(Args)]
struct SemArgs {
    #[arg(long)]
    covstruc: PathBuf,
    #[arg(long)]
    model: Option<String>,
    #[arg(long)]
    model_file: Option<PathBuf>,
    #[arg(long, default_value = "DWLS")]
    estimation: String,
    #[arg(long)]
    std_lv: bool,
    #[arg(long)]
    fix_resid: bool,
    #[arg(long)]
    q_factor: bool,
    #[arg(short, long, default_value = "sem_result.tsv")]
    out: PathBuf,
}

#[derive(Args)]
struct SumstatsArgs {
    #[arg(short, long, num_args = 1..)]
    files: Vec<PathBuf>,
    #[arg(long, name = "ref")]
    ref_file: PathBuf,
    #[arg(long, num_args = 1..)]
    trait_names: Option<Vec<String>>,
    #[arg(long, default_value = "0.6")]
    info_filter: f64,
    #[arg(long, default_value = "0.01")]
    maf_filter: f64,
    /// Keep multi-character (indel) alleles in the merged output.
    /// Note: downstream `gsem userGWAS` / `commonfactorGWAS` loading
    /// only accepts single-nucleotide alleles (A/C/G/T); any indel
    /// rows will be dropped with a warning at load time.
    #[arg(long)]
    keep_indel: bool,
    #[arg(short, long, default_value = "merged_sumstats.tsv")]
    out: PathBuf,
    /// Number of threads for parallel file reads. Defaults to rayon's
    /// default (typically one per logical core); capped internally at
    /// `files.len() + 1` since we parallelize across files.
    #[arg(long)]
    threads: Option<usize>,
}

#[derive(Args)]
struct CommonFactorArgs {
    #[arg(long)]
    covstruc: PathBuf,
    #[arg(long, default_value = "DWLS")]
    estimation: String,
    #[arg(short, long, default_value = "commonfactor_result.tsv")]
    out: PathBuf,
}

#[derive(Args)]
struct GwasArgs {
    #[arg(long)]
    covstruc: PathBuf,
    #[arg(long)]
    sumstats: PathBuf,
    #[arg(long)]
    model: Option<String>,
    #[arg(long)]
    model_file: Option<PathBuf>,
    #[arg(long, default_value = "DWLS")]
    estimation: String,
    #[arg(long, default_value = "standard")]
    gc: String,
    #[arg(long)]
    threads: Option<usize>,
    #[arg(long)]
    std_lv: bool,
    #[arg(long)]
    sub: Option<String>,
    #[arg(long)]
    smooth_check: bool,
    #[arg(long)]
    q_snp: bool,
    #[arg(long)]
    fix_measurement: bool,
    #[arg(long)]
    twas: bool,
    #[arg(short, long, default_value = "gwas_result.tsv")]
    out: PathBuf,
    /// Save a Manhattan plot (SVG) of the first free SNP parameter.
    /// Requires CHR/BP columns in the merged sumstats input.
    #[arg(long)]
    save_plot_manhattan: Option<PathBuf>,
    /// Save a QQ plot (SVG) of the first free SNP parameter's p-values.
    #[arg(long)]
    save_plot_qq: Option<PathBuf>,
}

#[derive(Args)]
struct CommonfactorGwasArgs {
    #[arg(long)]
    covstruc: PathBuf,
    #[arg(long)]
    sumstats: PathBuf,
    #[arg(long, default_value = "standard")]
    gc: String,
    #[arg(long)]
    threads: Option<usize>,
    #[arg(short, long, default_value = "commonfactor_gwas_result.tsv")]
    out: PathBuf,
    /// Save a Manhattan plot (SVG).  Requires CHR/BP columns in the merged sumstats input.
    #[arg(long)]
    save_plot_manhattan: Option<PathBuf>,
    /// Save a QQ plot (SVG).
    #[arg(long)]
    save_plot_qq: Option<PathBuf>,
}

#[derive(Args)]
struct WriteModelArgs {
    #[arg(long)]
    loadings: PathBuf,
    #[arg(long, num_args = 1..)]
    names: Vec<String>,
    #[arg(long, default_value = "0.3")]
    cutoff: f64,
    #[arg(long)]
    fix_resid: bool,
    #[arg(long)]
    bifactor: bool,
    #[arg(short, long, default_value = "model.txt")]
    out: PathBuf,
}

#[derive(Args)]
struct ParallelAnalysisArgs {
    #[arg(long)]
    covstruc: PathBuf,
    #[arg(long, default_value = "500")]
    n_sim: usize,
    #[arg(short, long, default_value = "pa_result.tsv")]
    out: PathBuf,
    /// Save a scree plot (SVG) comparing observed vs simulated eigenvalues.
    #[arg(long)]
    save_plot: Option<PathBuf>,
}

#[derive(Args)]
struct EnrichArgs {
    #[arg(long)]
    input: PathBuf,
    #[arg(short, long, default_value = "enrich_result.tsv")]
    out: PathBuf,
}

#[derive(Args)]
struct SLdscArgs {
    #[arg(short, long, num_args = 1..)]
    traits: Vec<PathBuf>,
    #[arg(long)]
    sample_prev: Option<String>,
    #[arg(long)]
    pop_prev: Option<String>,
    #[arg(long)]
    ld: PathBuf,
    #[arg(long)]
    wld: Option<PathBuf>,
    #[arg(long, default_value = "200")]
    n_blocks: usize,
    #[arg(long, default_value = "22")]
    chr: usize,
    #[arg(long, default_value = "all")]
    select: String,
    #[arg(short, long, default_value = "s_ldsc_result.json")]
    out: PathBuf,
}

#[derive(Args)]
struct RgmodelArgs {
    #[arg(long)]
    covstruc: PathBuf,
    #[arg(long, default_value = "DWLS")]
    estimation: String,
    #[arg(long)]
    model: Option<String>,
    #[arg(long, default_value_t = false)]
    std_lv: bool,
    #[arg(short, long, default_value = "rgmodel_result.tsv")]
    out: PathBuf,
}

#[derive(Args)]
struct MultiSnpArgs {
    #[arg(long)]
    covstruc: PathBuf,
    #[arg(long)]
    sumstats: PathBuf,
    #[arg(long)]
    ld_matrix: PathBuf,
    #[arg(long)]
    model: Option<String>,
    #[arg(long)]
    model_file: Option<PathBuf>,
    #[arg(long, default_value = "DWLS")]
    estimation: String,
    #[arg(short, long, default_value = "multi_snp_result.tsv")]
    out: PathBuf,
}

#[derive(Args)]
struct SimulateArgs {
    #[arg(long)]
    covstruc: PathBuf,
    #[arg(long)]
    n_per_trait: String,
    #[arg(long)]
    ld: PathBuf,
    #[arg(short, long, default_value = "simulated_sumstats.tsv")]
    out: PathBuf,
}

#[derive(Args)]
struct HdlArgs {
    #[arg(short, long, num_args = 1..)]
    traits: Vec<PathBuf>,
    #[arg(long)]
    sample_prev: Option<String>,
    #[arg(long)]
    pop_prev: Option<String>,
    #[arg(long)]
    ld_path: PathBuf,
    #[arg(long, default_value = "335265")]
    n_ref: f64,
    #[arg(long, default_value = "piecewise")]
    method: String,
    #[arg(short, long, default_value = "hdl_result.json")]
    out: PathBuf,
}

#[derive(Args)]
struct SummaryGlsArgs {
    #[arg(long)]
    x: PathBuf,
    #[arg(long)]
    y: PathBuf,
    #[arg(long)]
    v: PathBuf,
    #[arg(long, default_value = "true")]
    intercept: bool,
    #[arg(short, long, default_value = "gls_result.tsv")]
    out: PathBuf,
}

fn main() -> Result<()> {
    env_logger::init();
    let cli = Cli::parse();

    match cli.command {
        Commands::Munge(args) => run_munge(args),
        Commands::Ldsc(args) => run_ldsc(args),
        Commands::Sem(args) => run_sem(args),
        Commands::Sumstats(args) => run_sumstats(args),
        Commands::CommonFactor(args) => run_commonfactor_cmd(args),
        Commands::Gwas(args) => run_gwas(args),
        Commands::CommonfactorGwas(args) => run_commonfactor_gwas(args),
        Commands::WriteModel(args) => run_write_model(args),
        Commands::ParallelAnalysis(args) => run_parallel_analysis(args),
        Commands::SLdsc(args) => run_s_ldsc(args),
        Commands::Enrich(args) => run_enrich(args),
        Commands::Rgmodel(args) => run_rgmodel_cmd(args),
        Commands::MultiSnp(args) => run_multi_snp_cmd(args),
        Commands::Simulate(args) => run_simulate(args),
        Commands::Hdl(args) => run_hdl(args),
        Commands::SummaryGls(args) => run_summary_gls(args),
    }
}

fn run_munge(args: MungeArgs) -> Result<()> {
    // Read reference
    eprintln!("Reading reference panel: {}", args.hm3.display());
    let reference = munge::read_reference(&args.hm3).context("failed to read HapMap3 reference")?;
    eprintln!("Loaded {} reference SNPs", reference.len());

    // Parse column name overrides from comma-separated KEY=VALUE pairs
    let column_overrides = args.column_names.map(|s| {
        s.split(',')
            .filter_map(|pair| {
                let mut parts = pair.splitn(2, '=');
                let key = parts.next()?.trim().to_string();
                let value = parts.next()?.trim().to_string();
                if key.is_empty() || value.is_empty() {
                    None
                } else {
                    Some((key, value))
                }
            })
            .collect::<HashMap<String, String>>()
    });

    let config = munge::MungeConfig {
        info_filter: args.info_filter,
        maf_filter: args.maf_filter,
        n_override: args.n,
        column_overrides,
    };

    let trait_names = args.trait_names.as_deref();
    for (i, file) in args.files.iter().enumerate() {
        let trait_name = trait_names
            .and_then(|names| names.get(i))
            .map(|s| s.as_str())
            .or_else(|| file.file_stem().and_then(|s| s.to_str()))
            .unwrap_or("trait");

        let out_path = args.out.join(format!("{trait_name}.sumstats.gz"));
        eprintln!("Munging: {} -> {}", file.display(), out_path.display());

        munge::munge_and_write(file, &reference, &config, &out_path)
            .with_context(|| format!("failed to munge {}", file.display()))?;
    }

    eprintln!("Done.");
    Ok(())
}

fn run_sumstats(args: SumstatsArgs) -> Result<()> {
    let names: Vec<String> = args.trait_names.unwrap_or_else(|| {
        args.files
            .iter()
            .map(|f| {
                f.file_stem()
                    .and_then(|s| s.to_str())
                    .unwrap_or("trait")
                    .to_string()
            })
            .collect()
    });

    let k = args.files.len();
    let config = gsem::sumstats::SumstatsConfig {
        info_filter: args.info_filter,
        maf_filter: args.maf_filter,
        n_overrides: vec![None; k],
        se_logit: vec![false; k],
        ols: vec![false; k],
        linprob: vec![false; k],
        keep_indel: args.keep_indel,
        keep_ambig: false,
        beta_overrides: Vec::new(),
        direct_filter: false,
        num_threads: args.threads,
    };

    let file_refs: Vec<&Path> = args.files.iter().map(|p| p.as_path()).collect();
    eprintln!("Merging {} GWAS files...", k);
    gsem::sumstats::merge_sumstats(&file_refs, &args.ref_file, &names, &config, &args.out)?;
    eprintln!("Done.");
    Ok(())
}

fn run_ldsc(args: LdscArgs) -> Result<()> {
    eprintln!("Reading {} trait files...", args.traits.len());

    // Read summary stats
    let mut trait_data = Vec::new();
    for path in &args.traits {
        let records = gwas_reader::read_sumstats(path)
            .with_context(|| format!("failed to read {}", path.display()))?;
        let n_snps = records.len();
        eprintln!("  {}: {} SNPs", path.display(), n_snps);

        trait_data.push(gsem_ldsc::TraitSumstats {
            snp: records.iter().map(|r| r.snp.clone()).collect(),
            z: records.iter().map(|r| r.z).collect(),
            n: records.iter().map(|r| r.n).collect(),
            a1: records.iter().map(|r| r.a1.clone()).collect(),
            a2: records.iter().map(|r| r.a2.clone()).collect(),
        });
    }

    // Read LD scores
    let wld_dir = args.wld.as_deref().unwrap_or(&args.ld);
    let chromosomes = parse_chromosome_selection(&args.select, args.chr);
    let ld_data = gsem::io::ld_reader::read_ld_scores(&args.ld, wld_dir, &chromosomes)
        .context("failed to read LD scores")?;

    eprintln!(
        "Loaded {} LD score SNPs, M={}",
        ld_data.records.len(),
        ld_data.total_m
    );

    // Parse prevalences
    let k = args.traits.len();
    let sp = parse_prevalences(&args.sample_prev, k);
    let pp = parse_prevalences(&args.pop_prev, k);

    let ld_snps: Vec<String> = ld_data.records.iter().map(|r| r.snp.clone()).collect();
    let ld_scores: Vec<f64> = ld_data.records.iter().map(|r| r.l2).collect();

    let config = gsem_ldsc::LdscConfig {
        n_blocks: args.n_blocks,
        chisq_max: args.chisq_max,
        num_threads: None,
    };

    let n_pairs = k * (k + 1) / 2;
    let pb = indicatif::ProgressBar::new(n_pairs as u64);
    pb.set_style(
        indicatif::ProgressStyle::with_template(
            "[{elapsed_precise}] {bar:40.cyan/blue} {pos}/{len} trait pairs ({eta})",
        )
        .unwrap(),
    );

    let result = gsem_ldsc::ldsc(
        &trait_data,
        &sp,
        &pp,
        &ld_scores,
        &ld_data.w_ld,
        &ld_snps,
        ld_data.total_m,
        &config,
        Some(&|| pb.inc(1)),
    )?;
    pb.finish_with_message("complete");

    // Write JSON output
    let json = result.to_json_string()?;
    std::fs::write(&args.out, &json)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!("LDSC complete. Results written to {}", args.out.display());

    // Print S matrix
    let k = result.s.nrows();
    eprintln!("\nGenetic covariance matrix (S):");
    for i in 0..k {
        let row: Vec<String> = (0..k)
            .map(|j| format!("{:8.4}", result.s[(i, j)]))
            .collect();
        eprintln!("  {}", row.join(" "));
    }

    eprintln!("\nIntercepts (I):");
    for i in 0..k {
        let row: Vec<String> = (0..k)
            .map(|j| format!("{:8.4}", result.i_mat[(i, j)]))
            .collect();
        eprintln!("  {}", row.join(" "));
    }

    if args.stand {
        eprintln!("\nStandardized genetic correlation matrix (S_Stand):");
        for i in 0..k {
            let row: Vec<String> = (0..k)
                .map(|j| {
                    let d_i = result.s[(i, i)].sqrt();
                    let d_j = result.s[(j, j)].sqrt();
                    if d_i > 0.0 && d_j > 0.0 {
                        format!("{:8.4}", result.s[(i, j)] / (d_i * d_j))
                    } else {
                        format!("{:8.4}", 0.0)
                    }
                })
                .collect();
            eprintln!("  {}", row.join(" "));
        }
    }

    Ok(())
}

fn run_s_ldsc(args: SLdscArgs) -> Result<()> {
    eprintln!("Reading {} trait files...", args.traits.len());

    // Read summary stats
    let mut trait_data = Vec::new();
    for path in &args.traits {
        let records = gwas_reader::read_sumstats(path)
            .with_context(|| format!("failed to read {}", path.display()))?;
        let n_snps = records.len();
        eprintln!("  {}: {} SNPs", path.display(), n_snps);

        trait_data.push(gsem_ldsc::TraitSumstats {
            snp: records.iter().map(|r| r.snp.clone()).collect(),
            z: records.iter().map(|r| r.z).collect(),
            n: records.iter().map(|r| r.n).collect(),
            a1: records.iter().map(|r| r.a1.clone()).collect(),
            a2: records.iter().map(|r| r.a2.clone()).collect(),
        });
    }

    // Read annotation LD scores
    let wld_dir = args.wld.as_deref().unwrap_or(&args.ld);
    let chromosomes = parse_chromosome_selection(&args.select, args.chr);
    let annot_data = gsem_ldsc::annot_reader::read_annot_ld_scores(&args.ld, wld_dir, &chromosomes)
        .context("failed to read annotation LD scores")?;

    eprintln!(
        "Loaded {} LD score SNPs, {} annotations, M_total={}",
        annot_data.snps.len(),
        annot_data.annotation_names.len(),
        annot_data.m_annot.iter().sum::<f64>()
    );

    // Parse prevalences
    let k = args.traits.len();
    let sp = parse_prevalences(&args.sample_prev, k);
    let pp = parse_prevalences(&args.pop_prev, k);

    let config = gsem_ldsc::stratified::StratifiedLdscConfig {
        n_blocks: args.n_blocks,
        rm_flank: false,
        flank_kb: 500,
    };

    eprintln!("Running stratified LDSC...");
    let result = gsem_ldsc::stratified::s_ldsc(
        &trait_data,
        &sp,
        &pp,
        &annot_data.annot_ld,
        &annot_data.w_ld,
        &annot_data.snps,
        &annot_data.annotation_names,
        &annot_data.m_annot,
        &config,
        Some(&annot_data.chr),
        Some(&annot_data.bp),
    )?;

    // Write JSON output
    let json = result.to_json_string()?;
    std::fs::write(&args.out, &json)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!(
        "Stratified LDSC complete. Results written to {}",
        args.out.display()
    );

    // Print summary
    let n_annot = result.annotations.len();
    eprintln!("\nAnnotations ({n_annot}):");
    for (a, name) in result.annotations.iter().enumerate() {
        let h2_diag: Vec<String> = (0..result.s_annot[a].nrows())
            .map(|i| format!("{:.6}", result.s_annot[a][(i, i)]))
            .collect();
        eprintln!(
            "  {name}: M={:.0}, prop={:.4}, h2_diag=[{}]",
            result.m_annot[a],
            result.prop[a],
            h2_diag.join(", ")
        );
    }

    eprintln!("\nIntercepts (I):");
    let ki = result.i_mat.nrows();
    for i in 0..ki {
        let row: Vec<String> = (0..ki)
            .map(|j| format!("{:8.4}", result.i_mat[(i, j)]))
            .collect();
        eprintln!("  {}", row.join(" "));
    }

    Ok(())
}

fn run_sem(args: SemArgs) -> Result<()> {
    // Read LDSC result
    let json = std::fs::read_to_string(&args.covstruc)
        .with_context(|| format!("failed to read {}", args.covstruc.display()))?;
    let ldsc_result = gsem_ldsc::LdscResult::from_json_string(&json)?;

    // Get model string
    let model_str = if let Some(m) = args.model {
        m
    } else if let Some(f) = args.model_file {
        std::fs::read_to_string(&f).with_context(|| format!("failed to read {}", f.display()))?
    } else {
        anyhow::bail!("must provide --model or --model-file");
    };

    let estimation = gsem_sem::EstimationMethod::from_str_lossy(&args.estimation);
    let std_lv = args.std_lv;
    eprintln!("Fitting SEM model (estimation={estimation}, std_lv={std_lv})...");

    // Parse and fit
    let mut pt =
        gsem_sem::syntax::parse_model(&model_str, std_lv).map_err(|e| anyhow::anyhow!("{e}"))?;
    let k = ldsc_result.s.nrows();
    let obs_names: Vec<String> = (0..k).map(|i| format!("V{}", i + 1)).collect();
    let mut sem_model = gsem_sem::model::Model::from_partable(&pt, &obs_names);

    let kstar = k * (k + 1) / 2;
    let v_diag: Vec<f64> = (0..kstar).map(|i| ldsc_result.v[(i, i)]).collect();

    let fit = match estimation {
        gsem_sem::EstimationMethod::Ml => {
            gsem_sem::estimator::fit_ml(&mut sem_model, &ldsc_result.s, 1000, None)
        }
        gsem_sem::EstimationMethod::Dwls => {
            gsem_sem::estimator::fit_dwls(&mut sem_model, &ldsc_result.s, &v_diag, 1000, None)
        }
    };

    // If model failed to converge and fix_resid is set, add lower bounds on
    // residual variances (op == "~~" && lhs == rhs) and retry.
    let fit = if !fit.converged && args.fix_resid {
        eprintln!(
            "Model did not converge; retrying with residual variance lower bounds (fix_resid)..."
        );
        for row in &mut pt.rows {
            if row.op == gsem_sem::syntax::Op::Covariance
                && row.lhs == row.rhs
                && row.free > 0
                && row.lower_bound.is_none()
            {
                row.lower_bound = Some(0.0001);
            }
        }
        // Rebuild model with updated lower bounds
        sem_model = gsem_sem::model::Model::from_partable(&pt, &obs_names);
        match estimation {
            gsem_sem::EstimationMethod::Ml => {
                gsem_sem::estimator::fit_ml(&mut sem_model, &ldsc_result.s, 1000, None)
            }
            gsem_sem::EstimationMethod::Dwls => {
                gsem_sem::estimator::fit_dwls(&mut sem_model, &ldsc_result.s, &v_diag, 1000, None)
            }
        }
    } else {
        fit
    };

    eprintln!("Converged: {}", fit.converged);
    eprintln!("Objective: {:.6}", fit.objective);

    // Write results
    let mut output = String::from("lhs\top\trhs\test\n");
    let free_rows: Vec<_> = pt.rows.iter().filter(|r| r.free > 0).collect();
    for (i, row) in free_rows.iter().enumerate() {
        let est = fit.params.get(i).copied().unwrap_or(0.0);
        output.push_str(&format!(
            "{}\t{}\t{}\t{:.6}\n",
            row.lhs, row.op, row.rhs, est
        ));
    }

    // Q_Factor heterogeneity test
    if args.q_factor {
        let sigma_hat = sem_model.implied_cov();
        let fi = gsem_sem::q_factor::factor_indicators(&pt, &obs_names);
        if fi.len() >= 2 {
            let q_results = gsem_sem::q_factor::compute_q_factor(
                &ldsc_result.s,
                &sigma_hat,
                &ldsc_result.v,
                &fi,
            )?;
            if !q_results.is_empty() {
                output.push_str("\n# Q_Factor heterogeneity test\n");
                output.push_str("factor1\tfactor2\tQ_chisq\tQ_df\tQ_pval\n");
                for qr in &q_results {
                    output.push_str(&format!(
                        "{}\t{}\t{:.6}\t{}\t{:.6e}\n",
                        qr.factor1, qr.factor2, qr.q_chisq, qr.q_df, qr.q_p
                    ));
                    eprintln!(
                        "Q_Factor({}, {}): chisq={:.4}, df={}, p={:.4e}",
                        qr.factor1, qr.factor2, qr.q_chisq, qr.q_df, qr.q_p
                    );
                }
            }
        } else {
            eprintln!("Q_Factor: skipped (fewer than 2 factors in model)");
        }
    }

    std::fs::write(&args.out, &output)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!("Results written to {}", args.out.display());
    Ok(())
}

fn run_gwas(args: GwasArgs) -> Result<()> {
    // Set thread count
    if let Some(t) = args.threads {
        rayon::ThreadPoolBuilder::new()
            .num_threads(t)
            .build_global()
            .context("failed to initialize thread pool")?;
    }

    // Read LDSC result
    let json = std::fs::read_to_string(&args.covstruc)
        .with_context(|| format!("failed to read {}", args.covstruc.display()))?;
    let ldsc_result = gsem_ldsc::LdscResult::from_json_string(&json)?;

    // Get model string
    let model_str = if let Some(m) = args.model {
        m
    } else if let Some(f) = args.model_file {
        std::fs::read_to_string(&f)
            .with_context(|| format!("failed to read model file {}", f.display()))?
    } else {
        anyhow::bail!("must provide --model or --model-file");
    };

    let gc_mode: gsem::gwas::gc_correction::GcMode = args.gc.parse().expect("infallible");
    let k = ldsc_result.s.nrows();

    // Clamp intercept diagonals to >= 1 (matching R)
    let mut i_ld = ldsc_result.i_mat.to_owned();
    for i in 0..k {
        if i_ld[(i, i)] < 1.0 {
            i_ld[(i, i)] = 1.0;
        }
    }

    // Parse --sub filter list
    let sub_filters: Option<Vec<String>> = args.sub.map(|s| {
        s.split(',')
            .map(|entry| entry.trim().to_string())
            .filter(|entry| !entry.is_empty())
            .collect()
    });

    let estimation = gsem_sem::EstimationMethod::from_str_lossy(&args.estimation);

    if args.twas {
        run_gwas_twas(
            &args.sumstats,
            &model_str,
            estimation,
            &args.gc,
            gc_mode,
            k,
            args.std_lv,
            args.smooth_check,
            &ldsc_result,
            &i_ld,
            &sub_filters,
            &args.out,
        )
    } else {
        run_gwas_snp(
            &args.sumstats,
            &model_str,
            estimation,
            &args.gc,
            gc_mode,
            k,
            args.std_lv,
            args.smooth_check,
            &ldsc_result,
            &i_ld,
            &sub_filters,
            &args.out,
            args.save_plot_manhattan.as_deref(),
            args.save_plot_qq.as_deref(),
        )
    }
}

/// Standard SNP-based GWAS path.
#[allow(clippy::too_many_arguments)]
fn run_gwas_snp(
    sumstats: &Path,
    model_str: &str,
    estimation: gsem_sem::EstimationMethod,
    gc: &str,
    gc_mode: gsem::gwas::gc_correction::GcMode,
    k: usize,
    std_lv: bool,
    smooth_check: bool,
    ldsc_result: &gsem_ldsc::LdscResult,
    i_ld: &Mat<f64>,
    sub_filters: &Option<Vec<String>>,
    out: &Path,
    save_plot_manhattan: Option<&Path>,
    save_plot_qq: Option<&Path>,
) -> Result<()> {
    // Read merged sumstats
    eprintln!("Reading merged sumstats: {}", sumstats.display());
    let merged = gsem::io::sumstats_reader::read_merged_sumstats(sumstats)
        .with_context(|| format!("failed to read {}", sumstats.display()))?;

    let n_snps = merged.len();
    eprintln!("GWAS: {n_snps} SNPs, {k} traits, estimation={estimation}, gc={gc}");

    if merged.trait_names.len() != k {
        anyhow::bail!(
            "trait count mismatch: LDSC has {k} traits, sumstats has {} (beta.* columns)",
            merged.trait_names.len()
        );
    }

    // Extract per-SNP arrays for user_gwas (borrowed slices, no copy)
    let beta_snp = merged.beta_rows();
    let se_snp = merged.se_rows();
    let var_snp = merged.var_snp();

    // Parse model once upfront
    let model = gsem_sem::syntax::parse_model(model_str, std_lv)
        .map_err(|e| anyhow::anyhow!("model parse error: {e}"))?;

    let config = gsem::gwas::user_gwas::UserGwasConfig {
        model,
        estimation,
        gc: gc_mode,
        max_iter: 500,
        smooth_check,
        snp_se: None,
        variant_label: gsem::gwas::user_gwas::VariantLabel::Snp,
        q_snp: false,
        fix_measurement: true,
        num_threads: None,
    };

    eprintln!("Running GWAS across {n_snps} SNPs...");
    let pb = indicatif::ProgressBar::new(n_snps as u64);
    pb.set_style(
        indicatif::ProgressStyle::with_template(
            "[{elapsed_precise}] {bar:40.cyan/blue} {pos}/{len} SNPs ({eta})",
        )
        .unwrap(),
    );
    let results = gsem::gwas::user_gwas::run_user_gwas(
        &config,
        &ldsc_result.s,
        &ldsc_result.v,
        i_ld,
        &beta_snp,
        &se_snp,
        &var_snp,
        Some(&|| pb.inc(1)),
    );
    pb.finish_with_message("complete");

    // Write TSV output
    let mut output = String::from("SNP\tlhs\top\trhs\test\tse\tz\tp\tchisq\tdf\tconverged\n");
    for snp_result in &results {
        let snp_name = &merged.snp[snp_result.snp_idx];
        for param in &snp_result.params {
            // Apply --sub filter: only include params matching the filter list
            if let Some(filters) = sub_filters {
                let param_key = format!("{}{}{}", param.lhs, param.op, param.rhs);
                if !filters.iter().any(|f| f == &param_key) {
                    continue;
                }
            }

            output.push_str(&format!(
                "{}\t{}\t{}\t{}\t{:.6}\t{:.6}\t{:.4}\t{:.6e}\t{:.4}\t{}\t{}\n",
                snp_name,
                param.lhs,
                param.op,
                param.rhs,
                param.est,
                param.se,
                param.z_stat,
                param.p_value,
                snp_result.chisq,
                snp_result.chisq_df,
                snp_result.converged,
            ));
        }
    }

    std::fs::write(out, &output).with_context(|| format!("failed to write {}", out.display()))?;

    let n_converged = results.iter().filter(|r| r.converged).count();
    eprintln!(
        "GWAS complete: {n_snps} SNPs, {n_converged} converged. Results: {}",
        out.display()
    );

    // Generate plots from the first free parameter (or first --sub match) per SNP
    if save_plot_manhattan.is_some() || save_plot_qq.is_some() {
        let (p_values, manhattan_pts) = collect_plot_data(&results, &merged, sub_filters);
        if let Some(qq_path) = save_plot_qq {
            gsem::plot::qq::qq_plot(&p_values, "GWAS QQ Plot", qq_path)?;
            eprintln!("QQ plot: {}", qq_path.display());
        }
        if let Some(man_path) = save_plot_manhattan {
            if manhattan_pts.is_empty() {
                log::warn!(
                    "Manhattan plot skipped: merged sumstats {} has no CHR/BP columns",
                    sumstats.display()
                );
            } else {
                gsem::plot::manhattan::manhattan_plot(
                    &manhattan_pts,
                    "GWAS Manhattan Plot",
                    man_path,
                    Some(100_000),
                )?;
                eprintln!("Manhattan plot: {}", man_path.display());
            }
        }
    }

    Ok(())
}

/// Collect per-SNP p-values (and optional CHR/BP points) for plotting.
///
/// Picks the first free parameter per SNP that matches `sub_filters`
/// (or just the first free parameter when no filter is set).
fn collect_plot_data(
    results: &[gsem::gwas::user_gwas::SnpResult],
    merged: &gsem::io::sumstats_reader::MergedSumstats,
    sub_filters: &Option<Vec<String>>,
) -> (Vec<f64>, Vec<gsem::plot::ManhattanPoint>) {
    let mut p_values = Vec::with_capacity(results.len());
    let mut manhattan = Vec::new();
    let chr_vec = merged.chr.as_ref();
    let bp_vec = merged.bp.as_ref();
    for snp_result in results {
        let param = snp_result.params.iter().find(|p| {
            if let Some(filters) = sub_filters {
                let key = format!("{}{}{}", p.lhs, p.op, p.rhs);
                filters.iter().any(|f| f == &key)
            } else {
                true
            }
        });
        let Some(param) = param else { continue };
        if !param.p_value.is_finite() {
            continue;
        }
        p_values.push(param.p_value);
        if let (Some(chrs), Some(bps)) = (chr_vec, bp_vec)
            && let (Some(&chr), Some(&bp)) =
                (chrs.get(snp_result.snp_idx), bps.get(snp_result.snp_idx))
        {
            manhattan.push(gsem::plot::ManhattanPoint {
                chr,
                bp,
                neg_log10_p: gsem::plot::neg_log10_p(param.p_value),
            });
        }
    }
    (p_values, manhattan)
}

/// TWAS mode: uses Gene/Panel/HSQ instead of SNP/A1/A2/MAF.
///
/// Key differences from standard GWAS:
/// - Reads TWAS-format sumstats (Gene/Panel/HSQ columns)
/// - Uses HSQ (heritability of expression) as var_snp instead of 2*MAF*(1-MAF)
/// - Replaces "SNP" with "Gene" in model syntax
/// - Outputs Gene/Panel/HSQ columns instead of SNP column
#[allow(clippy::too_many_arguments)]
fn run_gwas_twas(
    sumstats: &Path,
    model_str: &str,
    estimation: gsem_sem::EstimationMethod,
    gc: &str,
    gc_mode: gsem::gwas::gc_correction::GcMode,
    k: usize,
    std_lv: bool,
    smooth_check: bool,
    ldsc_result: &gsem_ldsc::LdscResult,
    i_ld: &Mat<f64>,
    sub_filters: &Option<Vec<String>>,
    out: &Path,
) -> Result<()> {
    // Read TWAS sumstats
    eprintln!("Reading TWAS sumstats: {}", sumstats.display());
    let twas_data = gsem::io::twas_reader::read_twas_sumstats(sumstats)
        .with_context(|| format!("failed to read TWAS sumstats {}", sumstats.display()))?;

    let n_genes = twas_data.genes.len();
    eprintln!("TWAS: {n_genes} genes, {k} traits, estimation={estimation}, gc={gc}");

    if twas_data.trait_names.len() != k {
        anyhow::bail!(
            "trait count mismatch: LDSC has {k} traits, TWAS sumstats has {} (beta.* columns)",
            twas_data.trait_names.len()
        );
    }

    // Extract per-gene arrays (borrowed slices, no copy)
    let beta_gene: Vec<&[f64]> = twas_data.genes.iter().map(|g| g.beta.as_slice()).collect();
    let se_gene: Vec<&[f64]> = twas_data.genes.iter().map(|g| g.se.as_slice()).collect();
    // In TWAS mode, var_snp = HSQ (heritability of expression)
    let var_gene: Vec<f64> = twas_data.genes.iter().map(|g| g.hsq).collect();

    // Replace "SNP" with "Gene" in model syntax so that the observed variable
    // name matches what userGWAS expects in the first position
    let twas_model_str = model_str.replace("SNP", "Gene");
    let model = gsem_sem::syntax::parse_model(&twas_model_str, std_lv)
        .map_err(|e| anyhow::anyhow!("model parse error: {e}"))?;

    let config = gsem::gwas::user_gwas::UserGwasConfig {
        model,
        estimation,
        gc: gc_mode,
        max_iter: 500,
        smooth_check,
        snp_se: None,
        variant_label: gsem::gwas::user_gwas::VariantLabel::Gene,
        q_snp: false,
        fix_measurement: true,
        num_threads: None,
    };

    eprintln!("Running TWAS across {n_genes} genes...");
    let pb = indicatif::ProgressBar::new(n_genes as u64);
    pb.set_style(
        indicatif::ProgressStyle::with_template(
            "[{elapsed_precise}] {bar:40.cyan/blue} {pos}/{len} genes ({eta})",
        )
        .unwrap(),
    );
    let results = gsem::gwas::user_gwas::run_user_gwas(
        &config,
        &ldsc_result.s,
        &ldsc_result.v,
        i_ld,
        &beta_gene,
        &se_gene,
        &var_gene,
        Some(&|| pb.inc(1)),
    );
    pb.finish_with_message("complete");

    // Write TSV output with TWAS columns
    let mut output =
        String::from("Gene\tPanel\tHSQ\tlhs\top\trhs\test\tse\tz\tp\tchisq\tdf\tconverged\n");
    for gene_result in &results {
        let gene = &twas_data.genes[gene_result.snp_idx];
        for param in &gene_result.params {
            // Apply --sub filter: only include params matching the filter list
            if let Some(filters) = sub_filters {
                let param_key = format!("{}{}{}", param.lhs, param.op, param.rhs);
                if !filters.iter().any(|f| f == &param_key) {
                    continue;
                }
            }

            output.push_str(&format!(
                "{}\t{}\t{:.6}\t{}\t{}\t{}\t{:.6}\t{:.6}\t{:.4}\t{:.6e}\t{:.4}\t{}\t{}\n",
                gene.gene,
                gene.panel,
                gene.hsq,
                param.lhs,
                param.op,
                param.rhs,
                param.est,
                param.se,
                param.z_stat,
                param.p_value,
                gene_result.chisq,
                gene_result.chisq_df,
                gene_result.converged,
            ));
        }
    }

    std::fs::write(out, &output).with_context(|| format!("failed to write {}", out.display()))?;

    let n_converged = results.iter().filter(|r| r.converged).count();
    eprintln!(
        "TWAS complete: {n_genes} genes, {n_converged} converged. Results: {}",
        out.display()
    );
    Ok(())
}

fn run_commonfactor_cmd(args: CommonFactorArgs) -> Result<()> {
    let json = std::fs::read_to_string(&args.covstruc)
        .with_context(|| format!("failed to read {}", args.covstruc.display()))?;
    let ldsc_result = gsem_ldsc::LdscResult::from_json_string(&json)?;

    let estimation = gsem_sem::EstimationMethod::from_str_lossy(&args.estimation);
    eprintln!("Fitting common factor model (estimation={estimation})...");

    let result =
        gsem_sem::commonfactor::run_commonfactor(&ldsc_result.s, &ldsc_result.v, estimation)?;

    eprintln!(
        "Converged. Chi-sq={:.4}, df={}, CFI={:.4}, SRMR={:.4}",
        result.fit.chisq, result.fit.df, result.fit.cfi, result.fit.srmr
    );

    // Write TSV output
    let mut output = String::from("lhs\top\trhs\test\tse\tz\tp\n");
    for param in &result.parameters {
        output.push_str(&format!(
            "{}\t{}\t{}\t{:.6}\t{:.6}\t{:.4}\t{:.6e}\n",
            param.lhs, param.op, param.rhs, param.est, param.se, param.z, param.p
        ));
    }

    // Append fit indices
    output.push_str(&format!(
        "\n# Model fit: chisq={:.4} df={} p={:.6e} AIC={:.4} CFI={:.4} SRMR={:.4}\n",
        result.fit.chisq,
        result.fit.df,
        result.fit.p_chisq,
        result.fit.aic,
        result.fit.cfi,
        result.fit.srmr
    ));

    std::fs::write(&args.out, &output)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!("Results written to {}", args.out.display());
    Ok(())
}

fn run_commonfactor_gwas(args: CommonfactorGwasArgs) -> Result<()> {
    // One-shot compatibility notice — see ARCHITECTURE.md §3.3.
    // Suppress by setting GSEMR_COMMONFACTOR_GWAS_QUIET=1.
    if std::env::var("GSEMR_COMMONFACTOR_GWAS_QUIET")
        .map(|v| v.is_empty())
        .unwrap_or(true)
    {
        eprintln!(
            "note: `gsem commonfactorGWAS` fits a single-factor model using fixed-variance\n\
             identification (F1 ~~ 1*F1, loadings free) with the fix_measurement baseline\n\
             optimization. This matches GenomicSEM::userGWAS on the equivalent model but does\n\
             NOT numerically match GenomicSEM::commonfactorGWAS, which uses marker-indicator\n\
             identification. On real GWAS data the two can disagree in sign and magnitude.\n\
             See ARCHITECTURE.md section 3.3 for the full rationale. Suppress with\n\
             GSEMR_COMMONFACTOR_GWAS_QUIET=1."
        );
    }

    // Set thread count
    if let Some(t) = args.threads {
        rayon::ThreadPoolBuilder::new()
            .num_threads(t)
            .build_global()
            .context("failed to initialize thread pool")?;
    }

    // Read LDSC result
    let json = std::fs::read_to_string(&args.covstruc)
        .with_context(|| format!("failed to read {}", args.covstruc.display()))?;
    let ldsc_result = gsem_ldsc::LdscResult::from_json_string(&json)?;

    let gc_mode: gsem::gwas::gc_correction::GcMode = args.gc.parse().expect("infallible");
    let k = ldsc_result.s.nrows();

    // Read merged sumstats
    eprintln!("Reading merged sumstats: {}", args.sumstats.display());
    let merged = gsem::io::sumstats_reader::read_merged_sumstats(&args.sumstats)
        .with_context(|| format!("failed to read {}", args.sumstats.display()))?;

    let n_snps = merged.len();
    let gc = &args.gc;
    eprintln!("Common factor GWAS: {n_snps} SNPs, {k} traits, gc={gc}");

    if merged.trait_names.len() != k {
        anyhow::bail!(
            "trait count mismatch: LDSC has {k} traits, sumstats has {} (beta.* columns)",
            merged.trait_names.len()
        );
    }

    let beta_snp = merged.beta_rows();
    let se_snp = merged.se_rows();
    let var_snp = merged.var_snp();

    // Clamp intercept diagonals to >= 1 (matching R)
    let mut i_ld = ldsc_result.i_mat.to_owned();
    for i in 0..k {
        if i_ld[(i, i)] < 1.0 {
            i_ld[(i, i)] = 1.0;
        }
    }

    eprintln!("Running common factor GWAS across {n_snps} SNPs...");
    let pb = indicatif::ProgressBar::new(n_snps as u64);
    pb.set_style(
        indicatif::ProgressStyle::with_template(
            "[{elapsed_precise}] {bar:40.cyan/blue} {pos}/{len} SNPs ({eta})",
        )
        .unwrap(),
    );
    let cf_config = gsem::gwas::common_factor::CommonFactorGwasConfig {
        gc: gc_mode,
        ..Default::default()
    };
    let results = gsem::gwas::common_factor::run_common_factor_gwas(
        &merged.trait_names,
        &ldsc_result.s,
        &ldsc_result.v,
        &i_ld,
        &beta_snp,
        &se_snp,
        &var_snp,
        &cf_config,
        Some(&|| pb.inc(1)),
    );
    pb.finish_with_message("complete");

    // Write TSV output (same format as user GWAS)
    let mut output = String::from("SNP\tlhs\top\trhs\test\tse\tz\tp\tchisq\tdf\tconverged\n");
    for snp_result in &results {
        let snp_name = &merged.snp[snp_result.snp_idx];
        for param in &snp_result.params {
            output.push_str(&format!(
                "{}\t{}\t{}\t{}\t{:.6}\t{:.6}\t{:.4}\t{:.6e}\t{:.4}\t{}\t{}\n",
                snp_name,
                param.lhs,
                param.op,
                param.rhs,
                param.est,
                param.se,
                param.z_stat,
                param.p_value,
                snp_result.chisq,
                snp_result.chisq_df,
                snp_result.converged,
            ));
        }
    }

    std::fs::write(&args.out, &output)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    let n_converged = results.iter().filter(|r| r.converged).count();
    eprintln!(
        "Common factor GWAS complete: {n_snps} SNPs, {n_converged} converged. Results: {}",
        args.out.display()
    );

    if args.save_plot_manhattan.is_some() || args.save_plot_qq.is_some() {
        let (p_values, manhattan_pts) = collect_plot_data(&results, &merged, &None);
        if let Some(qq_path) = args.save_plot_qq.as_deref() {
            gsem::plot::qq::qq_plot(&p_values, "Common Factor GWAS QQ Plot", qq_path)?;
            eprintln!("QQ plot: {}", qq_path.display());
        }
        if let Some(man_path) = args.save_plot_manhattan.as_deref() {
            if manhattan_pts.is_empty() {
                log::warn!("Manhattan plot skipped: merged sumstats has no CHR/BP columns");
            } else {
                gsem::plot::manhattan::manhattan_plot(
                    &manhattan_pts,
                    "Common Factor GWAS Manhattan Plot",
                    man_path,
                    Some(100_000),
                )?;
                eprintln!("Manhattan plot: {}", man_path.display());
            }
        }
    }

    Ok(())
}

fn run_write_model(args: WriteModelArgs) -> Result<()> {
    // Read loadings TSV: rows are phenotypes, columns are factors, values are floats
    let content = std::fs::read_to_string(&args.loadings)
        .with_context(|| format!("failed to read {}", args.loadings.display()))?;

    let mut rows: Vec<Vec<f64>> = Vec::new();
    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        let vals: Vec<f64> = trimmed
            .split(['\t', ' '])
            .filter(|s| !s.is_empty())
            .map(|s| {
                s.parse::<f64>()
                    .with_context(|| format!("failed to parse float: {s:?}"))
            })
            .collect::<Result<Vec<_>>>()?;
        rows.push(vals);
    }

    if rows.is_empty() {
        anyhow::bail!("loadings file is empty");
    }

    let n_rows = rows.len();
    let n_cols = rows[0].len();
    let loadings = Mat::from_fn(n_rows, n_cols, |i, j| rows[i][j]);

    let cutoff = args.cutoff;
    let fix_resid = args.fix_resid;
    let bifactor = args.bifactor;
    eprintln!(
        "Generating model from {}x{} loadings matrix (cutoff={cutoff}, fix_resid={fix_resid}, bifactor={bifactor})",
        n_rows, n_cols
    );

    let model_str = gsem_sem::write_model::write_model(
        &loadings,
        &args.names,
        cutoff,
        fix_resid,
        bifactor,
        false,
        false,
    );

    std::fs::write(&args.out, &model_str)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!("Model written to {}", args.out.display());
    Ok(())
}

fn run_parallel_analysis(args: ParallelAnalysisArgs) -> Result<()> {
    let json = std::fs::read_to_string(&args.covstruc)
        .with_context(|| format!("failed to read {}", args.covstruc.display()))?;
    let ldsc_result = gsem_ldsc::LdscResult::from_json_string(&json)?;

    let k = ldsc_result.s.nrows();
    let n_sim = args.n_sim;
    eprintln!("Running parallel analysis ({k} traits, {n_sim} simulations)...");
    let pb = indicatif::ProgressBar::new(n_sim as u64);
    pb.set_style(
        indicatif::ProgressStyle::with_template(
            "[{elapsed_precise}] {bar:40.cyan/blue} {pos}/{len} simulations ({eta})",
        )
        .unwrap(),
    );
    let result = gsem::stats::parallel_analysis::parallel_analysis(
        &ldsc_result.s,
        &ldsc_result.v,
        n_sim,
        0.95,
        false,
        None,
        Some(&|| pb.inc(1)),
    );
    pb.finish_with_message("complete");

    eprintln!("Suggested number of factors: {}", result.n_factors);

    // Write TSV output
    let mut output = String::from("factor\tobserved\tsimulated_95\n");
    for (i, (obs, sim)) in result
        .observed
        .iter()
        .zip(result.simulated_95.iter())
        .enumerate()
    {
        output.push_str(&format!("{}\t{:.6}\t{:.6}\n", i + 1, obs, sim));
    }
    output.push_str(&format!("\n# n_factors={}\n", result.n_factors));

    std::fs::write(&args.out, &output)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!("Results written to {}", args.out.display());

    if let Some(plot_path) = args.save_plot.as_deref() {
        let points: Vec<gsem::plot::scree::ScreePoint> = result
            .observed
            .iter()
            .zip(result.simulated_95.iter())
            .enumerate()
            .map(|(i, (obs, sim))| gsem::plot::scree::ScreePoint {
                factor: i + 1,
                observed: *obs,
                simulated_95: *sim,
            })
            .collect();
        gsem::plot::scree::scree_plot(&points, "Parallel Analysis Scree Plot", plot_path)?;
        eprintln!("Scree plot: {}", plot_path.display());
    }

    Ok(())
}

fn run_enrich(args: EnrichArgs) -> Result<()> {
    // Read JSON input containing all enrichment data
    let json = std::fs::read_to_string(&args.input)
        .with_context(|| format!("failed to read {}", args.input.display()))?;

    #[derive(serde::Deserialize)]
    struct EnrichInput {
        s_baseline: Vec<Vec<f64>>,
        s_annot: Vec<Vec<Vec<f64>>>,
        v_annot: Vec<Vec<Vec<f64>>>,
        annotation_names: Vec<String>,
        m_annot: Vec<f64>,
        m_total: f64,
    }

    let data: EnrichInput =
        serde_json::from_str(&json).context("failed to parse enrichment input JSON")?;

    // Convert nested vecs to faer::Mat
    let k = data.s_baseline.len();
    let s_baseline = Mat::from_fn(k, k, |i, j| data.s_baseline[i][j]);

    let s_annot: Vec<Mat<f64>> = data
        .s_annot
        .iter()
        .map(|m| {
            let rows = m.len();
            let cols = if rows > 0 { m[0].len() } else { 0 };
            Mat::from_fn(rows, cols, |i, j| m[i][j])
        })
        .collect();

    let v_annot: Vec<Mat<f64>> = data
        .v_annot
        .iter()
        .map(|m| {
            let rows = m.len();
            let cols = if rows > 0 { m[0].len() } else { 0 };
            Mat::from_fn(rows, cols, |i, j| m[i][j])
        })
        .collect();

    eprintln!(
        "Running enrichment analysis ({} annotations, {k} traits)...",
        data.annotation_names.len()
    );

    let result = gsem::stats::enrich::enrichment_test(
        &s_baseline,
        &s_annot,
        &v_annot,
        &data.annotation_names,
        &data.m_annot,
        data.m_total,
    );

    // Write TSV output
    let mut output = String::from("annotation\tenrichment\tse\tp\n");
    for i in 0..result.annotations.len() {
        output.push_str(&format!(
            "{}\t{:.6}\t{:.6}\t{:.6e}\n",
            result.annotations[i], result.enrichment[i], result.se[i], result.p[i]
        ));
    }

    std::fs::write(&args.out, &output)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!("Enrichment results written to {}", args.out.display());
    Ok(())
}

fn run_rgmodel_cmd(args: RgmodelArgs) -> Result<()> {
    let json = std::fs::read_to_string(&args.covstruc)
        .with_context(|| format!("failed to read {}", args.covstruc.display()))?;
    let ldsc_result = gsem_ldsc::LdscResult::from_json_string(&json)?;

    let estimation = gsem_sem::EstimationMethod::from_str_lossy(&args.estimation);
    eprintln!("Fitting rgmodel (estimation={estimation})...");

    let result = gsem_sem::rgmodel::run_rgmodel_with_model(
        &ldsc_result.s,
        &ldsc_result.v,
        estimation,
        args.model.as_deref(),
        args.std_lv,
    )?;

    let k = result.r.nrows();
    eprintln!(
        "Converged. R is {k}x{k}. Underlying model chi-sq={:.4}, df={}, CFI={:.4}",
        result.sem_result.fit.chisq, result.sem_result.fit.df, result.sem_result.fit.cfi
    );

    // Write output: R matrix, then V_R matrix, then SEM parameters
    let mut output = String::new();
    output.push_str("# Model-implied genetic correlation matrix R\n");
    let col_names: Vec<String> = (0..k).map(|i| format!("V{}", i + 1)).collect();
    output.push_str(&format!("\t{}\n", col_names.join("\t")));
    for i in 0..k {
        output.push_str(&format!("V{}", i + 1));
        for j in 0..k {
            output.push_str(&format!("\t{:.6}", result.r[(i, j)]));
        }
        output.push('\n');
    }

    output.push_str("\n# Sampling covariance of vech(R) - V_R\n");
    let kstar = k * (k + 1) / 2;
    for i in 0..kstar {
        for j in 0..kstar {
            if j > 0 {
                output.push('\t');
            }
            output.push_str(&format!("{:.6e}", result.v_r[(i, j)]));
        }
        output.push('\n');
    }

    output.push_str("\n# SEM parameter estimates\n");
    output.push_str("lhs\top\trhs\test\tse\tz\tp\n");
    for param in &result.sem_result.parameters {
        output.push_str(&format!(
            "{}\t{}\t{}\t{:.6}\t{:.6}\t{:.4}\t{:.6e}\n",
            param.lhs, param.op, param.rhs, param.est, param.se, param.z, param.p
        ));
    }

    output.push_str(&format!(
        "\n# Model fit: chisq={:.4} df={} p={:.6e} AIC={:.4} CFI={:.4} SRMR={:.4}\n",
        result.sem_result.fit.chisq,
        result.sem_result.fit.df,
        result.sem_result.fit.p_chisq,
        result.sem_result.fit.aic,
        result.sem_result.fit.cfi,
        result.sem_result.fit.srmr
    ));

    std::fs::write(&args.out, &output)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!("Results written to {}", args.out.display());
    Ok(())
}

fn run_multi_snp_cmd(args: MultiSnpArgs) -> Result<()> {
    // Read LDSC result
    let json = std::fs::read_to_string(&args.covstruc)
        .with_context(|| format!("failed to read {}", args.covstruc.display()))?;
    let ldsc_result = gsem_ldsc::LdscResult::from_json_string(&json)?;
    let k = ldsc_result.s.nrows();

    // Get model string
    let model_str = if let Some(m) = args.model {
        m
    } else if let Some(f) = args.model_file {
        std::fs::read_to_string(&f)
            .with_context(|| format!("failed to read model file {}", f.display()))?
    } else {
        anyhow::bail!("must provide --model or --model-file");
    };

    // Read merged sumstats
    eprintln!("Reading merged sumstats: {}", args.sumstats.display());
    let merged = gsem::io::sumstats_reader::read_merged_sumstats(&args.sumstats)
        .with_context(|| format!("failed to read {}", args.sumstats.display()))?;

    if merged.trait_names.len() != k {
        anyhow::bail!(
            "trait count mismatch: LDSC has {k} traits, sumstats has {} (beta.* columns)",
            merged.trait_names.len()
        );
    }

    // Read LD matrix
    eprintln!("Reading LD matrix: {}", args.ld_matrix.display());
    let (ld_mat, _ld_names) = gsem::gwas::multi_snp::read_ld_matrix(&args.ld_matrix)?;
    let n_snps = ld_mat.nrows();
    eprintln!("LD matrix: {n_snps} x {n_snps}");

    if n_snps > merged.len() {
        anyhow::bail!(
            "LD matrix has {n_snps} SNPs but sumstats only has {} SNPs",
            merged.len()
        );
    }

    // Use the first n_snps from the sumstats. Build the ref vecs
    // directly from the sub-range — don't call `merged.beta_rows()`
    // first since that would allocate n_snps_full pointers only to
    // throw most away.
    let beta_snp: Vec<&[f64]> = (0..n_snps).map(|i| merged.beta_row(i)).collect();
    let se_snp: Vec<&[f64]> = (0..n_snps).map(|i| merged.se_row(i)).collect();
    let var_snp: Vec<f64> = (0..n_snps)
        .map(|i| {
            let m = merged.maf[i];
            2.0 * m * (1.0 - m)
        })
        .collect();
    let snp_names: Vec<String> = merged.snp[..n_snps].to_vec();

    let model = gsem_sem::syntax::parse_model(&model_str, false)
        .map_err(|e| anyhow::anyhow!("model parse error: {e}"))?;
    let config = gsem::gwas::multi_snp::MultiSnpConfig {
        model,
        estimation: gsem_sem::EstimationMethod::from_str_lossy(&args.estimation),
        max_iter: 500,
        snp_var_se: None,
    };

    eprintln!("Running multi-SNP analysis with {n_snps} SNPs, {k} traits...");
    let result = gsem::gwas::multi_snp::run_multi_snp(
        &config,
        &ldsc_result.s,
        &ldsc_result.v,
        &beta_snp,
        &se_snp,
        &var_snp,
        &ld_mat,
        &snp_names,
    );

    eprintln!(
        "Done. converged={}, chisq={:.4}, df={}",
        result.converged, result.chisq, result.chisq_df
    );

    // Write TSV output
    let mut output = String::from("lhs\top\trhs\test\tse\tz\tp\n");
    for param in &result.params {
        output.push_str(&format!(
            "{}\t{}\t{}\t{:.6}\t{:.6}\t{:.4}\t{:.6e}\n",
            param.lhs, param.op, param.rhs, param.est, param.se, param.z_stat, param.p_value
        ));
    }
    output.push_str(&format!(
        "\n# chisq={:.4} df={} converged={}\n",
        result.chisq, result.chisq_df, result.converged
    ));

    std::fs::write(&args.out, &output)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!("Results written to {}", args.out.display());
    Ok(())
}

fn run_simulate(args: SimulateArgs) -> Result<()> {
    // Read LDSC result
    let json = std::fs::read_to_string(&args.covstruc)
        .with_context(|| format!("failed to read {}", args.covstruc.display()))?;
    let ldsc_result = gsem_ldsc::LdscResult::from_json_string(&json)?;

    let k = ldsc_result.s.nrows();

    // Parse per-trait sample sizes
    let n_vec: Vec<f64> = args
        .n_per_trait
        .split(',')
        .map(|s| {
            s.trim()
                .parse::<f64>()
                .with_context(|| format!("failed to parse sample size: {s:?}"))
        })
        .collect::<Result<Vec<_>>>()?;

    if n_vec.len() != k {
        anyhow::bail!(
            "expected {k} sample sizes (one per trait), got {}",
            n_vec.len()
        );
    }

    // Read LD scores (all 22 chromosomes by default)
    let chromosomes: Vec<usize> = (1..=22).collect();
    let ld_data = gsem::io::ld_reader::read_ld_scores(&args.ld, &args.ld, &chromosomes)
        .context("failed to read LD scores")?;

    let ld_scores: Vec<f64> = ld_data.records.iter().map(|r| r.l2).collect();
    let n_snps = ld_scores.len();

    eprintln!(
        "Simulating GWAS: {k} traits, {n_snps} SNPs, M={:.0}",
        ld_data.total_m
    );

    let z_all = gsem::stats::simulation::simulate_sumstats(
        &ldsc_result.s,
        &n_vec,
        &ld_scores,
        ld_data.total_m,
        &gsem::stats::simulation::SimConfig::default(),
    );

    // Write TSV: columns are trait z-scores
    let mut output = String::new();
    // Header
    let headers: Vec<String> = (0..k).map(|i| format!("Z{}", i + 1)).collect();
    output.push_str(&headers.join("\t"));
    output.push('\n');
    // Rows (one per SNP)
    for s_idx in 0..n_snps {
        let vals: Vec<String> = z_all
            .iter()
            .map(|z_t| format!("{:.6}", z_t[s_idx]))
            .collect();
        output.push_str(&vals.join("\t"));
        output.push('\n');
    }

    std::fs::write(&args.out, &output)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!(
        "Simulated {n_snps} SNPs x {k} traits. Results: {}",
        args.out.display()
    );
    Ok(())
}

fn parse_prevalences(s: &Option<String>, k: usize) -> Vec<Option<f64>> {
    match s {
        Some(s) => s
            .split(',')
            .map(|x| {
                let trimmed = x.trim();
                if trimmed.eq_ignore_ascii_case("na") || trimmed.is_empty() {
                    None
                } else {
                    trimmed.parse().ok()
                }
            })
            .collect(),
        None => vec![None; k],
    }
}

fn parse_chromosome_selection(select: &str, n_chr: usize) -> Vec<usize> {
    match select.to_lowercase().as_str() {
        "all" => (1..=n_chr).collect(),
        "odd" => (1..=n_chr).filter(|c| c % 2 == 1).collect(),
        "even" => (1..=n_chr).filter(|c| c % 2 == 0).collect(),
        other => {
            // Parse comma-separated list
            other
                .split(',')
                .filter_map(|s| s.trim().parse::<usize>().ok())
                .collect()
        }
    }
}

fn run_hdl(args: HdlArgs) -> Result<()> {
    use gsem_ldsc::hdl::{HdlConfig, HdlMethod, HdlTraitData, LdPiece};

    eprintln!("Reading {} trait files for HDL...", args.traits.len());

    // Read summary stats (same pattern as LDSC)
    let mut trait_data = Vec::new();
    for path in &args.traits {
        let records = gwas_reader::read_sumstats(path)
            .with_context(|| format!("failed to read {}", path.display()))?;
        let n_snps = records.len();
        eprintln!("  {}: {} SNPs", path.display(), n_snps);

        trait_data.push(HdlTraitData {
            snp: records.iter().map(|r| r.snp.clone()).collect(),
            z: records.iter().map(|r| r.z).collect(),
            n: records.iter().map(|r| r.n).collect(),
            a1: records.iter().map(|r| r.a1.clone()).collect(),
            a2: records.iter().map(|r| r.a2.clone()).collect(),
        });
    }

    // Parse HDL method
    let hdl_method = match args.method.to_lowercase().as_str() {
        "jackknife" => HdlMethod::Jackknife,
        _ => HdlMethod::Piecewise,
    };

    let config = HdlConfig {
        method: hdl_method,
        n_ref: args.n_ref,
    };

    let ld_path = &args.ld_path;

    // Load LD pieces from text format directory.
    // Expected format:
    //   {ld_path}/pieces.txt — tab-delimited: piece_idx, chr, n_snps
    //   {ld_path}/piece.{N}.snps.txt — per-piece SNP info: SNP\tA1\tA2\tLDscore
    let pieces_file = ld_path.join("pieces.txt");
    if !pieces_file.exists() {
        anyhow::bail!(
            "HDL LD reference directory not found or missing pieces.txt at {}.\n\
             HDL requires a text-format LD reference panel directory containing:\n\
             - pieces.txt: tab-delimited index file with columns: piece_idx, chr, n_snps\n\
             - piece.{{N}}.snps.txt: per-piece SNP data with columns: SNP, A1, A2, LDscore\n\n\
             The R version of GenomicSEM uses .rda files which cannot be read directly.\n\
             Convert R LD reference panels to text format first, e.g. in R:\n\
             \n  load('UKB_SVD_eigen99_extraction/LDmatrix1.RData')\n\
             \n  write.table(data.frame(SNP=snps, A1=a1, A2=a2, LDscore=ld), \
             'piece.1.snps.txt', sep='\\t', row.names=FALSE, quote=FALSE)",
            pieces_file.display()
        );
    }

    let pieces_content = std::fs::read_to_string(&pieces_file)
        .with_context(|| format!("failed to read {}", pieces_file.display()))?;

    let mut ld_pieces = Vec::new();
    for line in pieces_content.lines() {
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') || line.starts_with("piece") {
            continue; // skip header or comments
        }
        let fields: Vec<&str> = line.split('\t').collect();
        if fields.len() < 3 {
            continue;
        }
        let piece_idx: usize = fields[0]
            .parse()
            .with_context(|| format!("invalid piece index in pieces.txt: '{}'", fields[0]))?;

        // Read per-piece SNP file
        let snp_file = ld_path.join(format!("piece.{piece_idx}.snps.txt"));
        if !snp_file.exists() {
            eprintln!(
                "Warning: piece file {} not found, skipping",
                snp_file.display()
            );
            continue;
        }

        let snp_content = std::fs::read_to_string(&snp_file)
            .with_context(|| format!("failed to read {}", snp_file.display()))?;

        let mut snps = Vec::new();
        let mut a1 = Vec::new();
        let mut a2 = Vec::new();
        let mut ld_scores = Vec::new();

        for sline in snp_content.lines() {
            let sline = sline.trim();
            if sline.is_empty() || sline.starts_with('#') || sline.starts_with("SNP") {
                continue;
            }
            let sf: Vec<&str> = sline.split('\t').collect();
            if sf.len() < 4 {
                continue;
            }
            snps.push(sf[0].to_string());
            a1.push(sf[1].to_string());
            a2.push(sf[2].to_string());
            if let Ok(ld) = sf[3].parse::<f64>() {
                ld_scores.push(ld);
            } else {
                ld_scores.push(0.0);
            }
        }

        let m = snps.len();
        if m > 0 {
            ld_pieces.push(LdPiece {
                snps,
                a1,
                a2,
                ld_scores,
                m,
            });
        }
    }

    if ld_pieces.is_empty() {
        anyhow::bail!("no valid LD pieces loaded from {}", ld_path.display());
    }

    eprintln!(
        "Loaded {} LD pieces ({} total SNPs)",
        ld_pieces.len(),
        ld_pieces.iter().map(|p| p.m).sum::<usize>()
    );

    // Parse prevalences
    let k = args.traits.len();
    let sp = parse_prevalences(&args.sample_prev, k);
    let pp = parse_prevalences(&args.pop_prev, k);

    eprintln!("Running HDL...");
    let result = gsem_ldsc::hdl::hdl(&trait_data, &sp, &pp, &ld_pieces, &config)?;

    // Write JSON output
    let json = result.to_json_string()?;
    std::fs::write(&args.out, &json)
        .with_context(|| format!("failed to write {}", args.out.display()))?;

    eprintln!("HDL complete. Results written to {}", args.out.display());

    // Print S matrix
    let k = result.s.nrows();
    eprintln!("\nGenetic covariance matrix (S):");
    for i in 0..k {
        let row: Vec<String> = (0..k)
            .map(|j| format!("{:8.4}", result.s[(i, j)]))
            .collect();
        eprintln!("  {}", row.join(" "));
    }

    eprintln!("\nIntercepts (I):");
    for i in 0..k {
        let row: Vec<String> = (0..k)
            .map(|j| format!("{:8.4}", result.i_mat[(i, j)]))
            .collect();
        eprintln!("  {}", row.join(" "));
    }

    Ok(())
}

fn run_summary_gls(args: SummaryGlsArgs) -> Result<()> {
    let x_content = std::fs::read_to_string(&args.x)
        .with_context(|| format!("failed to read {}", args.x.display()))?;
    let x_rows: Vec<Vec<f64>> = x_content
        .lines()
        .filter(|l| !l.trim().is_empty())
        .map(|l| {
            l.split(['\t', ' '])
                .filter(|s| !s.is_empty())
                .map(|s| s.parse::<f64>().unwrap_or(0.0))
                .collect()
        })
        .collect();

    let n = x_rows.len();
    if n == 0 {
        anyhow::bail!("empty X matrix");
    }
    let p_base = x_rows[0].len();
    let p = if args.intercept { p_base + 1 } else { p_base };

    let x = faer::Mat::from_fn(n, p, |i, j| {
        if args.intercept && j == 0 {
            1.0
        } else {
            let col = if args.intercept { j - 1 } else { j };
            x_rows[i][col]
        }
    });

    let y_content = std::fs::read_to_string(&args.y)
        .with_context(|| format!("failed to read {}", args.y.display()))?;
    let y: Vec<f64> = y_content
        .split_whitespace()
        .filter_map(|s| s.parse().ok())
        .collect();

    if y.len() != n {
        anyhow::bail!("Y length {} != X rows {}", y.len(), n);
    }

    let v_content = std::fs::read_to_string(&args.v)
        .with_context(|| format!("failed to read {}", args.v.display()))?;
    let v_rows: Vec<Vec<f64>> = v_content
        .lines()
        .filter(|l| !l.trim().is_empty())
        .map(|l| {
            l.split(['\t', ' '])
                .filter(|s| !s.is_empty())
                .map(|s| s.parse::<f64>().unwrap_or(0.0))
                .collect()
        })
        .collect();

    if v_rows.len() != n {
        anyhow::bail!("V rows {} != Y length {}", v_rows.len(), n);
    }
    let v = faer::Mat::from_fn(n, n, |i, j| v_rows[i][j]);

    eprintln!("GLS: {} observations, {} predictors", n, p);

    let result = gsem::stats::gls::summary_gls(&x, &y, &v)
        .ok_or_else(|| anyhow::anyhow!("GLS computation failed"))?;

    let mut output = String::from("predictor\tbeta\tse\tz\tp\n");
    for i in 0..result.beta.len() {
        let name = if args.intercept && i == 0 {
            "intercept".to_string()
        } else {
            format!("X{}", if args.intercept { i } else { i + 1 })
        };
        output.push_str(&format!(
            "{}\t{:.6}\t{:.6}\t{:.4}\t{:.6e}\n",
            name, result.beta[i], result.se[i], result.z[i], result.p[i]
        ));
    }

    std::fs::write(&args.out, &output)
        .with_context(|| format!("failed to write {}", args.out.display()))?;
    eprintln!("Results written to {}", args.out.display());
    Ok(())
}