#include <config.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <stdint.h>
#include "cram/cram.h"
#include "htslib/sam.h"
enum test_op {
READ_COMPRESSED = 1,
WRITE_COMPRESSED = 2,
READ_CRAM = 4,
WRITE_CRAM = 8,
WRITE_UNCOMPRESSED = 16,
};
int main(int argc, char *argv[])
{
samFile *in;
char *fn_ref = 0;
int flag = 0, c, clevel = -1, ignore_sam_err = 0;
char moder[8];
bam_hdr_t *h;
bam1_t *b;
htsFile *out;
char modew[800];
int r = 0, exit_code = 0;
hts_opt *in_opts = NULL, *out_opts = NULL;
int nreads = 0;
int extra_hdr_nuls = 0;
int benchmark = 0;
int nthreads = 0; int multi_reg = 0;
while ((c = getopt(argc, argv, "DSIt:i:bCul:o:N:BZ:@:M")) >= 0) {
switch (c) {
case 'D': flag |= READ_CRAM; break;
case 'S': flag |= READ_COMPRESSED; break;
case 'I': ignore_sam_err = 1; break;
case 't': fn_ref = optarg; break;
case 'i': if (hts_opt_add(&in_opts, optarg)) return 1; break;
case 'b': flag |= WRITE_COMPRESSED; break;
case 'C': flag |= WRITE_CRAM; break;
case 'u': flag |= WRITE_UNCOMPRESSED; break; case 'l': clevel = atoi(optarg); flag |= WRITE_COMPRESSED; break;
case 'o': if (hts_opt_add(&out_opts, optarg)) return 1; break;
case 'N': nreads = atoi(optarg); break;
case 'B': benchmark = 1; break;
case 'Z': extra_hdr_nuls = atoi(optarg); break;
case 'M': multi_reg = 1; break;
case '@': nthreads = atoi(optarg); break;
}
}
if (argc == optind) {
fprintf(stderr, "Usage: test_view [-DSI] [-t fn_ref] [-i option=value] [-bC] [-l level] [-o option=value] [-N num_reads] [-B] [-Z hdr_nuls] [-@ num_threads] <in.bam>|<in.sam>|<in.cram> [region]\n");
fprintf(stderr, "\n");
fprintf(stderr, "-D: read CRAM format (mode 'c')\n");
fprintf(stderr, "-S: read compressed BCF, BAM, FAI (mode 'b')\n");
fprintf(stderr, "-I: ignore SAM parsing errors\n");
fprintf(stderr, "-t: fn_ref: load CRAM references from the specificed fasta file instead of @SQ headers when writing a CRAM file\n");
fprintf(stderr, "-i: option=value: set an option for CRAM input\n");
fprintf(stderr, "\n");
fprintf(stderr, "-b: write compressed BCF, BAM, FAI (mode 'b')\n");
fprintf(stderr, "-C: write CRAM format (mode 'c')\n");
fprintf(stderr, "-l 0-9: set zlib compression level\n");
fprintf(stderr, "-o option=value: set an option for CRAM output\n");
fprintf(stderr, "-N: num_reads: limit the output to the first num_reads reads\n");
fprintf(stderr, "\n");
fprintf(stderr, "-B: enable benchmarking\n");
fprintf(stderr, "-M: use hts_itr_multi iterator\n");
fprintf(stderr, "-Z hdr_nuls: append specified number of null bytes to the SAM header\n");
fprintf(stderr, "-@ num_threads: use thread pool with specified number of threads\n\n");
fprintf(stderr, "The region list entries should be specified as 'reg:beg-end', with intervals of a region being disjunct and sorted by the starting coordinate.\n");
return 1;
}
strcpy(moder, "r");
if (flag & READ_CRAM) strcat(moder, "c");
else if ((flag & READ_COMPRESSED) == 0) strcat(moder, "b");
in = sam_open(argv[optind], moder);
if (in == NULL) {
fprintf(stderr, "Error opening \"%s\"\n", argv[optind]);
return EXIT_FAILURE;
}
h = sam_hdr_read(in);
if (h == NULL) {
fprintf(stderr, "Couldn't read header for \"%s\"\n", argv[optind]);
return EXIT_FAILURE;
}
h->ignore_sam_err = ignore_sam_err;
if (extra_hdr_nuls) {
char *new_text = realloc(h->text, h->l_text + extra_hdr_nuls);
if (new_text == NULL) {
fprintf(stderr, "Error reallocing header text\n");
return EXIT_FAILURE;
}
h->text = new_text;
memset(&h->text[h->l_text], 0, extra_hdr_nuls);
h->l_text += extra_hdr_nuls;
}
b = bam_init1();
strcpy(modew, "w");
if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
if (flag & WRITE_CRAM) strcat(modew, "c");
else if (flag & WRITE_COMPRESSED) strcat(modew, "b");
else if (flag & WRITE_UNCOMPRESSED) strcat(modew, "bu");
out = hts_open("-", modew);
if (out == NULL) {
fprintf(stderr, "Error opening standard output\n");
return EXIT_FAILURE;
}
if (flag & WRITE_CRAM) {
int ret;
out->fp.cram->header = sam_hdr_parse_(h->text, h->l_text);
if (fn_ref)
ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, fn_ref);
else
ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, NULL);
if (ret != 0)
return EXIT_FAILURE;
}
if (hts_opt_apply(in, in_opts))
return EXIT_FAILURE;
hts_opt_free(in_opts);
if (hts_opt_apply(out, out_opts))
return EXIT_FAILURE;
hts_opt_free(out_opts);
htsThreadPool p = {NULL, 0};
if (nthreads > 0) {
p.pool = hts_tpool_init(nthreads);
if (!p.pool) {
fprintf(stderr, "Error creating thread pool\n");
exit_code = 1;
} else {
hts_set_opt(in, HTS_OPT_THREAD_POOL, &p);
hts_set_opt(out, HTS_OPT_THREAD_POOL, &p);
}
}
if (!benchmark && sam_hdr_write(out, h) < 0) {
fprintf(stderr, "Error writing output header.\n");
exit_code = 1;
}
if (optind + 1 < argc && !(flag & READ_COMPRESSED)) { int i;
hts_idx_t *idx;
if ((idx = sam_index_load(in, argv[optind])) == 0) {
fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
return 1;
}
if (multi_reg) {
int reg_count = 0;
hts_reglist_t *reg_list = calloc(argc-(optind+1), sizeof(*reg_list));
if (!reg_list)
return 1;
for (i = optind + 1; i < argc; ++i) {
int j;
uint32_t beg, end;
char *cp = strrchr(argv[i], ':');
if (cp) *cp = 0;
for (j = 0; j < reg_count; j++)
if (strcmp(reg_list[j].reg, argv[i]) == 0)
break;
if (j == reg_count) {
reg_list[reg_count++].reg = argv[i];
if (strcmp(".", argv[i]) == 0) {
reg_list[j].tid = HTS_IDX_START;
} else if (strcmp("*", argv[i]) == 0) {
reg_list[j].tid = HTS_IDX_NOCOOR;
} else {
int k; for (k = 0; k < h->n_targets; k++)
if (strcmp(h->target_name[k], argv[i]) == 0)
break;
if (k == h->n_targets)
return 1;
reg_list[j].tid = k;
reg_list[j].min_beg = h->target_len[k];
reg_list[j].max_end = 0;
}
}
hts_reglist_t *r = ®_list[j];
r->intervals = realloc(r->intervals, ++r->count * sizeof(*r->intervals));
if (!r->intervals)
return 1;
beg = 1;
end = r->tid >= 0 ? h->target_len[r->tid] : 0;
if (cp) {
*cp = 0;
sscanf(cp+1, "%d-%d", &beg, &end);
}
r->intervals[r->count-1].beg = beg-1; r->intervals[r->count-1].end = end;
if (r->min_beg > beg)
r->min_beg = beg;
if (r->max_end < end)
r->max_end = end;
}
hts_itr_multi_t *iter = sam_itr_regions(idx, h, reg_list, reg_count);
if (!iter)
return 1;
while ((r = sam_itr_multi_next(in, iter, b)) >= 0) {
if (!benchmark && sam_write1(out, h, b) < 0) {
fprintf(stderr, "Error writing output.\n");
exit_code = 1;
break;
}
if (nreads && --nreads == 0)
break;
}
hts_itr_multi_destroy(iter);
} else {
for (i = optind + 1; i < argc; ++i) {
hts_itr_t *iter;
if ((iter = sam_itr_querys(idx, h, argv[i])) == 0) {
fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]);
continue;
}
while ((r = sam_itr_next(in, iter, b)) >= 0) {
if (!benchmark && sam_write1(out, h, b) < 0) {
fprintf(stderr, "Error writing output.\n");
exit_code = 1;
break;
}
if (nreads && --nreads == 0)
break;
}
hts_itr_destroy(iter);
}
}
hts_idx_destroy(idx);
} else while ((r = sam_read1(in, h, b)) >= 0) {
if (!benchmark && sam_write1(out, h, b) < 0) {
fprintf(stderr, "Error writing output.\n");
exit_code = 1;
break;
}
if (nreads && --nreads == 0)
break;
}
if (r < -1) {
fprintf(stderr, "Error parsing input.\n");
exit_code = 1;
}
r = sam_close(out);
if (r < 0) {
fprintf(stderr, "Error closing output.\n");
exit_code = 1;
}
bam_destroy1(b);
bam_hdr_destroy(h);
r = sam_close(in);
if (r < 0) {
fprintf(stderr, "Error closing input.\n");
exit_code = 1;
}
if (p.pool)
hts_tpool_destroy(p.pool);
return exit_code;
}