#include "config.h"
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(_MSC_VER)
#include <io.h>
#define READ_FUNCTION _read
#else
#include <unistd.h>
#define READ_FUNCTION read
#endif
#include <kseq.h>
#if HAVE_ZLIB
#include <zlib.h>
KSEQ_INIT(gzFile, gzread)
#else
KSEQ_INIT(int, READ_FUNCTION)
#endif
#include "parasail.h"
#include "parasail/io.h"
#include "parasail/stats.h"
static char* strdup_rpl(const char *str)
{
size_t l = strlen(str);
char *s = (char*)calloc(l + 1, sizeof(char));
if(!s) {
return NULL;
}
(void)memcpy(s, str, l);
return s;
}
parasail_sequences_t* parasail_sequences_from_file(const char *filename)
{
#if HAVE_ZLIB
gzFile fp;
#else
FILE* fp;
int is_stdin = 0;
#endif
kseq_t *seq = NULL;
int l = 0;
parasail_sequences_t *retval = NULL;
parasail_sequence_t *sequences = NULL;
unsigned long count = 0;
unsigned long capacity = 1000;
size_t chars = 0;
stats_t stats;
int error_during_kseq = 0;
stats_clear(&stats);
retval = (parasail_sequences_t*)malloc(sizeof(parasail_sequences_t));
if (NULL == retval) {
perror("malloc");
fprintf(stderr, "parasail_sequences_from_file: failed\n");
return NULL;
}
sequences = (parasail_sequence_t*)malloc(sizeof(parasail_sequence_t) * capacity);
if (NULL == sequences) {
perror("malloc");
fprintf(stderr, "parasail_sequences_from_file: failed\n");
free(retval);
return NULL;
}
if (0 == strncmp("stdin", filename, 5)) {
#if HAVE_ZLIB
fp = gzdopen(fileno(stdin), "r");
if (fp == Z_NULL) {
perror("gzdopen");
fprintf(stderr, "parasail_sequences_from_file: failed\n");
free(sequences);
free(retval);
return NULL;
}
#else
fp = stdin;
is_stdin = 1;
#endif
}
else {
errno = 0;
#if HAVE_ZLIB
fp = gzopen(filename, "r");
if (fp == Z_NULL) {
perror("gzopen");
fprintf(stderr, "parasail_sequences_from_file: failed\n");
free(sequences);
free(retval);
return NULL;
}
#else
fp = fopen(filename, "r");
if (fp == NULL) {
perror("fopen");
fprintf(stderr, "parasail_sequences_from_file: failed\n");
free(sequences);
free(retval);
return NULL;
}
#endif
}
#if HAVE_ZLIB
seq = kseq_init(fp);
#else
seq = kseq_init(fileno(fp));
#endif
while ((l = kseq_read(seq)) >= 0) {
errno = 0;
sequences[count].name.l = seq->name.l;
sequences[count].comment.l = seq->comment.l;
sequences[count].seq.l = seq->seq.l;
sequences[count].qual.l = seq->qual.l;
sequences[count].name.s = NULL;
sequences[count].comment.s = NULL;
sequences[count].seq.s = NULL;
sequences[count].qual.s = NULL;
stats_sample_value(&stats, seq->seq.l);
chars += seq->seq.l;
if (sequences[count].name.l) {
sequences[count].name.s = strdup_rpl(seq->name.s);
if (NULL == sequences[count].name.s) {
perror("strdup name");
error_during_kseq = 1;
break;
}
}
if (sequences[count].comment.l) {
sequences[count].comment.s = strdup_rpl(seq->comment.s);
if (NULL == sequences[count].comment.s) {
perror("strdup comment");
error_during_kseq = 1;
break;
}
}
if (sequences[count].seq.l) {
sequences[count].seq.s = strdup_rpl(seq->seq.s);
if (NULL == sequences[count].seq.s) {
perror("strdup seq");
error_during_kseq = 1;
break;
}
}
if (sequences[count].qual.l) {
sequences[count].qual.s = strdup_rpl(seq->qual.s);
if (NULL == sequences[count].qual.s) {
perror("strdup qual");
error_during_kseq = 1;
break;
}
}
++count;
if (count >= capacity) {
parasail_sequence_t *new_sequences = NULL;
capacity *= 2;
errno = 0;
new_sequences = (parasail_sequence_t*)realloc(sequences, sizeof(parasail_sequence_t) * capacity);
if (NULL == new_sequences) {
perror("realloc");
error_during_kseq = 2;
break;
}
sequences = new_sequences;
errno = 0;
}
}
kseq_destroy(seq);
#if HAVE_ZLIB
gzclose(fp);
#else
if (!is_stdin) {
fclose(fp);
}
#endif
retval->seqs = sequences;
retval->l = count;
retval->characters = chars;
retval->shortest = stats._min;
retval->longest = stats._max;
retval->mean = stats._mean;
retval->stddev = stats_stddev(&stats);
if (0 != error_during_kseq) {
fprintf(stderr, "parasail_sequences_from_file: failed\n");
if (1 == error_during_kseq) {
retval->l++;
}
parasail_sequences_free(retval);
return NULL;
}
return retval;
}
void parasail_sequences_free(parasail_sequences_t *sequences)
{
size_t i;
for (i=0; i<sequences->l; ++i) {
if (sequences->seqs[i].name.s) free(sequences->seqs[i].name.s);
if (sequences->seqs[i].comment.s) free(sequences->seqs[i].comment.s);
if (sequences->seqs[i].seq.s) free(sequences->seqs[i].seq.s);
if (sequences->seqs[i].qual.s) free(sequences->seqs[i].qual.s);
}
free(sequences->seqs);
free(sequences);
}
char* parasail_sequences_pack(const parasail_sequences_t *sequences, size_t *size)
{
size_t i = 0;
size_t offset = 0;
char *packed = NULL;
packed = (char*)malloc(sizeof(char) * (sequences->characters+sequences->l+1));
if (NULL == packed) {
perror("malloc");
fprintf(stderr, "parasail_sequences_pack: failed\n");
return NULL;
}
for (i=0; i<sequences->l; ++i) {
memcpy(&packed[offset], sequences->seqs[i].seq.s, sequences->seqs[i].seq.l);
offset += sequences->seqs[i].seq.l;
packed[offset++] = '$';
}
packed[offset] = '\0';
*size = offset;
return packed;
}