#include <config.h>
#include <time.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#ifndef _WIN32
#include <netdb.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/select.h>
#endif
#include "htslib/knetfile.h"
#include "htslib/hts_log.h"
/*
 * Wait until `fd` becomes ready, giving up after 5 seconds.
 *
 * fd:      descriptor to watch.
 * is_read: non-zero to wait for readability, zero to wait for writability.
 *
 * Returns whatever select() returned: a positive count when the descriptor
 * is ready, 0 on timeout, and a negative/error value on failure.  Failures
 * (and, on Windows, timeouts) are reported before returning.
 */
static int socket_wait(int fd, int is_read)
{
    struct timeval timeout;
    fd_set watched;
    fd_set *read_set = is_read ? &watched : NULL;
    fd_set *write_set = is_read ? NULL : &watched;
    int status;

    timeout.tv_sec = 5;
    timeout.tv_usec = 0;

    FD_ZERO(&watched);
    FD_SET(fd, &watched);

    status = select(fd + 1, read_set, write_set, NULL, &timeout);
#ifndef _WIN32
    if (status == -1) {
        perror("select");
    }
#else
    if (status == 0) {
        hts_log_warning("Select timed out");
    } else if (status == SOCKET_ERROR) {
        hts_log_error("Select returned error %d", WSAGetLastError());
    }
#endif
    return status;
}
#ifndef _WIN32
# ifdef __SUNPRO_C
# pragma error_messages(off, E_END_OF_LOOP_CODE_NOT_REACHED)
# endif
/*
 * Open a TCP connection to host:port (POSIX implementation).
 *
 * host, port: node and service strings handed to getaddrinfo().  Only the
 *             first address it returns is tried.
 *
 * Returns the connected socket descriptor, or -1 on failure.  Resolution
 * failures are logged through hts_log_error(); socket-level failures are
 * reported with perror().  On any failure after the socket was created the
 * descriptor is closed, so nothing is leaked.
 */
static int socket_connect(const char *host, const char *port)
{
    int ai_err, on = 1, fd = -1;
    const char *failed = NULL;      /* name of the call that failed, if any */
    struct linger lng = { 0, 0 };
    struct addrinfo hints, *res = NULL;

    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    if ((ai_err = getaddrinfo(host, port, &hints, &res)) != 0) {
        hts_log_error("Can't resolve %s:%s: %s", host, port, gai_strerror(ai_err));
        return -1;
    }

    if ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1)
        failed = "socket";
    else if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1)
        failed = "setsockopt";
    else if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1)
        failed = "setsockopt";
    else if (connect(fd, res->ai_addr, res->ai_addrlen) != 0)
        failed = "connect";

    if (failed != NULL) {
        /* Report first: close() below may overwrite errno. */
        perror(failed);
        if (fd != -1) close(fd);
        fd = -1;
    }
    freeaddrinfo(res);
    return fd;
}
# ifdef __SUNPRO_C
# pragma error_messages(default, E_END_OF_LOOP_CODE_NOT_REACHED)
# endif
#else
/*
 * Format a signed 64-bit integer as a decimal string.
 *
 * buf: destination; must have room for at least 21 bytes (sign, up to 19
 *      digits, and the terminating NUL).
 * x:   value to format; negative values get a leading '-'.
 *
 * Returns buf.
 */
char *int64tostr(char *buf, int64_t x)
{
    /* Work on the unsigned magnitude so that INT64_MIN can be negated
     * without signed overflow. */
    uint64_t mag = x < 0 ? (uint64_t)0 - (uint64_t)x : (uint64_t)x;
    int len = 0, i, j;

    /* Emit digits least-significant first; a do-while so 0 yields "0". */
    do {
        buf[len++] = (char)('0' + (int)(mag % 10));
        mag /= 10;
    } while (mag != 0);
    if (x < 0) buf[len++] = '-';
    buf[len] = '\0';

    /* Reverse in place to obtain the conventional digit order. */
    for (i = 0, j = len - 1; i < j; ++i, --j) {
        char c = buf[i];
        buf[i] = buf[j];
        buf[j] = c;
    }
    return buf;
}
/*
 * Parse a non-negative decimal integer from the start of buf.
 *
 * Parsing stops at the first character that is not a decimal digit, so
 * trailing text (e.g. "\r\n") is ignored; an input with no leading digits
 * yields 0.  Values too large for int64_t saturate at INT64_MAX instead of
 * overflowing.
 */
int64_t strtoint64(const char *buf)
{
    int64_t x = 0;

    for (; *buf >= '0' && *buf <= '9'; ++buf) {
        int digit = *buf - '0';
        /* Would x * 10 + digit exceed INT64_MAX? */
        if (x > (INT64_MAX - digit) / 10) return INT64_MAX;
        x = x * 10 + digit;
    }
    return x;
}
/*
 * Start Winsock, requesting version 2.2.
 * Returns the value reported by WSAStartup().
 */
int knet_win32_init()
{
    WSADATA wsa_info;
    int status;

    status = WSAStartup(MAKEWORD(2, 2), &wsa_info);
    return status;
}
/*
 * Shut Winsock down again; the result of WSACleanup() is not reported.
 */
void knet_win32_destroy()
{
    (void) WSACleanup();
}
static SOCKET socket_connect(const char *host, const char *port)
{
#define __err_connect(func) \
do { \
hts_log_error("The %s operation returned error %d", func, WSAGetLastError()); \
return -1; \
} while (0)
int on = 1;
SOCKET fd;
struct linger lng = { 0, 0 };
struct sockaddr_in server;
struct hostent *hp = 0;
if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket");
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt");
if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt");
if (isalpha(host[0])) hp = gethostbyname(host);
else {
struct in_addr addr;
addr.s_addr = inet_addr(host);
hp = gethostbyaddr((char*)&addr, 4, AF_INET);
}
if (hp == 0) __err_connect("gethost");
server.sin_addr.s_addr = *((unsigned long*)hp->h_addr);
server.sin_family= AF_INET;
server.sin_port = htons(atoi(port));
if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect");
return fd;
}
#endif
/*
 * Read up to len bytes from a network descriptor into buf.
 *
 * A single netread() may deliver fewer bytes than requested, so this keeps
 * reading until len bytes have arrived, the peer closes the connection,
 * an error occurs, or socket_wait() reports a timeout/failure.
 *
 * Returns the number of bytes actually stored in buf (possibly 0).
 */
static off_t my_netread(int fd, void *buf, off_t len)
{
    off_t rest = len, curr, l = 0;

    while (rest > 0) {
        if (socket_wait(fd, 1) <= 0) break;     /* timeout or select error */
        curr = netread(fd, (void*)((char*)buf + l), rest);
        /* 0 means end of stream; a negative value is an error and must not
         * be folded into the byte counters. */
        if (curr <= 0) break;
        l += curr;
        rest -= curr;
    }
    return l;
}
/*
 * Read one complete reply from the FTP control connection.
 *
 * Bytes are read one at a time into ftp->response (grown as needed).  Lines
 * are discarded until one starts with three digits not followed by '-',
 * which is taken as the final line of the reply.  The trailing two
 * characters of that line are cut off and the leading number is returned.
 *
 * Returns the numeric reply code, 0 if the control socket did not become
 * readable (see socket_wait()), or -1 if fewer than two bytes of a final
 * line were received or the response buffer could not be grown.
 */
static int kftp_get_response(knetFile *ftp)
{
#ifndef _WIN32
    unsigned char c;
#else
    char c;
#endif
    int n = 0;
    char *p;

    if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
    /* Stop on end of stream (0) and on read errors (< 0) alike. */
    while (netread(ftp->ctrl_fd, &c, 1) > 0) {
        if (n >= ftp->max_response) {
            int new_max = ftp->max_response ? ftp->max_response << 1 : 256;
            char *grown = (char*)realloc(ftp->response, new_max);
            if (grown == NULL) return -1;   /* old buffer stays valid */
            ftp->response = grown;
            ftp->max_response = new_max;
        }
        ftp->response[n++] = c;
        if (c == '\n') {
            if (n >= 4
                && isdigit((int)((unsigned char) ftp->response[0]))
                && isdigit((int)((unsigned char) ftp->response[1]))
                && isdigit((int)((unsigned char) ftp->response[2]))
                && ftp->response[3] != '-')
                break;
            n = 0;  /* not the final line: start collecting the next one */
        }
    }
    if (n < 2) return -1;
    ftp->response[n-2] = 0;
    return (int)strtol(ftp->response, &p, 0);
}
/*
 * Send a command string over the FTP control connection.
 *
 * cmd:    text to send, written as-is.
 * is_get: when non-zero, read the server's reply afterwards.
 *
 * Returns -1 if the control socket is not writable or the write was short;
 * otherwise the result of kftp_get_response() when is_get is set, else 0.
 */
static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get)
{
    int cmd_len;

    if (socket_wait(ftp->ctrl_fd, 0) <= 0) {
        return -1;
    }

    cmd_len = strlen(cmd);
    if (netwrite(ftp->ctrl_fd, cmd, cmd_len) != cmd_len) {
        return -1;
    }

    if (!is_get) {
        return 0;
    }
    return kftp_get_response(ftp);
}
/*
 * Ask the server to enter passive mode and record the data endpoint.
 *
 * Sends "PASV" and parses the six comma-separated numbers that follow the
 * first '(' in the reply: four address octets and two port bytes.  The
 * result is stored in ftp->pasv_ip and ftp->pasv_port.
 *
 * Returns 0 on success, -1 if the command failed or the reply could not be
 * parsed into six values in the range 0..255.
 */
static int kftp_pasv_prep(knetFile *ftp)
{
    char *p;
    int v[6], i;

    if (kftp_send_cmd(ftp, "PASV\r\n", 1) <= 0) return -1;
    if (ftp->response == NULL) return -1;

    p = strchr(ftp->response, '(');
    if (p == NULL) return -1;
    ++p;

    /* All six fields are required; otherwise v[] would be used uninitialised. */
    if (sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]) != 6)
        return -1;
    for (i = 0; i < 6; ++i)
        if (v[i] < 0 || v[i] > 255) return -1;

    memcpy(ftp->pasv_ip, v, 4 * sizeof(int));
    ftp->pasv_port = (v[4] << 8) + v[5];
    return 0;
}
/*
 * Open the FTP data connection to the endpoint stored in ftp->pasv_ip and
 * ftp->pasv_port, placing the new descriptor in ftp->fd.
 *
 * Returns 0 on success, -1 if no passive port has been recorded, the
 * address does not fit the local buffers, or the connection failed.
 */
static int kftp_pasv_connect(knetFile *ftp)
{
    char host[80], port[16];
    int n;

    if (ftp->pasv_port == 0) {
        hts_log_error("Must call kftp_pasv_prep() first");
        return -1;
    }

    /* Bounded formatting: the stored values are plain ints, so do not
     * assume they are small enough for the buffers. */
    n = snprintf(host, sizeof(host), "%d.%d.%d.%d",
                 ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]);
    if (n < 0 || (size_t)n >= sizeof(host)) return -1;
    n = snprintf(port, sizeof(port), "%d", ftp->pasv_port);
    if (n < 0 || (size_t)n >= sizeof(port)) return -1;

    ftp->fd = socket_connect(host, port);
    if (ftp->fd == -1) return -1;
    return 0;
}
/*
 * Open the FTP control connection and log in anonymously.
 *
 * After connecting to ftp->host:ftp->port the greeting is read, then the
 * USER, PASS and TYPE commands are sent in that order.  The replies to
 * those commands are read but not inspected.
 *
 * Returns 0 once the connection is up, -1 if it could not be established.
 */
int kftp_connect(knetFile *ftp)
{
    static const char *const login_cmds[] = {
        "USER anonymous\r\n",
        "PASS kftp@\r\n",
        "TYPE I\r\n"
    };
    size_t i;

    ftp->ctrl_fd = socket_connect(ftp->host, ftp->port);
    if (ftp->ctrl_fd == -1) {
        return -1;
    }

    kftp_get_response(ftp);     /* server greeting */
    for (i = 0; i < sizeof(login_cmds) / sizeof(login_cmds[0]); ++i) {
        kftp_send_cmd(ftp, login_cmds[i], 1);
    }
    return 0;
}
/*
 * Drop the control and data connections (whichever are open) and log in
 * again via kftp_connect().
 *
 * Returns the result of kftp_connect().
 */
int kftp_reconnect(knetFile *ftp)
{
    if (ftp->ctrl_fd != -1) {
        netclose(ftp->ctrl_fd);
        ftp->ctrl_fd = -1;
    }
    /* Only close a data descriptor that actually exists; closing -1 just
     * produces a spurious error. */
    if (ftp->fd != -1) {
        netclose(ftp->fd);
        ftp->fd = -1;
    }
    return kftp_connect(ftp);
}
knetFile *kftp_parse_url(const char *fn, const char *mode)
{
knetFile *fp;
char *p;
int l;
if (strstr(fn, "ftp://") != fn) return 0;
for (p = (char*)fn + 6; *p && *p != '/'; ++p);
if (*p != '/') return 0;
l = p - fn - 6;
fp = (knetFile*)calloc(1, sizeof(knetFile));
fp->type = KNF_TYPE_FTP;
fp->fd = -1;
fp->port = strdup("21");
fp->host = (char*)calloc(l + 1, 1);
if (strchr(mode, 'c')) fp->no_reconnect = 1;
strncpy(fp->host, fn + 6, l);
fp->retr = (char*)calloc(strlen(p) + 8, 1);
sprintf(fp->retr, "RETR %s\r\n", p);
fp->size_cmd = (char*)calloc(strlen(p) + 8, 1);
sprintf(fp->size_cmd, "SIZE %s\r\n", p);
fp->seek_offset = 0;
return fp;
}
/*
 * Start (or restart) the FTP transfer of the file described by fp.
 *
 * Sequence: close any existing data connection, enter passive mode, query
 * the file size, send REST with the current offset, send RETR, open the
 * data connection and read the reply to RETR.  A reply code of 150 marks
 * the handle as ready for reading.
 *
 * Returns 0 on success.  On failure returns -1; the data descriptor is
 * then closed and fp->fd is -1.
 */
int kftp_connect_file(knetFile *fp)
{
    int ret, data_conn;
    long long file_size;

    if (fp->fd != -1) {
        netclose(fp->fd);
        fp->fd = -1;    /* never leave a closed descriptor behind on early return */
        if (fp->no_reconnect) kftp_get_response(fp);
    }
    /* Without a fresh passive endpoint the data connection cannot be made. */
    if (kftp_pasv_prep(fp) != 0) return -1;

    kftp_send_cmd(fp, fp->size_cmd, 1);
    if (fp->response == NULL) return -1;
#ifndef _WIN32
    if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 ) return -1;
#else
    {
        /* Skip the reply code, then anything up to the first digit; stop
         * at the end of the string rather than running past it. */
        const char *p = fp->response;
        while (*p && *p != ' ') ++p;
        while (*p && (*p < '0' || *p > '9')) ++p;
        if (*p == '\0') return -1;
        file_size = strtoint64(p);
    }
#endif
    fp->file_size = file_size;

    if (fp->offset>=0) {
        char tmp[32];
#ifndef _WIN32
        snprintf(tmp, sizeof(tmp), "REST %lld\r\n", (long long)fp->offset);
#else
        strcpy(tmp, "REST ");
        int64tostr(tmp + 5, fp->offset);
        strcat(tmp, "\r\n");
#endif
        kftp_send_cmd(fp, tmp, 1);
    }

    kftp_send_cmd(fp, fp->retr, 0);
    data_conn = kftp_pasv_connect(fp);
    /* Read the reply even if the data connection failed, so the control
     * channel is not left with an unread response. */
    ret = kftp_get_response(fp);
    if (data_conn != 0 || ret != 150) {
        if (fp->response != NULL) hts_log_error("%s", fp->response);
        if (fp->fd != -1) {
            netclose(fp->fd);
            fp->fd = -1;
        }
        return -1;
    }
    fp->is_ready = 1;
    return 0;
}
/*
 * Build an unconnected HTTP handle from a URL of the form
 * http://host[:port][/path].
 *
 * fn:   the URL; must start with "http://".
 * mode: not used here.
 *
 * Without a proxy, the handle connects to the URL's host and port (default
 * "80") and requests the URL's path (default "/").  If the http_proxy
 * environment variable is set and non-empty, the handle connects to the
 * proxy's host and port instead and requests the full URL; an optional
 * "http://" prefix and anything from the first '/' after the proxy's
 * host[:port] are ignored.
 *
 * Returns the new handle, or NULL if fn is not an HTTP URL or memory could
 * not be allocated.
 */
knetFile *khttp_parse_url(const char *fn, const char *mode)
{
    knetFile *fp;
    const char *p, *proxy;
    char *q;
    size_t l;

    (void) mode;
    if (strncmp(fn, "http://", 7) != 0) return 0;
    /* p ends up at the first '/' after the authority, or at the final NUL. */
    for (p = fn + 7; *p && *p != '/'; ++p);
    l = (size_t)(p - (fn + 7));

    fp = (knetFile*)calloc(1, sizeof(knetFile));
    if (fp == NULL) return NULL;
    fp->type = KNF_TYPE_HTTP;
    fp->ctrl_fd = fp->fd = -1;
    fp->seek_offset = 0;

    fp->http_host = (char*)calloc(l + 1, 1);
    if (fp->http_host == NULL) goto fail;
    memcpy(fp->http_host, fn + 7, l);       /* buffer is zeroed, so NUL-terminated */
    /* Split "host:port" in place; q is left on the port text (or on NUL). */
    for (q = fp->http_host; *q && *q != ':'; ++q);
    if (*q == ':') *q++ = 0;

    proxy = getenv("http_proxy");
    if (proxy == 0 || *proxy == '\0') {
        fp->host = strdup(fp->http_host);
        fp->port = strdup(*q? q : "80");
        fp->path = strdup(*p? p : "/");
    } else {
        fp->host = strdup(strncmp(proxy, "http://", 7) == 0 ? proxy + 7 : proxy);
        if (fp->host == NULL) goto fail;
        /* "http://proxy:3128/" is a common spelling; drop the path part so
         * it does not end up inside the port string. */
        q = strchr(fp->host, '/');
        if (q != NULL) *q = 0;
        for (q = fp->host; *q && *q != ':'; ++q);
        if (*q == ':') *q++ = 0;
        fp->port = strdup(*q? q : "80");
        fp->path = strdup(fn);
    }
    if (fp->host == NULL || fp->port == NULL || fp->path == NULL) goto fail;
    return fp;

fail:
    free(fp->host);
    free(fp->port);
    free(fp->path);
    free(fp->http_host);
    free(fp);
    return NULL;
}
/*
 * Connect to the HTTP server (or proxy) and request the file, starting at
 * fp->offset.
 *
 * A "GET ... HTTP/1.0" request is sent, with a Range header when the offset
 * is non-zero, and the response header is read up to the blank line.
 *   - 206: the body starts at the requested offset.
 *   - 200 with a positive offset: the Range header was not honoured, so
 *     offset bytes of the body are read and discarded.
 *   - anything else: the connection is closed and errno is set from the
 *     status code.
 *
 * Returns 0 and marks the handle ready on success, -1 on failure.  Except
 * when the request could not be written in full, failure leaves fp->fd
 * set to -1.
 */
int khttp_connect_file(knetFile *fp)
{
    enum { KHTTP_BUF_SIZE = 0x10000 };
    int ret, l, n;
    char *buf;

    if (fp->fd != -1) netclose(fp->fd);
    fp->fd = socket_connect(fp->host, fp->port);
    if (fp->fd == -1) return -1;

    buf = (char*)calloc(KHTTP_BUF_SIZE, 1);
    if (buf == NULL) goto fail;

    /* Build the request with bounded formatting: path and host come from
     * the caller's URL and may be arbitrarily long. */
    l = snprintf(buf, KHTTP_BUF_SIZE, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host);
    if (l < 0 || l >= KHTTP_BUF_SIZE) goto fail;
    if (fp->offset != 0) {
        n = snprintf(buf + l, KHTTP_BUF_SIZE - l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
        if (n < 0 || n >= KHTTP_BUF_SIZE - l) goto fail;
        l += n;
    }
    n = snprintf(buf + l, KHTTP_BUF_SIZE - l, "\r\n");
    if (n < 0 || n >= KHTTP_BUF_SIZE - l) goto fail;
    l += n;
    if ( netwrite(fp->fd, buf, l) != l ) { free(buf); return -1; }

    /* Read the header one byte at a time until "\r\n\r\n", end of stream,
     * a read error, or the buffer is full. */
    l = 0;
    while (l < KHTTP_BUF_SIZE - 1 && netread(fp->fd, buf + l, 1) > 0) {
        if (buf[l] == '\n' && l >= 3 && strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
        ++l;
    }
    buf[l] = 0;
    /* Too short to hold a status line, or too long to fit the buffer. */
    if (l < 14 || l >= KHTTP_BUF_SIZE - 1) goto fail;

    /* The status code follows the 8-character "HTTP/x.y" prefix. */
    ret = (int)strtol(buf + 8, NULL, 10);
    if (ret == 200 && fp->offset>0) {
        /* Server ignored the range request: skip the unwanted prefix. */
        off_t rest = fp->offset;
        while (rest > 0) {
            off_t chunk = rest < KHTTP_BUF_SIZE? rest : KHTTP_BUF_SIZE;
            off_t got = my_netread(fp->fd, buf, chunk);
            if (got <= 0) break;    /* stream ended before the offset */
            rest -= got;
        }
    } else if (ret != 206 && ret != 200) {
        free(buf);
        netclose(fp->fd);
        /* Set errno after closing so the close cannot overwrite it. */
        switch (ret) {
        case 401: errno = EPERM; break;
        case 403: errno = EACCES; break;
        case 404: errno = ENOENT; break;
        case 407: errno = EPERM; break;
        case 408: errno = ETIMEDOUT; break;
        case 410: errno = ENOENT; break;
        case 503: errno = EAGAIN; break;
        case 504: errno = ETIMEDOUT; break;
        default: errno = (ret >= 400 && ret < 500)? EINVAL : EIO; break;
        }
        fp->fd = -1;
        return -1;
    }
    free(buf);
    fp->is_ready = 1;
    return 0;

fail:
    free(buf);
    netclose(fp->fd);
    fp->fd = -1;
    return -1;
}
/*
 * Open a file for reading from an ftp:// URL, an http:// URL, or a local
 * path.
 *
 * fn:   URL or path.
 * mode: must start with 'r'; passed on to the URL parsers.
 *
 * Returns a handle to pass to knet_read()/knet_seek()/knet_close(), or
 * NULL on failure.  An unsupported mode sets errno to ENOTSUP.
 */
knetFile *knet_open(const char *fn, const char *mode)
{
    knetFile *fp = 0;

    if (mode[0] != 'r') {
        hts_log_error("Only mode \"r\" is supported");
        errno = ENOTSUP;
        return 0;
    }
    if (strncmp(fn, "ftp://", 6) == 0) {
        fp = kftp_parse_url(fn, mode);
        if (fp == 0) return 0;
        if (kftp_connect(fp) == -1) {
            knet_close(fp);
            return 0;
        }
        /* Failure is detected below through fp->fd == -1. */
        kftp_connect_file(fp);
    } else if (strncmp(fn, "http://", 7) == 0) {
        fp = khttp_parse_url(fn, mode);
        if (fp == 0) return 0;
        khttp_connect_file(fp);
    } else {
        /* Anything else is treated as a local file. */
#ifdef _WIN32
        int fd = open(fn, O_RDONLY | O_BINARY);
#else
        int fd = open(fn, O_RDONLY);
#endif
        if (fd == -1) {
            perror("open");
            return 0;
        }
        fp = (knetFile*)calloc(1, sizeof(knetFile));
        if (fp == NULL) {
            close(fd);  /* do not leak the descriptor */
            return 0;
        }
        fp->type = KNF_TYPE_LOCAL;
        fp->fd = fd;
        fp->ctrl_fd = -1;
    }
    if (fp && fp->fd == -1) {
        knet_close(fp);
        return 0;
    }
    return fp;
}
knetFile *knet_dopen(int fd, const char *mode)
{
knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
fp->type = KNF_TYPE_LOCAL;
fp->fd = fd;
return fp;
}
/*
 * Read up to len bytes at the handle's current offset into buf.
 *
 * For FTP and HTTP handles that are not ready (freshly sought), the
 * transfer is re-established first.  Local reads are retried when
 * interrupted by a signal.  fp->offset is advanced by the number of bytes
 * read.
 *
 * Returns the number of bytes read (less than len at end of data), 0 if
 * the handle has no open descriptor, or -1 if a local read failed or the
 * remote transfer could not be re-established.
 */
ssize_t knet_read(knetFile *fp, void *buf, size_t len)
{
    off_t l = 0;

    if (fp->fd == -1) return 0;
    if (fp->type == KNF_TYPE_FTP) {
        if (fp->is_ready == 0) {
            /* Stop here on failure instead of reading from a descriptor
             * that was never (re)opened. */
            if (!fp->no_reconnect && kftp_reconnect(fp) != 0) return -1;
            if (kftp_connect_file(fp) != 0) return -1;
        }
    } else if (fp->type == KNF_TYPE_HTTP) {
        if (fp->is_ready == 0 && khttp_connect_file(fp) != 0) return -1;
    }

    if (fp->type == KNF_TYPE_LOCAL) {
        size_t rest = len;
        ssize_t curr;
        while (rest) {
            do {
                curr = read(fp->fd, (void*)((char*)buf + l), rest);
            } while (curr < 0 && EINTR == errno);
            if (curr < 0) return -1;
            if (curr == 0) break;
            l += curr;
            rest -= curr;
        }
    } else {
        l = my_netread(fp->fd, buf, len);
    }
    fp->offset += l;
    return l;
}
/*
 * Move the handle's read position.
 *
 * Local files are repositioned with lseek().  For FTP and HTTP only the
 * recorded offset changes and the handle is marked not ready, so the next
 * knet_read() restarts the transfer at the new offset.  SEEK_END is
 * supported for FTP (relative to the recorded file size) but not for HTTP.
 *
 * Returns the resulting offset, like lseek(), or -1 on failure.  For
 * remote handles a failed seek leaves the offset unchanged and sets errno
 * (EINVAL for a bad whence or negative target, ESPIPE for HTTP SEEK_END).
 */
off_t knet_seek(knetFile *fp, off_t off, int whence)
{
    /* Already there: report the position, consistent with every other
     * successful path (a bare 0 would look like "now at offset 0"). */
    if (whence == SEEK_SET && off == fp->offset) return fp->offset;

    if (fp->type == KNF_TYPE_LOCAL) {
        off_t offset = lseek(fp->fd, off, whence);
        if (offset == -1) return -1;
        fp->offset = offset;
        return fp->offset;
    } else if (fp->type == KNF_TYPE_FTP || fp->type == KNF_TYPE_HTTP) {
        off_t target;

        if (whence == SEEK_CUR) {
            target = fp->offset + off;
        } else if (whence == SEEK_SET) {
            target = off;
        } else if (whence == SEEK_END) {
            if (fp->type == KNF_TYPE_HTTP) {
                hts_log_error("SEEK_END is not supported for HTTP. Offset is unchanged");
                errno = ESPIPE;
                return -1;
            }
            target = fp->file_size + off;
        } else {
            errno = EINVAL;
            return -1;
        }
        if (target < 0) {
            errno = EINVAL;
            return -1;
        }
        fp->offset = target;
        fp->is_ready = 0;
        return fp->offset;
    }
    errno = EINVAL;
    hts_log_error("%s", strerror(errno));
    return -1;
}
/*
 * Close the handle's descriptors and release all memory it owns.
 *
 * Accepts NULL (does nothing).  Local descriptors are closed with close(),
 * network descriptors with netclose().  Always returns 0.
 */
int knet_close(knetFile *fp)
{
    if (fp == 0) return 0;
    if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd);
    if (fp->fd != -1) {
        if (fp->type == KNF_TYPE_LOCAL) close(fp->fd);
        else netclose(fp->fd);
    }
    free(fp->host);
    free(fp->port);
    free(fp->response);
    free(fp->retr);
    free(fp->size_cmd);     /* allocated by kftp_parse_url() alongside retr */
    free(fp->path);
    free(fp->http_host);
    free(fp);
    return 0;
}
#ifdef KNETFILE_MAIN
/*
 * Manual test driver (built only with KNETFILE_MAIN).
 *
 * `type` selects one of five hard-coded scenarios: a local file, two FTP
 * URLs and two HTTP URLs.  Scenarios 1 and 4 write the bytes they read to
 * standard output unmodified; the others seek to offset 1000, read 255
 * bytes and print them as a string.
 *
 * Returns 0 on success, 1 if the buffer or the file could not be set up.
 */
int main(void)
{
    enum { MAIN_BUF_SIZE = 0x100000, FIRST_CHUNK = 10000 };
    const char *url = NULL;
    char *buf;
    knetFile *fp;
    int type = 4, l = 0;

#ifdef _WIN32
    knet_win32_init();
#endif
    buf = calloc(MAIN_BUF_SIZE, 1);
    if (buf == NULL) {
        perror("calloc");
        return 1;
    }

    switch (type) {
    case 0: url = "knetfile.c"; break;
    case 1: url = "ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam"; break;
    case 2: url = "ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml"; break;
    case 3: url = "http://www.sanger.ac.uk/Users/lh3/index.shtml"; break;
    case 4: url = "http://www.sanger.ac.uk/Users/lh3/ex1.bam"; break;
    default: break;
    }
    fp = url != NULL ? knet_open(url, "r") : NULL;
    if (fp == NULL) {
        fprintf(stderr, "knet_open failed\n");
        free(buf);
        return 1;
    }

    if (type == 1) {
        knet_seek(fp, 2500000000ll, SEEK_SET);
        l = knet_read(fp, buf, 255);
    } else if (type == 4) {
        knet_read(fp, buf, FIRST_CHUNK);
        knet_seek(fp, 20000, SEEK_SET);
        knet_seek(fp, 10000, SEEK_SET);
        /* Never ask for more than the space left in buf. */
        l = knet_read(fp, buf + FIRST_CHUNK, MAIN_BUF_SIZE - FIRST_CHUNK) + FIRST_CHUNK;
    } else {
        knet_seek(fp, 1000, SEEK_SET);
    }

    if (type != 4 && type != 1) {
        knet_read(fp, buf, 255);
        buf[255] = 0;
        printf("%s\n", buf);
    } else if (l > 0) {
        if (fwrite(buf, 1, (size_t)l, stdout) != (size_t)l)
            perror("fwrite");
    }

    knet_close(fp);
    free(buf);
#ifdef _WIN32
    knet_win32_destroy();
#endif
    return 0;
}
#endif