#include "curl_setup.h"
#include "urldata.h"
#include "urlapi-int.h"
#include "strcase.h"
#include "url.h"
#include "escape.h"
#include "curl_ctype.h"
#include "curlx/inet_pton.h"
#include "curlx/inet_ntop.h"
#include "strdup.h"
#include "idn.h"
#include "curlx/strparse.h"
#include "curl_memrchr.h"
#include "curl_memory.h"
#include "memdebug.h"
#ifdef _WIN32
/* True when 'str' starts with an MS-DOS/Windows drive prefix: an ASCII
   letter followed by a colon. The argument is parenthesized at every use so
   that expression arguments (such as 'p + 1') expand correctly. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
#endif
/* True when 'str' starts with a drive prefix as written in a file: URL: an
   ASCII letter, then ':' or '|', then a slash, a backslash or the end of the
   string. Later characters are only inspected when the earlier ones
   matched. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('A' <= (str)[0] && (str)[0] <= 'Z') || \
    ('a' <= (str)[0] && (str)[0] <= 'z')) && \
   ((str)[1] == '|' || (str)[1] == ':') && \
   ((str)[2] == 0 || (str)[2] == '/' || (str)[2] == '\\'))
/* Longest scheme name (in characters) accepted by the scheme scanner and by
   the explicit scheme setter in this file. */
#define MAX_SCHEME_LEN 40
/* When the build has no IPv6 support and the platform headers provide no
   AF_INET6, define a stand-in value so the IPv6 literal handling below can
   still name the address family. */
#if !defined(USE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
/* Internal representation of a URL handle. Every string member is either
   NULL or a heap allocation owned by the handle (released by
   free_urlhandle()). */
struct Curl_URL {
char *scheme;   /* scheme name, without "://" */
char *user;     /* user name from the authority part */
char *password; /* password from the authority part */
char *options;  /* login options from the authority part */
char *host;     /* hostname; IPv6 literals keep their brackets */
char *zoneid;   /* IPv6 zone id split off from the host */
char *port;     /* port number in textual form */
char *path;     /* path part; NULL is presented to callers as "/" */
char *query;    /* query part, without the leading '?' */
char *fragment; /* fragment part, without the leading '#' */
unsigned short portnum; /* numerical version of 'port' */
BIT(query_present);    /* a query was seen or set, possibly empty */
BIT(fragment_present); /* a fragment was seen or set, possibly empty */
BIT(guessed_scheme);   /* the scheme was guessed from the hostname */
};
/* Scheme used when CURLU_DEFAULT_SCHEME is set and the URL has none */
#define DEFAULT_SCHEME "https"
/* Forward declaration: parses 'url' and, only on success, replaces the
   contents of 'u' with the result. Needed because redirect_url() calls it
   before its definition. */
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
unsigned int flags);
/*
 * free_urlhandle() releases every string the handle owns. The struct itself
 * is not freed and its pointers are left as they were, so the caller must
 * either discard the struct or clear it before using it again.
 */
static void free_urlhandle(struct Curl_URL *u)
{
  /* authority related parts */
  free(u->user);
  free(u->password);
  free(u->options);
  free(u->host);
  free(u->zoneid);
  free(u->port);

  /* the remaining parts */
  free(u->scheme);
  free(u->path);
  free(u->query);
  free(u->fragment);
}
/*
 * find_host_sep() returns a pointer to the first '/' or '?' that follows the
 * hostname in 'url', or to the terminating null byte when there is none.
 *
 * The hostname is taken to start right after the first "//" in the string,
 * or at the beginning of the string when there is no "//".
 */
static const char *find_host_sep(const char *url)
{
  const char *slashes = strstr(url, "//");
  const char *hoststart = slashes ? slashes + 2 : url;

  /* skip everything up to the first path or query separator */
  return hoststart + strcspn(hoststart, "/?");
}
/* Convert a failed CURLcode into a CURLUcode: CURLE_TOO_LARGE maps to
   CURLUE_TOO_LARGE and every other value to CURLUE_OUT_OF_MEMORY. Only
   meant to be used on non-zero results. */
#define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE : \
CURLUE_OUT_OF_MEMORY)
/*
 * urlencode_str() appends 'len' bytes of 'url' to the dynbuf 'o', encoding
 * bytes that must not appear raw:
 *
 * - a space becomes "%20" left of the query and "+" within the query
 * - bytes below 0x20 and bytes from 0x7f and up become %XX
 * - everything else is copied as-is
 *
 * 'relative' FALSE means the string includes a host part: everything up to
 * the separator found by find_host_sep() is then copied verbatim.
 * 'query' TRUE means the input is already within the query part; otherwise
 * the first '?' seen switches to query mode.
 *
 * Returns CURLUE_OK or the dynbuf failure converted with cc2cu().
 */
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
                               size_t len, bool relative,
                               bool query)
{
  bool before_query = !query;
  const unsigned char *start = (const unsigned char *)url;
  const unsigned char *p;
  CURLcode result = CURLE_OK;

  if(!relative) {
    size_t prefix;
    start = (const unsigned char *)find_host_sep(url);

    /* the scheme and host piece goes out untouched */
    prefix = (size_t)((const char *)start - url);
    result = curlx_dyn_addn(o, url, prefix);
    len -= prefix;
  }

  for(p = start; len && !result; p++, len--) {
    const unsigned char c = *p;

    if(c == ' ') {
      if(before_query)
        result = curlx_dyn_addn(o, "%20", 3);
      else
        result = curlx_dyn_addn(o, "+", 1);
    }
    else if((c < ' ') || (c >= 0x7f)) {
      unsigned char hex[3]={'%'};
      Curl_hexbyte(&hex[1], c);
      result = curlx_dyn_addn(o, hex, 3);
    }
    else {
      result = curlx_dyn_addn(o, p, 1);
      if(c == '?')
        /* from here on, spaces are encoded the query way */
        before_query = FALSE;
    }
  }

  return result ? cc2cu(result) : CURLUE_OK;
}
/*
 * Curl_is_absolute_url() checks if 'url' starts with "[scheme]:".
 *
 * Returns the length of the scheme when one is found, otherwise 0. When
 * 'buf' is non-NULL it receives the lowercased, null-terminated scheme (or
 * an empty string when there is none); it must be larger than
 * MAX_SCHEME_LEN bytes, which is only asserted in debug builds.
 *
 * With 'guess_scheme' set, the colon must be followed by a slash for the
 * prefix to count as a scheme, and on Windows a leading drive letter prefix
 * is never a scheme.
 */
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
                            bool guess_scheme)
{
  size_t pos = 0;
  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
  (void)buflen;
  if(buf)
    buf[0] = 0;
#ifdef _WIN32
  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
    return 0;
#endif
  if(ISALPHA(url[0])) {
    /* scan the scheme characters: ALPHA / DIGIT / "+" / "-" / "." */
    pos = 1;
    while(pos < MAX_SCHEME_LEN) {
      const char c = url[pos];
      if(!c || !(ISALNUM(c) || (c == '+') || (c == '-') || (c == '.')))
        break;
      ++pos;
    }
  }

  if(!pos || (url[pos] != ':'))
    /* no scheme candidate, or not ended by a colon */
    return 0;

  if(guess_scheme && (url[pos + 1] != '/'))
    /* in guess mode "name:" may just as well be a hostname plus a port */
    return 0;

  if(buf) {
    Curl_strntolower(buf, url, pos);
    buf[pos] = 0;
  }
  return pos;
}
/*
 * redirect_url() applies the relative reference 'relurl' to the absolute URL
 * 'base', parses the combined URL and, on success, replaces the contents of
 * 'u' with it.
 *
 * 'base' is the URL string extracted from 'u'. The start of its authority
 * part is located from the string itself: 'u->scheme' can be NULL when the
 * base was generated with CURLU_DEFAULT_SCHEME, and the base carries no
 * "scheme://" prefix at all when a guessed scheme was suppressed with
 * CURLU_NO_GUESS_SCHEME.
 *
 * Returns CURLUE_MALFORMED_INPUT on a NULL argument, CURLUE_OUT_OF_MEMORY
 * when the new URL cannot be assembled, otherwise the result of parsing the
 * new URL.
 */
static CURLUcode redirect_url(const char *base, const char *relurl,
                              CURLU *u, unsigned int flags)
{
  struct dynbuf urlbuf;
  bool host_changed = FALSE;
  const char *useurl = relurl;
  const char *cutoff = NULL;
  const char *protsep = NULL;
  size_t prelen;
  CURLUcode uc;

  DEBUGASSERT(base && relurl && u);
  if(!base || !relurl || !u)
    return CURLUE_MALFORMED_INPUT;

  /* protsep points to the start of the hostname, after [scheme]:// */
  if(u->scheme) {
    size_t slen = strlen(u->scheme);
    if(!strncmp(base, u->scheme, slen) && !strncmp(base + slen, "://", 3))
      protsep = base + slen + 3;
  }
  if(!protsep) {
    /* the stored scheme is missing or is not what the base starts with, so
       look for a scheme prefix in the base itself */
    size_t slen = Curl_is_absolute_url(base, NULL, 0, FALSE);
    if(slen && (base[slen + 1] == '/') && (base[slen + 2] == '/'))
      protsep = base + slen + 3;
    else
      /* no scheme prefix present: the authority starts the string */
      protsep = base;
  }

  switch(relurl[0]) {
  case '/':
    if(relurl[1] == '/') {
      /* "//host/path": keep only the scheme of the base */
      cutoff = protsep;
      useurl = &relurl[2];
      host_changed = TRUE;
    }
    else
      /* "/path": replace everything from the first slash after the host */
      cutoff = strchr(protsep, '/');
    break;

  case '#':
    /* only the fragment changes */
    if(u->fragment)
      cutoff = strchr(protsep, '#');
    break;

  default:
    /* a relative path or a query-only change */
    if(u->query && u->query[0])
      /* drop the existing query */
      cutoff = strchr(protsep, '?');
    else if(u->fragment && u->fragment[0])
      /* drop the existing fragment */
      cutoff = strchr(protsep, '#');

    if(relurl[0] != '?') {
      /* a relative path gets appended after the last slash */
      cutoff = memrchr(protsep, '/',
                       cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
      if(cutoff)
        cutoff++; /* keep the slash itself */
    }
    break;
  }

  /* number of bytes of the base to keep */
  prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);

  curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);

  if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
    /* the combined URL always gets its dot segments removed */
    uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
                              flags & ~CURLU_PATH_AS_IS);
  }
  else
    uc = CURLUE_OUT_OF_MEMORY;

  curlx_dyn_free(&urlbuf);
  return uc;
}
/*
 * Curl_junkscan() verifies that 'url' is not longer than
 * CURL_MAX_INPUT_LENGTH and contains no control bytes. A byte is rejected
 * when it is 127 or at most 0x1f; the space (0x20) is rejected as well
 * unless 'allowspace' is set.
 *
 * On success the string length is stored in '*urllen' and CURLUE_OK is
 * returned, otherwise CURLUE_MALFORMED_INPUT and '*urllen' is left alone.
 */
CURLUcode Curl_junkscan(const char *url, size_t *urllen, bool allowspace)
{
  const size_t total = strlen(url);
  const unsigned char *byte = (const unsigned char *)url;
  const unsigned char *end = byte + total;
  /* the highest byte value treated as junk */
  const unsigned char limit = allowspace ? 0x1f : 0x20;

  if(total > CURL_MAX_INPUT_LENGTH)
    return CURLUE_MALFORMED_INPUT;

  for(; byte < end; byte++) {
    if((*byte <= limit) || (*byte == 127))
      return CURLUE_MALFORMED_INPUT;
  }

  *urllen = total;
  return CURLUE_OK;
}
/*
 * parse_hostname_login() extracts the optional "user:password;options@"
 * prefix from the authority string 'login' ('len' bytes) and stores the
 * pieces in 'u'.
 *
 * '*offset' is set to the index in 'login' where the hostname starts: 0 when
 * there is no '@', otherwise the index just past the first '@'.
 *
 * Options are only parsed out when the handle's scheme is known and its
 * handler has PROTOPT_URLOPTIONS set.
 *
 * When no login is present, or on error, the user, password and options of
 * 'u' are cleared. Any previous values are freed first so a handle that
 * already holds credentials does not leak them.
 *
 * Returns CURLUE_OK, CURLUE_BAD_LOGIN when the login cannot be parsed, or
 * CURLUE_USER_NOT_ALLOWED when a user name is present while
 * CURLU_DISALLOW_USER is set in 'flags'.
 */
static CURLUcode parse_hostname_login(struct Curl_URL *u,
                                      const char *login,
                                      size_t len,
                                      unsigned int flags,
                                      size_t *offset)
{
  CURLUcode result = CURLUE_OK;
  CURLcode ccode;
  char *userp = NULL;
  char *passwdp = NULL;
  char *optionsp = NULL;
  const struct Curl_handler *h = NULL;
  char *ptr;

  DEBUGASSERT(login);

  *offset = 0;
  ptr = memchr(login, '@', len);
  if(!ptr)
    /* no login part at all */
    goto out;

  /* step past the '@': this is where the hostname starts */
  ptr++;

  /* a known scheme tells if options are expected in the login */
  if(u->scheme)
    h = Curl_get_scheme_handler(u->scheme);

  /* parse the bytes before the '@'; 'h' may be NULL here */
  ccode = Curl_parse_login_details(login, ptr - login - 1,
                                   &userp, &passwdp,
                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
                                   &optionsp : NULL);
  if(ccode) {
    result = CURLUE_BAD_LOGIN;
    goto out;
  }

  if(userp) {
    if(flags & CURLU_DISALLOW_USER) {
      result = CURLUE_USER_NOT_ALLOWED;
      goto out;
    }
    free(u->user);
    u->user = userp;
  }

  if(passwdp) {
    free(u->password);
    u->password = passwdp;
  }

  if(optionsp) {
    free(u->options);
    u->options = optionsp;
  }

  *offset = ptr - login;
  return CURLUE_OK;

out:
  /* the freshly parsed strings were never stored in the handle when we get
     here, so they and the handle's previous values are freed separately */
  free(userp);
  free(passwdp);
  free(optionsp);
  Curl_safefree(u->user);
  Curl_safefree(u->password);
  Curl_safefree(u->options);

  return result;
}
/*
 * Curl_parse_port() looks for a ":port" suffix in the hostname held by the
 * dynbuf 'host'. When found, the dynbuf is cut at the colon and the port is
 * stored in 'u', both as a number and as a newly generated string.
 *
 * For a bracketed IPv6 literal the colon must follow the closing bracket
 * directly. A colon with nothing after it is accepted (and cut off) only
 * when 'has_scheme' is set.
 *
 * Returns CURLUE_OK, CURLUE_BAD_IPV6 (no closing bracket),
 * CURLUE_BAD_PORT_NUMBER or CURLUE_OUT_OF_MEMORY.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  char *name = curlx_dyn_ptr(host);
  const char *colon;
  curl_off_t value;

  if(name[0] != '[')
    colon = strchr(name, ':');
  else {
    const char *bracket = strchr(name, ']');
    if(!bracket)
      return CURLUE_BAD_IPV6;
    bracket++;
    if(!*bracket)
      /* nothing follows the literal */
      colon = NULL;
    else if(*bracket != ':')
      return CURLUE_BAD_PORT_NUMBER;
    else
      colon = bracket;
  }

  if(!colon)
    return CURLUE_OK;

  /* cut the hostname at the colon; the port text after it is still there
     to be read */
  curlx_dyn_setlen(host, (size_t)(colon - name));
  colon++;
  if(!*colon)
    return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

  if(curlx_str_number(&colon, &value, 0xffff) || *colon)
    return CURLUE_BAD_PORT_NUMBER;

  u->portnum = (unsigned short) value;
  /* regenerate the string from the number */
  free(u->port);
  u->port = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, value);
  if(!u->port)
    return CURLUE_OUT_OF_MEMORY;

  return CURLUE_OK;
}
/*
 * ipv6_parse() validates and normalizes, in place, the bracketed IPv6
 * literal in 'hostname' ('hlen' bytes, starting with '[').
 *
 * A zone id ("%zone" or "%25zone", at most 15 characters) is split off and
 * stored in 'u->zoneid', replacing and freeing any previous zone id. The
 * address itself is rewritten in the form produced by curlx_inet_ntop() when
 * that call succeeds.
 *
 * Returns CURLUE_OK, CURLUE_BAD_IPV6 or CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
                            size_t hlen)
{
  size_t len;
  DEBUGASSERT(*hostname == '[');
  if(hlen < 4)
    /* "[::]" is the shortest string that can be valid */
    return CURLUE_BAD_IPV6;
  /* skip the opening bracket and leave both brackets out of the length */
  hostname++;
  hlen -= 2;

  /* count the leading bytes that are valid in an address */
  len = strspn(hostname, "0123456789abcdefABCDEF:.");

  if(hlen != len) {
    hlen = len;
    if(hostname[len] == '%') {
      /* this may be a zone id */
      char zoneid[16];
      char *newzone;
      int i = 0;
      char *h = &hostname[len + 1];
      /* skip "25" when it is the rest of a URL encoded percent sign */
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
        h += 2;
      while(*h && (*h != ']') && (i < 15))
        zoneid[i++] = *h++;
      if(!i || (']' != *h))
        /* empty zone id, too long, or not ended by the bracket */
        return CURLUE_BAD_IPV6;
      zoneid[i] = 0;
      newzone = strdup(zoneid);
      if(!newzone)
        return CURLUE_OUT_OF_MEMORY;
      /* do not leak a zone id the handle may already have */
      free(u->zoneid);
      u->zoneid = newzone;
      hostname[len] = ']'; /* end the literal where the zone id started */
      hostname[len + 1] = 0;
    }
    else
      return CURLUE_BAD_IPV6;
  }

  {
    char dest[16]; /* room for a binary IPv6 address */
    hostname[hlen] = 0; /* temporarily end the string at the bracket */
    if(curlx_inet_pton(AF_INET6, hostname, dest) != 1)
      return CURLUE_BAD_IPV6;
    if(curlx_inet_ntop(AF_INET6, dest, hostname, hlen)) {
      /* the address was rewritten, its length may have changed */
      hlen = strlen(hostname);
      hostname[hlen + 1] = 0;
    }
    hostname[hlen] = ']'; /* put the closing bracket back */
  }
  return CURLUE_OK;
}
/*
 * hostname_check() verifies the hostname in 'hostname' ('hlen' bytes).
 *
 * An empty name gives CURLUE_NO_HOST. A name starting with '[' is handed to
 * ipv6_parse(), which may modify the string and set the zone id in 'u'. Any
 * other name must be free from the listed forbidden characters, otherwise
 * CURLUE_BAD_HOSTNAME is returned.
 */
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
                                size_t hlen)
{
  DEBUGASSERT(hostname);

  if(!hlen)
    return CURLUE_NO_HOST;

  if(hostname[0] == '[')
    return ipv6_parse(u, hostname, hlen);

  /* the name is fine only if no forbidden byte is found before its end */
  if(strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%") != hlen)
    return CURLUE_BAD_HOSTNAME;

  return CURLUE_OK;
}
/* Return codes from ipv4_normalize() */
#define HOST_ERROR -1 /* failed to write the normalized address */
#define HOST_NAME 1   /* not a numerical IPv4 address: treat as a name */
#define HOST_IPV4 2   /* a numerical IPv4 address, now normalized */
#define HOST_IPV6 3   /* starts with '[': to be parsed as IPv6 */
/*
 * ipv4_normalize() checks if the hostname in 'host' is a numerical IPv4
 * address with one to four dot-separated numbers, each given in decimal,
 * octal (leading "0") or hexadecimal (leading "0x"). If it is, the dynbuf
 * contents are replaced with the address in plain dotted-decimal form.
 *
 * Returns HOST_IPV6 when the name starts with '[', HOST_NAME when the name
 * is not such an address (the dynbuf is then left untouched), HOST_IPV4 on
 * a successful rewrite and HOST_ERROR when the rewrite failed.
 */
static int ipv4_normalize(struct dynbuf *host)
{
  const char *cur = curlx_dyn_ptr(host);
  unsigned int parts[4] = {0, 0, 0, 0};
  unsigned int octet[4] = {0, 0, 0, 0};
  int last = 0; /* index of the most recently parsed number */

  if(*cur == '[')
    return HOST_IPV6;

  for(;;) {
    curl_off_t num;
    int rc;

    if(*cur != '0')
      rc = curlx_str_number(&cur, &num, UINT_MAX);
    else if(cur[1] == 'x') {
      cur += 2; /* pass the "0x" prefix */
      rc = curlx_str_hex(&cur, &num, UINT_MAX);
    }
    else
      rc = curlx_str_octal(&cur, &num, UINT_MAX);

    if(rc)
      return HOST_NAME;

    parts[last] = (unsigned int)num;

    if(!*cur)
      /* the whole string was consumed */
      break;
    if((*cur != '.') || (last == 3))
      /* junk after the number, or a dot after the fourth number */
      return HOST_NAME;
    last++;
    cur++;
  }

  /* the final number fills all the bytes that remain */
  switch(last) {
  case 0: /* a: 32 bits */
    octet[0] = parts[0] >> 24;
    octet[1] = (parts[0] >> 16) & 0xff;
    octet[2] = (parts[0] >> 8) & 0xff;
    octet[3] = parts[0] & 0xff;
    break;
  case 1: /* a.b: 8 + 24 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
      return HOST_NAME;
    octet[0] = parts[0];
    octet[1] = (parts[1] >> 16) & 0xff;
    octet[2] = (parts[1] >> 8) & 0xff;
    octet[3] = parts[1] & 0xff;
    break;
  case 2: /* a.b.c: 8 + 8 + 16 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
      return HOST_NAME;
    octet[0] = parts[0];
    octet[1] = parts[1];
    octet[2] = (parts[2] >> 8) & 0xff;
    octet[3] = parts[2] & 0xff;
    break;
  case 3: /* a.b.c.d: 8 bits each */
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
       (parts[3] > 0xff))
      return HOST_NAME;
    octet[0] = parts[0];
    octet[1] = parts[1];
    octet[2] = parts[2];
    octet[3] = parts[3];
    break;
  }

  curlx_dyn_reset(host);
  if(curlx_dyn_addf(host, "%u.%u.%u.%u",
                    octet[0], octet[1], octet[2], octet[3]))
    return HOST_ERROR;
  return HOST_IPV4;
}
/*
 * urldecode_host() URL decodes the hostname in 'host' in place. A name
 * without any '%' is left untouched.
 *
 * Returns CURLUE_OK, CURLUE_BAD_HOSTNAME when decoding fails (control bytes
 * are rejected) or the dynbuf failure converted with cc2cu().
 */
static CURLUcode urldecode_host(struct dynbuf *host)
{
  const char *encoded = curlx_dyn_ptr(host);
  char *decoded;
  size_t dlen;
  CURLcode result;

  if(!strchr(encoded, '%'))
    /* nothing to decode */
    return CURLUE_OK;

  result = Curl_urldecode(encoded, 0, &decoded, &dlen, REJECT_CTRL);
  if(result)
    return CURLUE_BAD_HOSTNAME;

  /* swap the buffer contents for the decoded version */
  curlx_dyn_reset(host);
  result = curlx_dyn_addn(host, decoded, dlen);
  free(decoded);

  return result ? cc2cu(result) : CURLUE_OK;
}
/*
 * parse_authority() parses the authority part 'auth' ('authlen' bytes) of a
 * URL: optional login details, the hostname and an optional port.
 *
 * Login details and port are stored in 'u'. The hostname is appended to the
 * dynbuf 'host', which the caller owns, and is then normalized there: IPv4
 * numericals are rewritten, IPv6 literals are parsed and other names are
 * URL decoded and checked.
 *
 * 'has_scheme' is passed on to Curl_parse_port().
 */
static CURLUcode parse_authority(struct Curl_URL *u,
                                 const char *auth, size_t authlen,
                                 unsigned int flags,
                                 struct dynbuf *host,
                                 bool has_scheme)
{
  size_t offset;
  CURLcode result;
  CURLUcode uc;

  /* get the login details out of the way first */
  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
  if(uc)
    return uc;

  /* what follows the login is hostname plus an optional port */
  result = curlx_dyn_addn(host, auth + offset, authlen - offset);
  if(result)
    return cc2cu(result);

  uc = Curl_parse_port(u, host, has_scheme);
  if(uc)
    return uc;

  if(!curlx_dyn_len(host))
    return CURLUE_NO_HOST;

  switch(ipv4_normalize(host)) {
  case HOST_IPV4:
    /* already rewritten, nothing more to do */
    return CURLUE_OK;
  case HOST_IPV6:
    return ipv6_parse(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
  case HOST_NAME:
    uc = urldecode_host(host);
    if(uc)
      return uc;
    return hostname_check(u, curlx_dyn_ptr(host), curlx_dyn_len(host));
  case HOST_ERROR:
    return CURLUE_OUT_OF_MEMORY;
  default:
    return CURLUE_BAD_HOSTNAME;
  }
}
/*
 * Curl_url_set_authority() parses 'authority' and stores the resulting
 * host (and port, if present) in 'u'. A user name in the authority is
 * refused since parsing is done with CURLU_DISALLOW_USER.
 *
 * On success the previous host of 'u' is freed and replaced. On failure the
 * host of 'u' is left as it was and the error is returned.
 */
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
{
  struct dynbuf newhost;
  CURLUcode uc;

  DEBUGASSERT(authority);
  curlx_dyn_init(&newhost, CURL_MAX_INPUT_LENGTH);

  uc = parse_authority(u, authority, strlen(authority),
                       CURLU_DISALLOW_USER, &newhost, !!u->scheme);
  if(uc) {
    curlx_dyn_free(&newhost);
    return uc;
  }

  /* the handle takes over the buffer */
  free(u->host);
  u->host = curlx_dyn_ptr(&newhost);
  return uc;
}
/*
 * is_dot() checks if the string at '*str' starts with a dot, either a
 * literal "." or the encoded forms "%2e" / "%2E". The encoded form is only
 * recognized when at least three bytes remain according to '*clen'.
 *
 * On a match, '*str' is advanced past the dot, '*clen' is reduced by the
 * number of bytes consumed and TRUE is returned. Otherwise both are left
 * untouched and FALSE is returned.
 */
static bool is_dot(const char **str, size_t *clen)
{
  const char *s = *str;
  size_t used;

  if(s[0] == '.')
    used = 1;
  else if((*clen >= 3) &&
          (s[0] == '%') && (s[1] == '2') && ((s[2] | 0x20) == 'e'))
    used = 3;
  else
    return FALSE;

  *str = s + used;
  *clen -= used;
  return TRUE;
}
/* True when 'x' is the path separator '/' */
#define ISSLASH(x) ((x) == '/')
/*
 * dedotdotify() removes "." and ".." segments from the path 'input', which
 * is 'clen' bytes long. Dots may be given literally or as "%2e"/"%2E" (see
 * is_dot()).
 *
 * On success, '*outp' is NULL when the input is shorter than two bytes
 * (nothing to do), otherwise it points to a newly allocated, possibly
 * empty, string holding the cleaned path that the caller must free.
 *
 * Returns 0 on success and 1 when out of memory.
 */
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
{
struct dynbuf out;
CURLcode result = CURLE_OK;
*outp = NULL;
/* a path this short cannot hold any dot segment to remove */
if(clen < 2)
return 0;
/* the output never gets longer than the input */
curlx_dyn_init(&out, clen + 1);
/* handle a leading "./" or "../", or an input that is only "." or ".." */
if(is_dot(&input, &clen)) {
const char *p = input;
size_t blen = clen;
if(!clen)
/* the input was a single dot */
goto end;
else if(ISSLASH(*p)) {
/* "./": drop the prefix */
input = p + 1;
clen--;
}
else if(is_dot(&p, &blen)) {
if(!blen)
/* the input was just two dots */
goto end;
else if(ISSLASH(*p)) {
/* "../": drop the prefix */
input = p + 1;
clen = blen - 1;
}
}
}
while(clen && !result) {
if(ISSLASH(*input)) {
/* look ahead past the slash using a separate pointer and length */
const char *p = &input[1];
size_t blen = clen - 1;
if(is_dot(&p, &blen)) {
if(!blen) {
/* the input ends with "/.": output the slash only */
result = curlx_dyn_addn(&out, "/", 1);
break;
}
else if(ISSLASH(*p)) {
/* "/./": continue from the second slash */
input = p;
clen = blen;
continue;
}
else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
/* "/../" or a trailing "/..": remove the last segment already output,
   including its leading slash */
size_t len = curlx_dyn_len(&out);
if(len) {
char *ptr = curlx_dyn_ptr(&out);
char *last = memrchr(ptr, '/', len);
if(last)
curlx_dyn_setlen(&out, last - ptr);
}
if(blen) {
/* "/../": continue from the slash that follows the dots */
input = p;
clen = blen;
continue;
}
/* a trailing "/..": end the output with a slash */
result = curlx_dyn_addn(&out, "/", 1);
break;
}
}
}
/* no dot segment here: copy one byte to the output */
result = curlx_dyn_addn(&out, input, 1);
input++;
clen--;
}
end:
if(!result) {
if(curlx_dyn_len(&out))
/* hand over the buffer to the caller */
*outp = curlx_dyn_ptr(&out);
else {
/* nothing was output: return an allocated empty string */
*outp = strdup("");
if(!*outp)
return 1;
}
}
return result ? 1 : 0;
}
/*
 * parseurl() parses the URL string 'url' and stores its components in the
 * handle 'u', which the only caller in this file passes in zero-filled.
 *
 * 'flags' is a bitmask of CURLU_* bits that control scheme defaulting and
 * guessing, acceptance of unsupported schemes, spaces and empty
 * authorities, URL encoding of the parts and dot segment removal.
 *
 * Returns CURLUE_OK on success. On failure, everything stored in 'u' so far
 * is freed with free_urlhandle() and the error code is returned; note that
 * the pointers in 'u' are not cleared on that path.
 */
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
{
const char *path;
size_t pathlen;
char *query = NULL;
char *fragment = NULL;
char schemebuf[MAX_SCHEME_LEN + 1];
size_t schemelen = 0;
size_t urllen;
CURLUcode result = CURLUE_OK;
size_t fraglen = 0;
struct dynbuf host;
DEBUGASSERT(url);
curlx_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
/* reject too long input and control bytes; this also gets the length */
result = Curl_junkscan(url, &urllen, !!(flags & CURLU_ALLOW_SPACE));
if(result)
goto fail;
/* extract a leading scheme, lowercased, into schemebuf */
schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
flags & (CURLU_GUESS_SCHEME|
CURLU_DEFAULT_SCHEME));
/* file: URLs have their own rules for the authority part */
if(schemelen && !strcmp(schemebuf, "file")) {
bool uncpath = FALSE;
if(urllen <= 6) {
/* too short to be more than "file:" and one byte */
result = CURLUE_BAD_FILE_URL;
goto fail;
}
/* start out with everything after "file:" as the path */
path = &url[5];
pathlen = urllen - 5;
u->scheme = strdup("file");
if(!u->scheme) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
if(path[0] == '/' && path[1] == '/') {
/* skip the two slashes */
const char *ptr = &path[2];
if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
/* there is a hostname: only "localhost" and "127.0.0.1" are accepted
   everywhere */
if(checkprefix("localhost/", ptr) ||
checkprefix("127.0.0.1/", ptr)) {
/* both names are nine bytes: point to the slash that follows */
ptr += 9;
}
else {
#ifdef _WIN32
size_t len;
/* on Windows another hostname is accepted when it is ended by a slash
   and holds none of the listed characters */
path = strpbrk(ptr, "/\\:*?\"<>|");
if(!path || *path != '/') {
result = CURLUE_BAD_FILE_URL;
goto fail;
}
len = path - ptr;
if(len) {
CURLcode code = curlx_dyn_addn(&host, ptr, len);
if(code) {
result = cc2cu(code);
goto fail;
}
uncpath = TRUE;
}
/* back up to the two slashes before the hostname so that they become
   part of the path */
ptr -= 2;
#else
result = CURLUE_BAD_FILE_URL;
goto fail;
#endif
}
}
path = ptr;
pathlen = urllen - (ptr - url);
}
if(!uncpath)
/* no host is kept for this file: URL */
curlx_dyn_reset(&host);
#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
/* drive letter prefixes are refused on these platforms, with or without a
   leading slash */
if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
STARTS_WITH_URL_DRIVE_PREFIX(path)) {
result = CURLUE_BAD_FILE_URL;
goto fail;
}
#else
/* a slash followed by a drive letter prefix: drop the slash */
if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
path++;
pathlen--;
}
#endif
}
else {
/* not a file: URL */
const char *schemep = NULL;
const char *hostp;
size_t hostlen;
if(schemelen) {
int i = 0;
/* count the slashes that follow "scheme:", stopping at four */
const char *p = &url[schemelen + 1];
while((*p == '/') && (i < 4)) {
p++;
i++;
}
schemep = schemebuf;
if(!Curl_get_scheme_handler(schemep) &&
!(flags & CURLU_NON_SUPPORT_SCHEME)) {
result = CURLUE_UNSUPPORTED_SCHEME;
goto fail;
}
/* one to three slashes are accepted */
if((i < 1) || (i > 3)) {
result = CURLUE_BAD_SLASHES;
goto fail;
}
hostp = p;
}
else {
/* no scheme in the URL: only fine if it may be defaulted or guessed */
if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
result = CURLUE_BAD_SCHEME;
goto fail;
}
if(flags & CURLU_DEFAULT_SCHEME)
schemep = DEFAULT_SCHEME;
/* the authority then starts the string */
hostp = url;
}
if(schemep) {
u->scheme = strdup(schemep);
if(!u->scheme) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
}
/* the authority ends at the first slash, question mark or hash */
hostlen = strcspn(hostp, "/?#");
path = &hostp[hostlen];
/* at this point pathlen includes query and fragment */
pathlen = urllen - (path - url);
if(hostlen) {
result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
if(result)
goto fail;
if((flags & CURLU_GUESS_SCHEME) && !schemep) {
/* guess the scheme from the start of the hostname */
const char *hostname = curlx_dyn_ptr(&host);
if(checkprefix("ftp.", hostname))
schemep = "ftp";
else if(checkprefix("dict.", hostname))
schemep = "dict";
else if(checkprefix("ldap.", hostname))
schemep = "ldap";
else if(checkprefix("imap.", hostname))
schemep = "imap";
else if(checkprefix("smtp.", hostname))
schemep = "smtp";
else if(checkprefix("pop3.", hostname))
schemep = "pop3";
else
schemep = "http";
u->scheme = strdup(schemep);
if(!u->scheme) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
/* remember that the scheme was not in the URL */
u->guessed_scheme = TRUE;
}
}
else if(flags & CURLU_NO_AUTHORITY) {
/* an empty authority is allowed: store an empty hostname */
if(curlx_dyn_add(&host, "")) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
}
else {
result = CURLUE_NO_HOST;
goto fail;
}
}
/* split off the fragment: everything from the first '#' */
fragment = strchr(path, '#');
if(fragment) {
fraglen = pathlen - (fragment - path);
u->fragment_present = TRUE;
/* only store a fragment that has contents after the '#' */
if(fraglen > 1) {
if(flags & CURLU_URLENCODE) {
struct dynbuf enc;
curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
if(result)
goto fail;
u->fragment = curlx_dyn_ptr(&enc);
}
else {
u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
if(!u->fragment) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
}
}
/* pathlen still includes the query after this */
pathlen -= fraglen;
}
/* split off the query: from the first '?' before the fragment */
query = memchr(path, '?', pathlen);
if(query) {
size_t qlen = fragment ? (size_t)(fragment - query) :
pathlen - (query - path);
pathlen -= qlen;
u->query_present = TRUE;
if(qlen > 1) {
if(flags & CURLU_URLENCODE) {
struct dynbuf enc;
curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
/* the question mark itself is not part of the stored query */
result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
if(result)
goto fail;
u->query = curlx_dyn_ptr(&enc);
}
else {
u->query = Curl_memdup0(query + 1, qlen - 1);
if(!u->query) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
}
}
else {
/* a lone '?': store an empty query */
u->query = strdup("");
if(!u->query) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
}
}
/* when asked to, URL encode the path; the encoded copy is stored right
   away and its length replaces pathlen */
if(pathlen && (flags & CURLU_URLENCODE)) {
struct dynbuf enc;
curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
if(result)
goto fail;
pathlen = curlx_dyn_len(&enc);
path = u->path = curlx_dyn_ptr(&enc);
}
if(pathlen <= 1) {
/* an empty or single byte path is not processed further */
path = NULL;
}
else {
if(!u->path) {
/* not stored above: store a null-terminated copy now */
u->path = Curl_memdup0(path, pathlen);
if(!u->path) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
path = u->path;
}
else if(flags & CURLU_URLENCODE)
/* terminate the encoded copy at the path length */
u->path[pathlen] = 0;
if(!(flags & CURLU_PATH_AS_IS)) {
/* remove dot segments from the path */
char *dedot;
int err = dedotdotify(path, pathlen, &dedot);
if(err) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
if(dedot) {
free(u->path);
u->path = dedot;
}
}
}
/* the handle takes over the hostname buffer */
u->host = curlx_dyn_ptr(&host);
return result;
fail:
curlx_dyn_free(&host);
free_urlhandle(u);
return result;
}
/*
 * parseurl_and_replace() parses 'url' into a temporary handle. Only when
 * parsing succeeds are the old contents of 'u' freed and replaced by the
 * new ones; on failure 'u' is left as it was.
 */
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
                                      unsigned int flags)
{
  CURLU parsed;
  CURLUcode uc;

  memset(&parsed, 0, sizeof(parsed));
  uc = parseurl(url, &parsed, flags);
  if(uc)
    return uc;

  /* all good: swap in the new parts */
  free_urlhandle(u);
  *u = parsed;
  return uc;
}
/*
 * curl_url() allocates a new, zero-filled URL handle. Returns NULL when the
 * allocation fails. Free the handle with curl_url_cleanup().
 */
CURLU *curl_url(void)
{
  struct Curl_URL *handle = calloc(1, sizeof(struct Curl_URL));
  return handle;
}
/*
 * curl_url_cleanup() frees the URL handle 'u' and all strings it owns.
 * Passing NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
/* Duplicate the string member 'name' from 'src' into 'dest' when it is set.
   On allocation failure this jumps to a label called 'fail', which the
   function using the macro must provide. */
#define DUP(dest, src, name) \
do { \
if(src->name) { \
dest->name = strdup(src->name); \
if(!dest->name) \
goto fail; \
} \
} while(0)
CURLU *curl_url_dup(const CURLU *in)
{
struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
if(u) {
DUP(u, in, scheme);
DUP(u, in, user);
DUP(u, in, password);
DUP(u, in, options);
DUP(u, in, host);
DUP(u, in, port);
DUP(u, in, path);
DUP(u, in, query);
DUP(u, in, fragment);
DUP(u, in, zoneid);
u->portnum = in->portnum;
u->fragment_present = in->fragment_present;
u->query_present = in->query_present;
}
return u;
fail:
curl_url_cleanup(u);
return NULL;
}
#ifndef USE_IDN
/* without IDN support in the build, both conversions always fail */
#define host_decode(x,y) CURLUE_LACKS_IDN
#define host_encode(x,y) CURLUE_LACKS_IDN
#else
/*
 * host_decode() runs Curl_idn_decode() on 'host', returning the result in
 * '*allochost'. A failure is reported as CURLUE_OUT_OF_MEMORY or, for any
 * other error, CURLUE_BAD_HOSTNAME.
 */
static CURLUcode host_decode(const char *host, char **allochost)
{
  switch(Curl_idn_decode(host, allochost)) {
  case CURLE_OK:
    return CURLUE_OK;
  case CURLE_OUT_OF_MEMORY:
    return CURLUE_OUT_OF_MEMORY;
  default:
    return CURLUE_BAD_HOSTNAME;
  }
}

/*
 * host_encode() runs Curl_idn_encode() on 'host', returning the result in
 * '*allochost'. A failure is reported as CURLUE_OUT_OF_MEMORY or, for any
 * other error, CURLUE_BAD_HOSTNAME.
 */
static CURLUcode host_encode(const char *host, char **allochost)
{
  switch(Curl_idn_encode(host, allochost)) {
  case CURLE_OK:
    return CURLUE_OK;
  case CURLE_OUT_OF_MEMORY:
    return CURLUE_OUT_OF_MEMORY;
  default:
    return CURLUE_BAD_HOSTNAME;
  }
}
#endif
/*
 * urlget_format() produces the output string for a single URL part.
 *
 * 'ptr' is the stored part and '*part' receives a newly allocated copy,
 * converted as asked for:
 *
 * - 'plusdecode' turns every '+' into a space
 * - CURLU_URLDECODE URL decodes the string (control bytes are rejected)
 * - CURLU_URLENCODE URL encodes it, otherwise
 * - CURLU_PUNYCODE / CURLU_PUNY2IDN convert the hostname; they only apply
 *   when 'what' is CURLUPART_HOST
 *
 * On every error the intermediate allocation is freed and '*part' is set to
 * NULL, so that an error return never hands memory over to the caller.
 */
static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
                               const char *ptr, char **part,
                               bool plusdecode, unsigned int flags)
{
  size_t partlen = strlen(ptr);
  bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
  bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST);
  bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST);

  *part = Curl_memdup0(ptr, partlen);
  if(!*part)
    return CURLUE_OUT_OF_MEMORY;

  if(plusdecode) {
    /* convert + to space before the percent decoding */
    char *plus = *part;
    size_t i = 0;
    for(i = 0; i < partlen; ++plus, i++) {
      if(*plus == '+')
        *plus = ' ';
    }
  }

  if(urldecode) {
    char *decoded;
    size_t dlen;
    CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
    free(*part);
    if(res) {
      *part = NULL;
      return CURLUE_URLDECODE;
    }
    *part = decoded;
    partlen = dlen;
  }

  if(urlencode) {
    struct dynbuf enc;
    CURLUcode uc;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
    if(uc) {
      Curl_safefree(*part);
      return uc;
    }
    free(*part);
    *part = curlx_dyn_ptr(&enc);
  }
  else if(punycode) {
    /* only a name with non-ASCII content is converted */
    if(!Curl_is_ASCII_name(u->host)) {
      char *allochost = NULL;
      CURLUcode ret = host_decode(*part, &allochost);
      if(ret) {
        Curl_safefree(*part);
        return ret;
      }
      free(*part);
      *part = allochost;
    }
  }
  else if(depunyfy) {
    /* only a pure ASCII name is converted */
    if(Curl_is_ASCII_name(u->host)) {
      char *allochost = NULL;
      CURLUcode ret = host_encode(*part, &allochost);
      if(ret) {
        Curl_safefree(*part);
        return ret;
      }
      free(*part);
      *part = allochost;
    }
  }

  return CURLUE_OK;
}
/*
 * urlget_url() assembles the full URL from the parts stored in 'u' and
 * returns it as a newly allocated string in '*part'.
 *
 * A file: URL is built from path, query and fragment only. All other URLs
 * need a host and a scheme; a missing scheme is replaced by DEFAULT_SCHEME
 * when CURLU_DEFAULT_SCHEME is set. A missing path is output as "/", for
 * file: URLs as well.
 *
 * Flags used here: CURLU_DEFAULT_PORT, CURLU_NO_DEFAULT_PORT,
 * CURLU_GET_EMPTY, CURLU_URLENCODE, CURLU_PUNYCODE, CURLU_PUNY2IDN and
 * CURLU_NO_GUESS_SCHEME.
 *
 * Returns CURLUE_OK, CURLUE_NO_HOST, CURLUE_NO_SCHEME, CURLUE_OUT_OF_MEMORY
 * or an error from the hostname conversion.
 */
static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
{
  char *url;
  const char *scheme;
  char *options = u->options;
  char *port = u->port;
  char *allochost = NULL;
  /* an empty fragment or query is only shown when asked for */
  bool show_fragment =
    u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
  bool show_query = (u->query && u->query[0]) ||
    (u->query_present && flags & CURLU_GET_EMPTY);
  bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
  bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
  char portbuf[7];

  if(u->scheme && curl_strequal("file", u->scheme)) {
    /* the path may be unset, so use the same "/" fallback as for other
       schemes instead of passing NULL to %s */
    url = curl_maprintf("file://%s%s%s%s%s",
                        u->path ? u->path : "/",
                        show_query ? "?": "",
                        u->query ? u->query : "",
                        show_fragment ? "#": "",
                        u->fragment ? u->fragment : "");
  }
  else if(!u->host)
    return CURLUE_NO_HOST;
  else {
    const struct Curl_handler *h = NULL;
    /* room for the scheme plus "://" and the null terminator */
    char schemebuf[MAX_SCHEME_LEN + 5];
    if(u->scheme)
      scheme = u->scheme;
    else if(flags & CURLU_DEFAULT_SCHEME)
      scheme = DEFAULT_SCHEME;
    else
      return CURLUE_NO_SCHEME;

    h = Curl_get_scheme_handler(scheme);
    if(!port && (flags & CURLU_DEFAULT_PORT)) {
      /* no port set: show the default port of a known scheme */
      if(h) {
        curl_msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
        port = portbuf;
      }
    }
    else if(port) {
      /* a port is set: hide it when it is the default one and the caller
         asked for that */
      if(h && (h->defport == u->portnum) &&
         (flags & CURLU_NO_DEFAULT_PORT))
        port = NULL;
    }

    if(h && !(h->flags & PROTOPT_URLOPTIONS))
      /* this scheme does not use login options in the URL */
      options = NULL;

    if(u->host[0] == '[') {
      if(u->zoneid) {
        /* insert the zone id, with a URL encoded percent sign, before the
           closing bracket of the IPv6 literal */
        struct dynbuf enc;
        size_t hostlen = strlen(u->host);
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
        if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
                          u->zoneid))
          return CURLUE_OUT_OF_MEMORY;
        allochost = curlx_dyn_ptr(&enc);
      }
    }
    else if(urlencode) {
      allochost = curl_easy_escape(NULL, u->host, 0);
      if(!allochost)
        return CURLUE_OUT_OF_MEMORY;
    }
    else if(punycode) {
      if(!Curl_is_ASCII_name(u->host)) {
        CURLUcode ret = host_decode(u->host, &allochost);
        if(ret)
          return ret;
      }
    }
    else if(depunyfy) {
      if(Curl_is_ASCII_name(u->host)) {
        CURLUcode ret = host_encode(u->host, &allochost);
        if(ret)
          return ret;
      }
    }

    /* a guessed scheme is left out when the caller asked for that */
    if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
      curl_msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
    else
      schemebuf[0] = 0;

    url = curl_maprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                        schemebuf,
                        u->user ? u->user : "",
                        u->password ? ":": "",
                        u->password ? u->password : "",
                        options ? ";" : "",
                        options ? options : "",
                        (u->user || u->password || options) ? "@": "",
                        allochost ? allochost : u->host,
                        port ? ":": "",
                        port ? port : "",
                        u->path ? u->path : "/",
                        show_query ? "?": "",
                        u->query ? u->query : "",
                        show_fragment ? "#": "",
                        u->fragment ? u->fragment : "");
    free(allochost);
  }
  if(!url)
    return CURLUE_OUT_OF_MEMORY;
  *part = url;
  return CURLUE_OK;
}
/*
 * curl_url_get() extracts the part 'what' from the URL handle 'u' and
 * returns it as a newly allocated string in '*part'. '*part' is set to NULL
 * first, before anything else is done.
 *
 * 'flags' is a bitmask of CURLU_* bits that select conversions and
 * defaults. URL decoding is never applied to the scheme and the port.
 *
 * Returns CURLUE_OK, CURLUE_BAD_HANDLE, CURLUE_BAD_PARTPOINTER, the
 * CURLUE_NO_* code for a part that is not set, CURLUE_UNKNOWN_PART for an
 * unknown 'what', or an error from generating the output.
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *value = NULL;
  CURLUcode missing = CURLUE_UNKNOWN_PART;
  char defport[7];
  bool plus2space = FALSE;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  /* the full URL is assembled separately */
  if(what == CURLUPART_URL)
    return urlget_url(u, part, flags);

  switch(what) {
  case CURLUPART_SCHEME:
    if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
      /* the caller does not want to see a guessed scheme */
      return CURLUE_NO_SCHEME;
    value = u->scheme;
    missing = CURLUE_NO_SCHEME;
    flags &= ~CURLU_URLDECODE;
    break;
  case CURLUPART_USER:
    value = u->user;
    missing = CURLUE_NO_USER;
    break;
  case CURLUPART_PASSWORD:
    value = u->password;
    missing = CURLUE_NO_PASSWORD;
    break;
  case CURLUPART_OPTIONS:
    value = u->options;
    missing = CURLUE_NO_OPTIONS;
    break;
  case CURLUPART_HOST:
    value = u->host;
    missing = CURLUE_NO_HOST;
    break;
  case CURLUPART_ZONEID:
    value = u->zoneid;
    missing = CURLUE_NO_ZONEID;
    break;
  case CURLUPART_PORT:
    value = u->port;
    missing = CURLUE_NO_PORT;
    flags &= ~CURLU_URLDECODE;
    if(u->scheme) {
      const struct Curl_handler *h;
      if(value) {
        /* a port is set: hide it when it is the default one for the scheme
           and the caller asked for that */
        h = Curl_get_scheme_handler(u->scheme);
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          value = NULL;
      }
      else if(flags & CURLU_DEFAULT_PORT) {
        /* no port set: provide the default port of a known scheme */
        h = Curl_get_scheme_handler(u->scheme);
        if(h) {
          curl_msnprintf(defport, sizeof(defport), "%u", h->defport);
          value = defport;
        }
      }
    }
    break;
  case CURLUPART_PATH:
    /* the path is never missing */
    value = u->path ? u->path : "/";
    break;
  case CURLUPART_QUERY:
    value = u->query;
    missing = CURLUE_NO_QUERY;
    plus2space = flags & CURLU_URLDECODE;
    if(value && !value[0] && !(flags & CURLU_GET_EMPTY))
      /* an empty query is only returned on request */
      value = NULL;
    break;
  case CURLUPART_FRAGMENT:
    value = u->fragment;
    missing = CURLUE_NO_FRAGMENT;
    if(!value && u->fragment_present && flags & CURLU_GET_EMPTY)
      value = "";
    break;
  default:
    break;
  }

  if(!value)
    return missing;
  return urlget_format(u, what, value, part, plus2space, flags);
}
/*
 * set_url_scheme() verifies that 'scheme' is acceptable to set in the
 * handle. It does not store the scheme, the caller does that.
 *
 * A scheme with a known handler is always accepted. Any other scheme needs
 * CURLU_NON_SUPPORT_SCHEME set in 'flags' and must follow the RFC 3986
 * syntax
 *
 *   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 *
 * Every character is checked, the last one included.
 *
 * On success the 'guessed_scheme' bit of 'u' is cleared.
 *
 * Returns CURLUE_OK, CURLUE_BAD_SCHEME (empty, longer than MAX_SCHEME_LEN
 * or bad syntax) or CURLUE_UNSUPPORTED_SCHEME.
 */
static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
                                unsigned int flags)
{
  size_t plen = strlen(scheme);
  const struct Curl_handler *h = NULL;

  if((plen > MAX_SCHEME_LEN) || (plen < 1))
    /* too long or too short */
    return CURLUE_BAD_SCHEME;

  h = Curl_get_scheme_handler(scheme);
  if(!h) {
    const char *s = scheme;
    if(!(flags & CURLU_NON_SUPPORT_SCHEME))
      return CURLUE_UNSUPPORTED_SCHEME;

    /* the first character must be a letter */
    if(!ISALPHA(*s))
      return CURLUE_BAD_SCHEME;

    /* check all the remaining characters up to the end of the string */
    for(s++; *s; s++) {
      if(!(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.')))
        return CURLUE_BAD_SCHEME;
    }
  }
  u->guessed_scheme = FALSE;
  return CURLUE_OK;
}
/*
 * set_url_port() sets the port of 'u' from the string 'provided_port',
 * which must be a plain decimal number, starting with a digit, no larger
 * than 65535 and with nothing after it.
 *
 * The stored string is regenerated from the parsed number. On failure the
 * handle is left untouched.
 *
 * Returns CURLUE_OK, CURLUE_BAD_PORT_NUMBER or CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode set_url_port(CURLU *u, const char *provided_port)
{
  curl_off_t value;
  char *text;

  if(!ISDIGIT(provided_port[0]) ||
     curlx_str_number(&provided_port, &value, 0xffff) ||
     *provided_port)
    /* not a number, out of range or followed by junk */
    return CURLUE_BAD_PORT_NUMBER;

  text = curl_maprintf("%" CURL_FORMAT_CURL_OFF_T, value);
  if(!text)
    return CURLUE_OUT_OF_MEMORY;

  free(u->port);
  u->port = text;
  u->portnum = (unsigned short)value;
  return CURLUE_OK;
}
/*
 * set_url() sets a whole new URL in the handle. 'part_size' is the length
 * of 'url'.
 *
 * - An empty 'url' changes nothing. It is accepted only when the handle
 *   already holds enough to produce a URL, otherwise
 *   CURLUE_MALFORMED_INPUT is returned.
 * - When 'url' is absolute, or when no URL can be extracted from the
 *   handle, 'url' is parsed and replaces the existing contents.
 * - Otherwise 'url' is treated as a relative reference and is applied to
 *   the URL currently in the handle.
 */
static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
                         unsigned int flags)
{
  CURLUcode uc;
  char *current = NULL;

  if(!part_size) {
    if(curl_url_get(u, CURLUPART_URL, &current, flags))
      /* no complete URL to be relative to */
      return CURLUE_MALFORMED_INPUT;
    free(current);
    return CURLUE_OK;
  }

  /* the existing URL is only extracted when the new one is not absolute */
  if(Curl_is_absolute_url(url, NULL, 0,
                          flags & (CURLU_GUESS_SCHEME|CURLU_DEFAULT_SCHEME))
     || curl_url_get(u, CURLUPART_URL, &current, flags))
    return parseurl_and_replace(url, u, flags);

  DEBUGASSERT(current);
  /* combine the relative part with the current URL */
  uc = redirect_url(current, url, u, flags);
  free(current);
  return uc;
}
/*
 * urlset_clear() removes the part 'what' from the handle, freeing its
 * string and resetting the state that belongs to it. CURLUPART_URL clears
 * the entire handle.
 *
 * Returns CURLUE_OK, or CURLUE_UNKNOWN_PART for an unknown 'what'.
 */
static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
{
  char **slot;

  switch(what) {
  case CURLUPART_URL:
    /* wipe everything */
    free_urlhandle(u);
    memset(u, 0, sizeof(struct Curl_URL));
    return CURLUE_OK;
  case CURLUPART_SCHEME:
    u->guessed_scheme = FALSE;
    slot = &u->scheme;
    break;
  case CURLUPART_USER:
    slot = &u->user;
    break;
  case CURLUPART_PASSWORD:
    slot = &u->password;
    break;
  case CURLUPART_OPTIONS:
    slot = &u->options;
    break;
  case CURLUPART_HOST:
    slot = &u->host;
    break;
  case CURLUPART_ZONEID:
    slot = &u->zoneid;
    break;
  case CURLUPART_PORT:
    u->portnum = 0;
    slot = &u->port;
    break;
  case CURLUPART_PATH:
    slot = &u->path;
    break;
  case CURLUPART_QUERY:
    u->query_present = FALSE;
    slot = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    u->fragment_present = FALSE;
    slot = &u->fragment;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }

  /* free the string and leave NULL in its place */
  Curl_safefree(*slot);
  return CURLUE_OK;
}
/*
 * allowed_in_path() returns TRUE when the byte 'x' is one of the
 * punctuation characters that are stored unencoded in a path when the path
 * is set with URL encoding enabled.
 */
static bool allowed_in_path(unsigned char x)
{
  static const char unencoded[] = "!$&'(){}[]*+,;=:@/";

  /* strchr() would also match the string's own terminator, so a zero byte
     is ruled out first */
  if(x && strchr(unencoded, x))
    return TRUE;
  return FALSE;
}
/*
 * curl_url_set() sets or clears the part 'what' of the URL handle 'u'.
 *
 * A NULL 'part' clears the part (see urlset_clear()). Otherwise 'part' is
 * the new contents, at most CURL_MAX_INPUT_LENGTH bytes long. The port and
 * the full URL are handled by their own functions; every other part goes
 * through the common code below, which builds the new string in a dynbuf
 * and then swaps it in.
 *
 * Flags handled here: CURLU_URLENCODE (URL encode the new contents),
 * CURLU_APPENDQUERY (append to the existing query) and CURLU_NO_AUTHORITY
 * (allow an empty hostname). 'flags' is also passed on when setting the
 * scheme and the full URL.
 *
 * Returns CURLUE_OK or a CURLUE_* error code.
 */
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
const char *part, unsigned int flags)
{
char **storep = NULL;
bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
bool plusencode = FALSE;
bool pathmode = FALSE;
bool leadingslash = FALSE;
bool appendquery = FALSE;
bool equalsencode = FALSE;
size_t nalloc;
if(!u)
return CURLUE_BAD_HANDLE;
if(!part)
/* setting a part to NULL clears it */
return urlset_clear(u, what);
nalloc = strlen(part);
if(nalloc > CURL_MAX_INPUT_LENGTH)
return CURLUE_MALFORMED_INPUT;
/* pick the member to store in and the encoding rules for this part */
switch(what) {
case CURLUPART_SCHEME: {
CURLUcode status = set_url_scheme(u, part, flags);
if(status)
return status;
storep = &u->scheme;
/* the scheme is never URL encoded */
urlencode = FALSE;
break;
}
case CURLUPART_USER:
storep = &u->user;
break;
case CURLUPART_PASSWORD:
storep = &u->password;
break;
case CURLUPART_OPTIONS:
storep = &u->options;
break;
case CURLUPART_HOST:
storep = &u->host;
/* the old zone id goes away with the old host, before the new host is
   validated */
Curl_safefree(u->zoneid);
break;
case CURLUPART_ZONEID:
storep = &u->zoneid;
break;
case CURLUPART_PORT:
return set_url_port(u, part);
case CURLUPART_PATH:
pathmode = TRUE;
/* a stored path always starts with a slash */
leadingslash = TRUE;
storep = &u->path;
break;
case CURLUPART_QUERY:
/* when URL encoding a query, spaces become '+' */
plusencode = urlencode;
appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
/* when appending, the first '=' is kept unencoded */
equalsencode = appendquery;
storep = &u->query;
u->query_present = TRUE;
break;
case CURLUPART_FRAGMENT:
storep = &u->fragment;
u->fragment_present = TRUE;
break;
case CURLUPART_URL:
return set_url(u, part, nalloc, flags);
default:
return CURLUE_UNKNOWN_PART;
}
DEBUGASSERT(storep);
{
const char *newp;
struct dynbuf enc;
/* worst case every byte becomes a three byte %XX sequence, plus the
   terminator and an added leading slash */
curlx_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
if(leadingslash && (part[0] != '/')) {
CURLcode result = curlx_dyn_addn(&enc, "/", 1);
if(result)
return cc2cu(result);
}
if(urlencode) {
const unsigned char *i;
for(i = (const unsigned char *)part; *i; i++) {
CURLcode result;
if((*i == ' ') && plusencode) {
result = curlx_dyn_addn(&enc, "+", 1);
if(result)
return CURLUE_OUT_OF_MEMORY;
}
else if(ISUNRESERVED(*i) ||
(pathmode && allowed_in_path(*i)) ||
((*i == '=') && equalsencode)) {
/* this byte is stored as-is */
if((*i == '=') && equalsencode)
/* only the first equals sign gets this treatment */
equalsencode = FALSE;
result = curlx_dyn_addn(&enc, i, 1);
if(result)
return cc2cu(result);
}
else {
/* everything else is percent encoded */
unsigned char out[3]={'%'};
Curl_hexbyte(&out[1], *i);
result = curlx_dyn_addn(&enc, out, 3);
if(result)
return cc2cu(result);
}
}
}
else {
/* stored without encoding, but %XX sequences that contain uppercase hex
   digits are converted to lowercase */
char *p;
CURLcode result = curlx_dyn_add(&enc, part);
if(result)
return cc2cu(result);
p = curlx_dyn_ptr(&enc);
while(*p) {
if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
(ISUPPER(p[1]) || ISUPPER(p[2]))) {
p[1] = Curl_raw_tolower(p[1]);
p[2] = Curl_raw_tolower(p[2]);
p += 3;
}
else
p++;
}
}
newp = curlx_dyn_ptr(&enc);
if(appendquery && newp) {
/* append the new string to the existing query, separated with '&'
   unless the existing query already ends with one */
size_t querylen = u->query ? strlen(u->query) : 0;
bool addamperand = querylen && (u->query[querylen -1] != '&');
/* with no existing query, the new string is stored the regular way at
   the end of the function */
if(querylen) {
struct dynbuf qbuf;
curlx_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
if(curlx_dyn_addn(&qbuf, u->query, querylen))
goto nomem;
if(addamperand) {
if(curlx_dyn_addn(&qbuf, "&", 1))
goto nomem;
}
if(curlx_dyn_add(&qbuf, newp))
goto nomem;
/* the combined query replaces the old one */
curlx_dyn_free(&enc);
free(*storep);
*storep = curlx_dyn_ptr(&qbuf);
return CURLUE_OK;
nomem:
curlx_dyn_free(&enc);
return CURLUE_OUT_OF_MEMORY;
}
}
else if(what == CURLUPART_HOST) {
size_t n = curlx_dyn_len(&enc);
if(!n && (flags & CURLU_NO_AUTHORITY)) {
/* an empty hostname is explicitly allowed: no check */
}
else {
bool bad = FALSE;
if(!n)
bad = TRUE;
else if(!urlencode) {
/* the name was not encoded here so it may hold %XX sequences: check a
   decoded copy */
size_t dlen;
char *decoded = NULL;
CURLcode result =
Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
if(result || hostname_check(u, decoded, dlen))
bad = TRUE;
free(decoded);
}
else if(hostname_check(u, (char *)CURL_UNCONST(newp), n))
bad = TRUE;
if(bad) {
curlx_dyn_free(&enc);
return CURLUE_BAD_HOSTNAME;
}
}
}
/* replace the old string; the handle takes over the dynbuf's buffer */
free(*storep);
*storep = (char *)CURL_UNCONST(newp);
}
return CURLUE_OK;
}