#include "cli.h"
#include <curl/curl.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/*
 * One inlining rule: which attribute of which element carries a resource URL,
 * and how the fetched bytes are to be treated.
 */
typedef struct {
/* Element name; compared case-insensitively against the tag being visited. */
const char *tag;
/* Attribute whose value is the URL to fetch and replace. */
const char *attr;
/* Optional guard attribute; NULL means the rule applies unconditionally. */
const char *require_attr;
/* Value the guard attribute must equal (case-insensitive) for the rule to apply. */
const char *require_val;
/* Non-zero: store the fetched bytes as a string rather than as raw data. */
int is_text;
/* Non-zero: run the fetched bytes through css_inline_urls() before storing. */
int is_css;
} GreedyRule;
/*
 * Table of element/attribute pairs whose URLs get inlined. Columns follow the
 * GreedyRule field order: tag, attr, require_attr, require_val, is_text, is_css.
 * greedify_element() and raw_greedify_impl() scan it from the top; the entry
 * with a NULL tag terminates the scan.
 */
static const GreedyRule rules[] = {
{ "link", "href", "rel", "stylesheet", 1, 1 },
{ "link", "href", "rel", "icon", 0, 0 },
{ "link", "href", "rel", "shortcut icon", 0, 0 },
{ "link", "href", "rel", "manifest", 1, 0 },
{ "link", "href", "rel", "preload", 0, 0 },
{ "script", "src", NULL, NULL, 1, 0 },
{ "img", "src", NULL, NULL, 0, 0 },
{ "video", "src", NULL, NULL, 0, 0 },
{ "video", "poster", NULL, NULL, 0, 0 },
{ "audio", "src", NULL, NULL, 0, 0 },
{ "source", "src", NULL, NULL, 0, 0 },
{ "embed", "src", NULL, NULL, 0, 0 },
{ "object", "data", NULL, NULL, 0, 0 },
{ "track", "src", NULL, NULL, 1, 0 },
{ "input", "src", "type", "image", 0, 0 },
{ "body", "background", NULL, NULL, 0, 0 },
/* SVG references, in both the xlink:href and the plain href spelling. */
{ "use", "xlink:href", NULL, NULL, 0, 0 },
{ "use", "href", NULL, NULL, 0, 0 },
{ "image", "xlink:href", NULL, NULL, 0, 0 },
{ "image", "href", NULL, NULL, 0, 0 },
/* Sentinel: tag == NULL ends the table. */
{ NULL, NULL, NULL, NULL, 0, 0 }
};
/*
 * Callback that loads the resource named by `url`, interpreted relative to
 * `base`. On success it returns a heap buffer (every caller in this file
 * releases it with free()) and stores the byte count in *out_len; it returns
 * NULL when the resource cannot be loaded.
 */
typedef uint8_t *(*ResourceFetcher)(const char *base, const char *url,
size_t *out_len);
/*
 * Read the whole file at `path` into a freshly allocated buffer.
 *
 * Returns the buffer (caller frees) and stores its size in *out_len.
 * Returns NULL, leaving *out_len untouched, when the file cannot be opened
 * (the reason is reported with perror), is empty, cannot be sized, cannot be
 * allocated for, or cannot be read in full.
 */
static uint8_t *greedy_read_file(const char *path, size_t *out_len)
{
    uint8_t *buf = NULL;
    long sz = 0;

    FILE *f = fopen(path, "rb");
    if (!f) {
        perror(path);
        return NULL;
    }

    if (fseek(f, 0, SEEK_END) != 0)
        goto fail;
    sz = ftell(f);
    if (sz <= 0)
        goto fail;
    if (fseek(f, 0, SEEK_SET) != 0)
        goto fail;

    buf = malloc((size_t)sz);
    if (!buf)
        goto fail;

    /* A short read would hand the caller uninitialised bytes; treat it as failure. */
    if (fread(buf, 1, (size_t)sz, f) != (size_t)sz)
        goto fail;

    fclose(f);
    *out_len = (size_t)sz;
    return buf;

fail:
    free(buf);
    fclose(f);
    return NULL;
}
/*
 * Map a URL found in a document to a filesystem path.
 *
 * URLs containing "://", protocol-relative URLs ("//...") and data: URLs are
 * not local files and yield NULL. Any "?query" or "#fragment" suffix is
 * dropped. A URL starting with '/' is used as an absolute path; anything else
 * is joined onto `base_dir` with a '/'.
 *
 * Returns a malloc'd path (caller frees), or NULL when the URL is not local,
 * is empty after stripping, or memory is exhausted.
 */
static char *resolve_url_fs(const char *base_dir, const char *url)
{
    if (strstr(url, "://") || strncmp(url, "//", 2) == 0 ||
        strncmp(url, "data:", 5) == 0)
        return NULL;

    /* Query strings and fragments never name a different file on disk. */
    size_t ulen = strcspn(url, "?#");
    if (ulen == 0)
        return NULL;

    /* Absolute paths get no prefix; relative ones get "base_dir/". */
    size_t prefix_len = 0;
    if (url[0] != '/')
        prefix_len = strlen(base_dir) + 1;

    char *path = malloc(prefix_len + ulen + 1);
    if (!path)
        return NULL;

    if (prefix_len > 0) {
        memcpy(path, base_dir, prefix_len - 1);
        path[prefix_len - 1] = '/';
    }
    memcpy(path + prefix_len, url, ulen);
    path[prefix_len + ulen] = '\0';
    return path;
}
/*
 * ResourceFetcher backed by the local filesystem: resolve `url` against the
 * directory `base` and read the resulting file. Returns NULL when the URL does
 * not resolve to a local path or the file cannot be read.
 */
static uint8_t *fetch_from_fs(const char *base, const char *url, size_t *out_len)
{
    uint8_t *bytes = NULL;
    char *resolved = resolve_url_fs(base, url);

    if (resolved != NULL) {
        bytes = greedy_read_file(resolved, out_len);
        free(resolved);
    }
    return bytes;
}
/*
 * ResourceFetcher backed by HTTP: resolve `url` against the page URL `base`
 * using libcurl's URL API and download the result with fetch_url_bytes().
 * data: URLs are never fetched. Returns NULL when the URL cannot be resolved
 * or the download yields nothing.
 */
static uint8_t *fetch_from_http(const char *base, const char *url, size_t *out_len)
{
    if (strncmp(url, "data:", 5) == 0)
        return NULL;

    CURLU *handle = curl_url();
    if (handle == NULL)
        return NULL;

    /* Seed the handle with the base, then apply `url` on top of it. */
    char *resolved = NULL;
    curl_url_set(handle, CURLUPART_URL, base, 0);
    int ok = curl_url_set(handle, CURLUPART_URL, url, 0) == CURLUE_OK &&
             curl_url_get(handle, CURLUPART_URL, &resolved, 0) == CURLUE_OK;
    curl_url_cleanup(handle);
    if (!ok)
        return NULL;

    uint8_t *bytes = fetch_url_bytes(resolved, out_len);
    curl_free(resolved);
    return bytes;
}
/*
 * Guess a MIME type from the file extension of `url`.
 *
 * The extension is taken from the last path segment only, after discarding
 * any "?query" or "#fragment", so "font.woff2?v=1.2" is recognised as woff2.
 * Matching is case-insensitive. Unknown or missing extensions map to
 * "application/octet-stream". The returned string is a static literal.
 */
static const char *css_mime_from_ext(const char *url)
{
    static const struct {
        const char *ext;
        const char *mime;
    } known[] = {
        { ".woff2", "font/woff2" },
        { ".woff",  "font/woff" },
        { ".ttf",   "font/ttf" },
        { ".otf",   "font/otf" },
        { ".svg",   "image/svg+xml" },
        { ".png",   "image/png" },
        { ".gif",   "image/gif" },
        { ".jpg",   "image/jpeg" },
        { ".jpeg",  "image/jpeg" },
        { ".webp",  "image/webp" },
    };
    static const char fallback[] = "application/octet-stream";

    /* Only the path part can carry the extension. */
    size_t path_len = strcspn(url, "?#");

    /* Walk back to the last '.' without crossing into a parent directory. */
    size_t dot = path_len;
    while (dot > 0 && url[dot - 1] != '.' && url[dot - 1] != '/')
        dot--;
    if (dot == 0 || url[dot - 1] != '.')
        return fallback;

    const char *ext = url + dot - 1;
    size_t ext_len = path_len - (dot - 1);

    for (size_t k = 0; k < sizeof known / sizeof known[0]; k++) {
        if (strlen(known[k].ext) == ext_len &&
            strncasecmp(ext, known[k].ext, ext_len) == 0)
            return known[k].mime;
    }
    return fallback;
}
/*
 * Encode `len` bytes as standard base64 with '=' padding.
 * Returns a NUL-terminated malloc'd string (caller frees), or NULL if the
 * allocation fails.
 */
static char *b64_encode(const uint8_t *data, size_t len)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    size_t groups = (len + 2) / 3;
    char *encoded = malloc(groups * 4 + 1);
    if (encoded == NULL)
        return NULL;

    char *w = encoded;
    const uint8_t *r = data;
    size_t remaining = len;

    /* Complete 3-byte groups map to 4 symbols with no padding. */
    while (remaining >= 3) {
        uint32_t bits = ((uint32_t)r[0] << 16) | ((uint32_t)r[1] << 8) |
                        (uint32_t)r[2];
        *w++ = alphabet[(bits >> 18) & 63];
        *w++ = alphabet[(bits >> 12) & 63];
        *w++ = alphabet[(bits >> 6) & 63];
        *w++ = alphabet[bits & 63];
        r += 3;
        remaining -= 3;
    }

    /* A trailing 1- or 2-byte group is zero-extended and padded with '='. */
    if (remaining == 2) {
        uint32_t bits = ((uint32_t)r[0] << 16) | ((uint32_t)r[1] << 8);
        *w++ = alphabet[(bits >> 18) & 63];
        *w++ = alphabet[(bits >> 12) & 63];
        *w++ = alphabet[(bits >> 6) & 63];
        *w++ = '=';
    } else if (remaining == 1) {
        uint32_t bits = (uint32_t)r[0] << 16;
        *w++ = alphabet[(bits >> 18) & 63];
        *w++ = alphabet[(bits >> 12) & 63];
        *w++ = '=';
        *w++ = '=';
    }

    *w = '\0';
    return encoded;
}
/*
 * Deepest @import nesting that is still inlined. Imports beyond this depth are
 * copied through untouched, which also stops a stylesheet that (directly or
 * indirectly) imports itself from recursing without bound.
 */
enum { CSS_MAX_IMPORT_DEPTH = 16 };

/* Output accumulator used while rewriting a stylesheet. */
typedef struct {
    char *data;
    size_t len;
    size_t cap;
} CssOut;

/*
 * Append `n` bytes to the accumulator, always keeping one spare byte for the
 * final terminator. Returns 0 on success and -1 if memory is exhausted, in
 * which case the existing buffer is left intact.
 */
static int css_out_write(CssOut *o, const char *src, size_t n)
{
    if (o->len + n + 1 > o->cap) {
        size_t new_cap = (o->len + n + 1) * 2;
        char *grown = realloc(o->data, new_cap);
        if (!grown)
            return -1;
        o->data = grown;
        o->cap = new_cap;
    }
    if (n > 0)
        memcpy(o->data + o->len, src, n);
    o->len += n;
    return 0;
}

/* Space or tab: the only whitespace the scanner skips inside a token. */
static int css_is_blank(char c)
{
    return c == ' ' || c == '\t';
}

/*
 * Test whether `word` (spelled in lower case) starts at css[at]. Letters match
 * in either case; every other character must match exactly.
 */
static int css_at_keyword(const char *css, size_t css_len, size_t at,
                          const char *word)
{
    size_t wlen = strlen(word);
    if (at + wlen > css_len)
        return 0;
    for (size_t k = 0; k < wlen; k++) {
        char c = css[at + k];
        char w = word[k];
        if (c == w)
            continue;
        if (w >= 'a' && w <= 'z' && c == (char)(w - 'a' + 'A'))
            continue;
        return 0;
    }
    return 1;
}

/* Duplicate `n` bytes as a NUL-terminated string; NULL if allocation fails. */
static char *css_copy_span(const char *src, size_t n)
{
    char *s = malloc(n + 1);
    if (s) {
        memcpy(s, src, n);
        s[n] = '\0';
    }
    return s;
}

/*
 * Parse the argument of url(...), with *pos just past the '('. Accepts an
 * optionally quoted value, skips blanks around it, and consumes the closing
 * ')' when present. Returns the value (caller frees) or NULL if allocation
 * fails; *pos is advanced either way.
 */
static char *css_parse_url_arg(const char *css, size_t css_len, size_t *pos)
{
    size_t i = *pos;
    while (i < css_len && css_is_blank(css[i]))
        i++;

    char quote = 0;
    if (i < css_len && (css[i] == '"' || css[i] == '\''))
        quote = css[i++];

    size_t vs = i;
    if (quote) {
        while (i < css_len && css[i] != quote)
            i++;
    } else {
        while (i < css_len && css[i] != ')' && !css_is_blank(css[i]))
            i++;
    }
    size_t vlen = i - vs;

    if (quote && i < css_len)
        i++;
    while (i < css_len && css_is_blank(css[i]))
        i++;
    if (i < css_len && css[i] == ')')
        i++;

    *pos = i;
    return css_copy_span(css + vs, vlen);
}

/*
 * Parse a quoted string with *pos on the opening quote. Returns the contents
 * (caller frees) or NULL if allocation fails; *pos ends past the closing quote.
 */
static char *css_parse_quoted(const char *css, size_t css_len, size_t *pos)
{
    size_t i = *pos;
    char quote = css[i++];
    size_t vs = i;
    while (i < css_len && css[i] != quote)
        i++;
    size_t vlen = i - vs;
    if (i < css_len)
        i++;
    *pos = i;
    return css_copy_span(css + vs, vlen);
}

/* Worker for css_inline_urls(); `depth` counts enclosing @import levels. */
static char *css_inline_urls_depth(const char *css, size_t css_len,
                                   const char *base, ResourceFetcher fetch,
                                   int depth)
{
    CssOut out = { NULL, 0, css_len + 1 };
    out.data = malloc(out.cap);
    if (!out.data)
        return NULL;

    size_t i = 0;
    while (i < css_len) {
        if (css_at_keyword(css, css_len, i, "@import")) {
            size_t tok = i;
            i += 7;
            while (i < css_len && css_is_blank(css[i]))
                i++;

            /* The target is either url(...) or a bare quoted string. */
            char *import_url = NULL;
            if (css_at_keyword(css, css_len, i, "url(")) {
                i += 4;
                import_url = css_parse_url_arg(css, css_len, &i);
                if (!import_url)
                    goto oom;
            } else if (i < css_len && (css[i] == '"' || css[i] == '\'')) {
                import_url = css_parse_quoted(css, css_len, &i);
                if (!import_url)
                    goto oom;
            }

            /* Swallow the rest of the rule (media queries etc.) up to ';'. */
            while (i < css_len && css[i] != ';' && css[i] != '\n')
                i++;
            if (i < css_len && css[i] == ';')
                i++;

            int replaced = 0;
            if (import_url && import_url[0] &&
                strncmp(import_url, "data:", 5) != 0 &&
                depth < CSS_MAX_IMPORT_DEPTH) {
                size_t flen = 0;
                uint8_t *fdata = fetch(base, import_url, &flen);
                if (fdata) {
                    char *sub = css_inline_urls_depth((char *)fdata, flen,
                                                      base, fetch, depth + 1);
                    free(fdata);
                    if (sub) {
                        int rc = css_out_write(&out, sub, strlen(sub));
                        if (rc == 0)
                            rc = css_out_write(&out, "\n", 1);
                        free(sub);
                        if (rc != 0) {
                            free(import_url);
                            goto oom;
                        }
                        fprintf(stderr, "greedy: inlined @import %s (%zu bytes)\n",
                                import_url, flen);
                        replaced = 1;
                    }
                }
            }
            free(import_url);

            /* Anything not inlined is passed through verbatim. */
            if (!replaced && css_out_write(&out, css + tok, i - tok) != 0)
                goto oom;
        } else if (css_at_keyword(css, css_len, i, "url(")) {
            size_t tok = i;
            i += 4;
            char *url = css_parse_url_arg(css, css_len, &i);
            if (!url)
                goto oom;

            /* Empty, already-inline and fragment-only references stay as-is. */
            int skip = (url[0] == '\0' ||
                        strncmp(url, "data:", 5) == 0 || url[0] == '#');
            int replaced = 0;
            if (!skip) {
                size_t flen = 0;
                uint8_t *fdata = fetch(base, url, &flen);
                if (fdata) {
                    char *b64 = b64_encode(fdata, flen);
                    free(fdata);
                    if (b64) {
                        const char *mime = css_mime_from_ext(url);
                        int rc = css_out_write(&out, "url(data:", 9);
                        if (rc == 0)
                            rc = css_out_write(&out, mime, strlen(mime));
                        if (rc == 0)
                            rc = css_out_write(&out, ";base64,", 8);
                        if (rc == 0)
                            rc = css_out_write(&out, b64, strlen(b64));
                        if (rc == 0)
                            rc = css_out_write(&out, ")", 1);
                        free(b64);
                        if (rc != 0) {
                            free(url);
                            goto oom;
                        }
                        fprintf(stderr, "greedy: inlined css url(%s) (%zu bytes)\n",
                                url, flen);
                        replaced = 1;
                    }
                }
            }
            free(url);

            if (!replaced && css_out_write(&out, css + tok, i - tok) != 0)
                goto oom;
        } else {
            if (css_out_write(&out, css + i, 1) != 0)
                goto oom;
            i++;
        }
    }

    /* css_out_write() always reserves the byte used here. */
    out.data[out.len] = '\0';
    return out.data;

oom:
    free(out.data);
    return NULL;
}

/*
 * Return a copy of the stylesheet `css` (`css_len` bytes) in which
 *   - every "@import" whose target can be fetched is replaced by the imported
 *     stylesheet, itself processed recursively, followed by a newline;
 *   - every "url(...)" whose target can be fetched is replaced by a
 *     base64 data: URL, with the MIME type guessed from the extension.
 * References that are empty, data: URLs, fragment-only, or that fail to fetch
 * are copied through unchanged. All URLs, including those inside imported
 * sheets, are resolved against `base` through `fetch`.
 *
 * Returns a NUL-terminated malloc'd string (caller frees), or NULL when
 * memory is exhausted.
 */
static char *css_inline_urls(const char *css, size_t css_len,
                             const char *base, ResourceFetcher fetch)
{
    return css_inline_urls_depth(css, css_len, base, fetch, 0);
}
/*
 * Look up `key` in a key/value set. Returns the stored value, or NULL when
 * `kvs` is NULL, is not a DTOB_KV_SET, or holds no such key. Keys are compared
 * by exact length and bytes.
 */
static const DtobValue *kv_find(const DtobValue *kvs, const char *key)
{
    if (kvs == NULL || kvs->type != DTOB_KV_SET)
        return NULL;

    const size_t want = strlen(key);
    size_t idx = 0;
    while (idx < kvs->num_pairs) {
        int same = kvs->pairs[idx].key_len == want &&
                   memcmp(kvs->pairs[idx].key, key, want) == 0;
        if (same)
            return kvs->pairs[idx].value;
        idx++;
    }
    return NULL;
}
/*
 * Store `newval` under `key`. If the key already exists its old value is
 * released with dtob_free() and the slot is overwritten; otherwise the pair is
 * added with dtob_kvset_put().
 */
static void kv_replace(DtobValue *kvs, const char *key, DtobValue *newval)
{
    const size_t want = strlen(key);

    /* Find the first slot holding this key, if any. */
    size_t slot = 0;
    while (slot < kvs->num_pairs &&
           !(kvs->pairs[slot].key_len == want &&
             memcmp(kvs->pairs[slot].key, key, want) == 0))
        slot++;

    if (slot == kvs->num_pairs) {
        dtob_kvset_put(kvs, key, newval);
        return;
    }

    dtob_free(kvs->pairs[slot].value);
    kvs->pairs[slot].value = newval;
}
/* Whitespace that may separate the parts of a srcset candidate. */
static int srcset_is_space(char c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}

/*
 * Pull the next "url [descriptor]" candidate out of a srcset string.
 *
 * *p is the read cursor and is advanced past the candidate (it stops on the
 * separating comma or the terminating NUL). On success returns 1, sets
 * *url_out to the URL and *desc_out to the descriptor with surrounding
 * whitespace removed, or to NULL when the candidate has no descriptor; both
 * strings are malloc'd and owned by the caller. Returns 0, leaving the
 * outputs untouched, when no candidate remains or memory is exhausted.
 */
static int srcset_next(const char **p, char **url_out, char **desc_out)
{
    const char *s = *p;

    /* Skip separators left over from the previous candidate. */
    while (srcset_is_space(*s) || *s == ',')
        s++;
    if (!*s) {
        *p = s;
        return 0;
    }

    const char *url_start = s;
    while (*s && !srcset_is_space(*s) && *s != ',')
        s++;
    size_t url_len = (size_t)(s - url_start);

    /* Line breaks count as whitespace here too, so "a.png\n," has no descriptor. */
    while (srcset_is_space(*s))
        s++;
    const char *desc_start = s;
    while (*s && *s != ',')
        s++;
    const char *desc_end = s;
    while (desc_end > desc_start && srcset_is_space(desc_end[-1]))
        desc_end--;
    size_t desc_len = (size_t)(desc_end - desc_start);

    *p = s;

    char *url = malloc(url_len + 1);
    if (!url)
        return 0;
    memcpy(url, url_start, url_len);
    url[url_len] = '\0';

    char *desc = NULL;
    if (desc_len > 0) {
        desc = malloc(desc_len + 1);
        if (!desc) {
            free(url);
            return 0;
        }
        memcpy(desc, desc_start, desc_len);
        desc[desc_len] = '\0';
    }

    *url_out = url;
    *desc_out = desc;
    return 1;
}
/*
 * Inline the images referenced by the "srcset" attribute in `attrs`.
 *
 * Each candidate that can be fetched becomes a key/value set holding the raw
 * bytes under "src" and, when present, its descriptor under "d". If at least
 * one candidate was fetched the attribute is replaced by an array of those
 * entries (candidates that failed to fetch are not carried over); otherwise
 * the attribute is left untouched. Does nothing when there is no non-empty
 * srcset or when memory for the working copy cannot be obtained.
 */
static void greedify_srcset(DtobValue *attrs, const char *base, ResourceFetcher fetch)
{
    const DtobValue *sv = kv_find(attrs, "srcset");
    if (!sv || !sv->data || sv->data_len == 0)
        return;

    /* srcset_next() needs a NUL-terminated string; the stored value is length-delimited. */
    char *srcset = malloc(sv->data_len + 1);
    if (!srcset)
        return;
    memcpy(srcset, sv->data, sv->data_len);
    srcset[sv->data_len] = '\0';

    DtobValue *arr = dtob_array();
    int any = 0;
    const char *p = srcset;
    char *url = NULL;
    char *desc = NULL;

    while (srcset_next(&p, &url, &desc)) {
        size_t flen = 0;
        uint8_t *fdata = fetch(base, url, &flen);
        if (fdata) {
            fprintf(stderr, "greedy: inlined srcset %s (%zu bytes)\n", url, flen);
            DtobValue *entry = dtob_kvset();
            dtob_kvset_put(entry, "src", dtob_raw(fdata, flen));
            free(fdata);
            if (desc)
                dtob_kvset_put(entry, "d", json_string(desc, strlen(desc)));
            dtob_array_push(arr, entry);
            any = 1;
        }
        free(url);
        free(desc);
    }
    free(srcset);

    if (any)
        kv_replace(attrs, "srcset", arr);
    else
        dtob_free(arr);
}
static void greedify_node(DtobValue *v, const char *base, ResourceFetcher fetch);
static void greedify_element(DtobValue *elem, const char *base, ResourceFetcher fetch)
{
const DtobValue *tag_v = kv_find(elem, "t");
DtobValue *attrs = (DtobValue *)kv_find(elem, "a");
DtobValue *children = (DtobValue *)kv_find(elem, "c");
if (tag_v && tag_v->data && tag_v->data_len > 0 && attrs) {
for (int ri = 0; rules[ri].tag; ri++) {
size_t tlen = strlen(rules[ri].tag);
if (tag_v->data_len != tlen ||
strncasecmp((char *)tag_v->data, rules[ri].tag, tlen) != 0)
continue;
if (rules[ri].require_attr) {
const DtobValue *guard = kv_find(attrs, rules[ri].require_attr);
if (!guard || !guard->data) break;
if (strncasecmp((char *)guard->data, rules[ri].require_val,
guard->data_len) != 0 ||
strlen(rules[ri].require_val) != guard->data_len)
break;
}
const DtobValue *url_v = kv_find(attrs, rules[ri].attr);
if (!url_v || !url_v->data || url_v->data_len == 0) break;
char url_z[1024];
size_t ulen = url_v->data_len < sizeof(url_z) - 1
? url_v->data_len : sizeof(url_z) - 1;
memcpy(url_z, url_v->data, ulen);
url_z[ulen] = '\0';
size_t flen;
uint8_t *fdata = fetch(base, url_z, &flen);
if (!fdata) break;
fprintf(stderr, "greedy: inlined %s (%zu bytes)\n", url_z, flen);
DtobValue *content;
if (rules[ri].is_css) {
char *css = css_inline_urls((char *)fdata, flen, base, fetch);
free(fdata);
if (!css) break;
content = json_string(css, strlen(css));
free(css);
} else if (rules[ri].is_text) {
content = json_string((char *)fdata, flen);
free(fdata);
} else {
content = dtob_raw(fdata, flen);
free(fdata);
}
kv_replace(attrs, rules[ri].attr, content);
break;
}
}
if (tag_v && tag_v->data && tag_v->data_len > 0 && attrs) {
if ((tag_v->data_len == 3 &&
strncasecmp((char *)tag_v->data, "img", 3) == 0) ||
(tag_v->data_len == 6 &&
strncasecmp((char *)tag_v->data, "source", 6) == 0)) {
greedify_srcset(attrs, base, fetch);
}
}
if (tag_v && tag_v->data && tag_v->data_len == 7 &&
strncasecmp((char *)tag_v->data, "picture", 7) == 0 && children) {
if (children->type == DTOB_ARRAY) {
for (size_t i = 0; i < children->num_elements; i++) {
DtobValue *child = children->elements[i];
if (!child || child->type != DTOB_KV_SET) continue;
const DtobValue *ctag = kv_find(child, "t");
DtobValue *cattrs = (DtobValue *)kv_find(child, "a");
if (!ctag || !ctag->data || !cattrs) continue;
if ((ctag->data_len == 6 &&
strncasecmp((char *)ctag->data, "source", 6) == 0) ||
(ctag->data_len == 3 &&
strncasecmp((char *)ctag->data, "img", 3) == 0)) {
greedify_srcset(cattrs, base, fetch);
}
}
}
}
if (children) greedify_node(children, base, fetch);
}
/*
 * Walk a value tree and inline resources in every element found.
 * Arrays are visited element by element. A key/value set that has a "t" key
 * is treated as an element; any other key/value set is descended into value
 * by value. All other value types are ignored.
 */
static void greedify_node(DtobValue *v, const char *base, ResourceFetcher fetch)
{
    if (v == NULL)
        return;

    if (v->type == DTOB_ARRAY) {
        for (size_t k = 0; k < v->num_elements; k++)
            greedify_node(v->elements[k], base, fetch);
        return;
    }

    if (v->type != DTOB_KV_SET)
        return;

    if (kv_find(v, "t") != NULL) {
        greedify_element(v, base, fetch);
        return;
    }

    for (size_t k = 0; k < v->num_pairs; k++)
        greedify_node(v->pairs[k].value, base, fetch);
}
/*
 * Inline the resources of a parsed document that was loaded from the file
 * `input_path`. Relative URLs are resolved against the directory part of that
 * path, or "." when the path contains no '/'. The directory is truncated to
 * fit the local 4096-byte buffer.
 */
void html_greedify(DtobValue *root, const char *input_path)
{
    char base_dir[4096] = ".";

    const char *slash = strrchr(input_path, '/');
    if (slash != NULL) {
        size_t dir_len = (size_t)(slash - input_path);
        if (dir_len > sizeof(base_dir) - 1)
            dir_len = sizeof(base_dir) - 1;
        memcpy(base_dir, input_path, dir_len);
        base_dir[dir_len] = '\0';
    }

    greedify_node(root, base_dir, fetch_from_fs);
}
/*
 * Inline the resources of a parsed document that was loaded from `base_url`;
 * every reference is resolved against that URL and fetched over HTTP.
 */
void html_greedify_url(DtobValue *root, const char *base_url)
{
    ResourceFetcher over_http = fetch_from_http;

    greedify_node(root, base_url, over_http);
}
/*
 * Prepare an empty resource list with room for 16 entries.
 * The allocation result is stored as-is and is not checked here.
 */
void resource_list_init(ResourceList *rl)
{
    rl->count = 0;
    rl->cap = 16;
    rl->entries = malloc(rl->cap * sizeof(ResourceEntry));
}
/*
 * Release every entry's data buffer and then the entry array itself.
 * The list's fields are not reset afterwards.
 */
void resource_list_free(ResourceList *rl)
{
    size_t remaining = rl->count;

    while (remaining > 0) {
        remaining--;
        free(rl->entries[remaining].data);
    }
    free(rl->entries);
}
/*
 * Append a resource to the list and return its index.
 *
 * The list takes ownership of `data` (resource_list_free() frees it). The
 * entry array is grown by doubling; it is also (re)allocated when it is
 * missing, so a list whose initial allocation failed or that started with a
 * zero capacity still works. If the array cannot be grown the process is
 * terminated with a message: the callers in this file use the returned index
 * unconditionally, so there is no way to report the failure to them.
 */
static size_t resource_list_add(ResourceList *rl, uint8_t *data, size_t len,
                                int is_text)
{
    if (rl->count >= rl->cap || !rl->entries) {
        size_t new_cap = rl->cap;
        if (rl->count >= new_cap)
            new_cap = new_cap ? new_cap * 2 : 16;

        /* Keep the old pointer until the new block is known to be valid. */
        void *grown = realloc(rl->entries, new_cap * sizeof(ResourceEntry));
        if (!grown) {
            fprintf(stderr, "greedy: out of memory growing resource list\n");
            abort();
        }
        rl->entries = grown;
        rl->cap = new_cap;
    }

    size_t idx = rl->count++;
    rl->entries[idx].data = data;
    rl->entries[idx].len = len;
    rl->entries[idx].is_text = is_text;
    return idx;
}
/*
 * Growable string buffer. While `data` is non-NULL it is always
 * NUL-terminated. An allocation failure is sticky: the storage is released,
 * `data` becomes NULL with `len` and `cap` zero, and every later append is a
 * no-op, so a caller only has to test `data` once at the end.
 */
typedef struct {
    char *data;   /* heap storage; NULL after an allocation failure */
    size_t len;   /* bytes in use, terminator not counted */
    size_t cap;   /* bytes allocated */
} StrBuf;

/* Start an empty buffer with 4096 bytes of capacity. */
static void sb_init(StrBuf *sb)
{
    sb->len = 0;
    sb->cap = 4096;
    sb->data = malloc(sb->cap);
    if (!sb->data) {
        sb->cap = 0;
        return;
    }
    /* An untouched buffer must still read as the empty string. */
    sb->data[0] = '\0';
}

/*
 * Make sure strictly more than `extra` bytes are free beyond `len`, growing to
 * twice the required size when they are not. On failure the buffer enters the
 * sticky failed state described on StrBuf.
 */
static void sb_ensure(StrBuf *sb, size_t extra)
{
    if (!sb->data)
        return;
    if (sb->len + extra < sb->cap)
        return;

    size_t new_cap = (sb->len + extra) * 2;
    char *grown = realloc(sb->data, new_cap);
    if (!grown) {
        free(sb->data);
        sb->data = NULL;
        sb->len = 0;
        sb->cap = 0;
        return;
    }
    sb->data = grown;
    sb->cap = new_cap;
}

/* Append `n` bytes from `s` and re-terminate. No-op on a failed buffer. */
static void sb_append(StrBuf *sb, const char *s, size_t n)
{
    sb_ensure(sb, n + 1);
    if (!sb->data)
        return;
    if (n > 0)
        memcpy(sb->data + sb->len, s, n);
    sb->len += n;
    sb->data[sb->len] = '\0';
}

/* Append a NUL-terminated string. */
static void sb_appends(StrBuf *sb, const char *s)
{
    sb_append(sb, s, strlen(s));
}
/* Return the first index at or after `pos` that is not space, tab, LF or CR (or `len`). */
static size_t skip_ws(const char *s, size_t len, size_t pos)
{
    while (pos < len && (s[pos] == ' ' || s[pos] == '\t' ||
                         s[pos] == '\n' || s[pos] == '\r'))
        pos++;
    return pos;
}

/*
 * Read one attribute value starting at *pos (just past the '=').
 *
 * Leading whitespace is skipped. A value opening with ' or " runs to the
 * matching quote; an unquoted value runs to the next whitespace or '>'.
 * On success the value's half-open byte range within `html` is stored in
 * *val_start_out / *val_end_out, *pos is moved past the value (and its closing
 * quote), and a malloc'd copy is returned (caller frees).
 *
 * Returns NULL with nothing modified when only whitespace remains before
 * `html_len`, and NULL with *pos and the range already updated when the copy
 * cannot be allocated.
 */
static char *extract_attr_value(const char *html, size_t html_len,
                                size_t *pos, size_t *val_start_out,
                                size_t *val_end_out)
{
    size_t p = skip_ws(html, html_len, *pos);
    if (p >= html_len)
        return NULL;

    char quote = 0;
    if (html[p] == '"' || html[p] == '\'')
        quote = html[p++];

    size_t vs = p;
    if (quote) {
        while (p < html_len && html[p] != quote)
            p++;
    } else {
        while (p < html_len && html[p] != ' ' && html[p] != '\t' &&
               html[p] != '\n' && html[p] != '\r' && html[p] != '>')
            p++;
    }
    size_t ve = p;
    if (quote && p < html_len)
        p++;

    /* Publish the positions first so the caller advances even if the copy fails. */
    *val_start_out = vs;
    *val_end_out = ve;
    *pos = p;

    char *val = malloc(ve - vs + 1);
    if (!val)
        return NULL;
    memcpy(val, html + vs, ve - vs);
    val[ve - vs] = '\0';
    return val;
}

/*
 * Find attribute `attr_name` (case-insensitive) in the tag occupying
 * html[tag_start, tag_end), where tag_start is the '<' and tag_end is the
 * index of the closing '>'.
 *
 * Returns a malloc'd copy of the value (caller frees) and its byte range in
 * *val_start / *val_end, or NULL when the attribute is absent, has no value,
 * or memory is exhausted. Attributes without a value are skipped.
 */
static char *find_attr_in_tag(const char *html, size_t tag_start, size_t tag_end,
                              const char *attr_name,
                              size_t *val_start, size_t *val_end)
{
    size_t alen = strlen(attr_name);
    size_t p = tag_start;

    /* Step over "<name". */
    while (p < tag_end && html[p] != ' ' && html[p] != '\t' &&
           html[p] != '\n' && html[p] != '\r' && html[p] != '/' &&
           html[p] != '>')
        p++;

    while (p < tag_end) {
        p = skip_ws(html, tag_end, p);
        if (p >= tag_end)
            break;

        size_t ns = p;
        while (p < tag_end && html[p] != '=' && html[p] != ' ' &&
               html[p] != '\t' && html[p] != '\n' && html[p] != '\r' &&
               html[p] != '>' && html[p] != '/')
            p++;
        size_t nlen = p - ns;

        p = skip_ws(html, tag_end, p);
        if (p < tag_end && html[p] == '=') {
            p++;
            if (nlen == alen &&
                strncasecmp(html + ns, attr_name, alen) == 0)
                return extract_attr_value(html, tag_end, &p,
                                          val_start, val_end);

            /* Some other attribute: parse its value only to get past it. */
            size_t skipped_s, skipped_e;
            free(extract_attr_value(html, tag_end, &p, &skipped_s, &skipped_e));
        } else if (nlen == 0) {
            /*
             * Stopped on a stray '/' or '>' (e.g. the slash of "<img ... />").
             * Nothing above consumes it, so step over it to guarantee progress.
             */
            p++;
        }
    }
    return NULL;
}
/* Characters that end a tag name in the raw scanner. */
static int raw_is_name_end(char c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
           c == '>' || c == '/';
}

/*
 * Rewrite raw HTML text so that resource URLs become "dtob:<index>"
 * placeholders, collecting the fetched resources in `rl`.
 *
 * The markup is scanned tag by tag. For each tag, the rules table is walked
 * until one rule matches by name and guard attribute, has a non-empty URL
 * that is neither a data: URL nor a fragment, and can be fetched. The fetched
 * bytes (post-processed by css_inline_urls() for CSS rules) are added to `rl`
 * and the attribute value is replaced by the placeholder. Everything else,
 * including "<!...", "<?..." and an unterminated final tag, is copied through
 * verbatim. At most one attribute per tag is rewritten.
 *
 * Returns the rewritten document as held by the internal StrBuf (caller frees).
 */
static char *raw_greedify_impl(const char *html, size_t html_len,
                               const char *base, ResourceFetcher fetch,
                               ResourceList *rl)
{
    StrBuf out;
    sb_init(&out);

    size_t pos = 0;
    while (pos < html_len) {
        /* Plain text up to the next '<' is copied as one run. */
        size_t run_end = pos;
        while (run_end < html_len && html[run_end] != '<')
            run_end++;
        if (run_end > pos) {
            sb_append(&out, html + pos, run_end - pos);
            pos = run_end;
            continue;
        }

        size_t lt = pos;
        size_t cur = lt + 1;

        /* Comments, doctypes and processing instructions are not tags we rewrite. */
        if (cur >= html_len || html[cur] == '!' || html[cur] == '?') {
            sb_append(&out, "<", 1);
            pos = cur;
            continue;
        }

        size_t name_at = cur;
        while (cur < html_len && !raw_is_name_end(html[cur]))
            cur++;
        size_t name_len = cur - name_at;

        size_t close = cur;
        while (close < html_len && html[close] != '>')
            close++;
        if (close >= html_len) {
            /* No closing '>': emit the remainder untouched and stop. */
            sb_append(&out, html + lt, html_len - lt);
            break;
        }
        size_t tag_end = close + 1;

        int rewritten = 0;
        for (const GreedyRule *rule = rules; rule->tag; rule++) {
            if (strlen(rule->tag) != name_len ||
                strncasecmp(html + name_at, rule->tag, name_len) != 0)
                continue;

            if (rule->require_attr) {
                size_t gs, ge;
                char *guard = find_attr_in_tag(html, lt, tag_end - 1,
                                               rule->require_attr, &gs, &ge);
                if (!guard)
                    continue;
                int differs = strcasecmp(guard, rule->require_val);
                free(guard);
                if (differs != 0)
                    continue;
            }

            size_t vs, ve;
            char *url = find_attr_in_tag(html, lt, tag_end - 1,
                                         rule->attr, &vs, &ve);
            if (!url)
                continue;
            if (url[0] == '\0' || strncmp(url, "data:", 5) == 0 ||
                url[0] == '#') {
                free(url);
                continue;
            }

            size_t flen;
            uint8_t *fdata = fetch(base, url, &flen);
            if (!fdata) {
                free(url);
                continue;
            }
            fprintf(stderr, "greedy: extracted %s (%zu bytes)\n", url, flen);
            free(url);

            if (rule->is_css) {
                char *css = css_inline_urls((char *)fdata, flen, base, fetch);
                free(fdata);
                if (!css)
                    continue;
                fdata = (uint8_t *)css;
                flen = strlen(css);
            }

            size_t idx = resource_list_add(rl, fdata, flen, rule->is_text);
            char placeholder[32];
            snprintf(placeholder, sizeof(placeholder), "dtob:%zu", idx);

            /* tag text before the value + placeholder + tag text after it */
            sb_append(&out, html + lt, vs - lt);
            sb_appends(&out, placeholder);
            sb_append(&out, html + ve, tag_end - ve);
            rewritten = 1;
            break;
        }

        if (!rewritten)
            sb_append(&out, html + lt, tag_end - lt);
        pos = tag_end;
    }

    return out.data;
}
/*
 * Rewrite raw HTML that was loaded from the file `input_path`, resolving
 * resource URLs against the directory part of that path ("." when the path
 * contains no '/'). The directory is truncated to fit the local 4096-byte
 * buffer. Fetched resources are collected in `rl`; the rewritten document is
 * returned as produced by raw_greedify_impl().
 */
char *html_raw_greedify(const char *html, size_t html_len,
                        const char *input_path, ResourceList *rl)
{
    char base_dir[4096] = ".";

    const char *slash = strrchr(input_path, '/');
    if (slash != NULL) {
        size_t dir_len = (size_t)(slash - input_path);
        if (dir_len > sizeof(base_dir) - 1)
            dir_len = sizeof(base_dir) - 1;
        memcpy(base_dir, input_path, dir_len);
        base_dir[dir_len] = '\0';
    }

    return raw_greedify_impl(html, html_len, base_dir, fetch_from_fs, rl);
}
/*
 * Rewrite raw HTML that was loaded from `base_url`; resource references are
 * resolved against that URL and fetched over HTTP. Fetched resources are
 * collected in `rl`; the rewritten document is returned as produced by
 * raw_greedify_impl().
 */
char *html_raw_greedify_url(const char *html, size_t html_len,
                            const char *base_url, ResourceList *rl)
{
    ResourceFetcher over_http = fetch_from_http;

    return raw_greedify_impl(html, html_len, base_url, over_http, rl);
}