#include "html.h"
#include "css_selector.h"
#include "css_parser.h"
#include "internal.h"
#include "document.h"
#include <set>
namespace litehtml
{
void css_selector::calc_specificity()
{
if(m_right.m_tag != star_id)
{
m_specificity.d = 1;
}
for(const auto& attr : m_right.m_attrs)
{
if(attr.type == select_id)
{
m_specificity.b++;
} else
{
m_specificity.c++;
}
}
if(m_left)
{
m_left->calc_specificity();
m_specificity += m_left->m_specificity;
}
}
void css_selector::add_media_to_doc( document* doc ) const
{
if(m_media_query && doc)
{
doc->add_media_list(m_media_query);
}
}
string parse_ns_prefix(const css_token_vector& tokens, int& index)
{
const auto& a = at(tokens, index);
const auto& b = at(tokens, index + 1);
if (a.ch == '|')
{
index++;
return "";
}
if ((a.type == IDENT || a.ch == '*') && b.ch == '|')
{
index += 2;
return a.type == IDENT ? a.name : "*";
}
return "";
}
struct wq_name
{
string prefix;
string name;
};
wq_name parse_wq_name(const css_token_vector& tokens, int& index)
{
int start = index;
string prefix = parse_ns_prefix(tokens, index);
auto tok = at(tokens, index);
if (tok.type == IDENT)
{
index++;
return { prefix, tok.name };
}
index = start;
tok = at(tokens, index);
if (tok.type == IDENT)
{
index++;
return { "", tok.name };
}
return {};
}
wq_name parse_type_selector(const css_token_vector& tokens, int& index)
{
int start = index;
string prefix = parse_ns_prefix(tokens, index);
const auto& tok = at(tokens, index);
if (tok.type == IDENT || tok.ch == '*')
{
index++;
string name = tok.type == IDENT ? tok.name : "*";
return { lowcase(prefix), lowcase(name) };
}
index = start;
return {};
}
bool parse_attr_matcher(const css_token_vector& tokens, int& index, attr_matcher& matcher)
{
const auto& a = at(tokens, index);
const auto& b = at(tokens, index + 1);
if (a.ch == '=')
{
index++;
matcher = attribute_equals;
return true;
}
if (!(is_one_of(a.ch, '~', '|', '^', '$', '*') && b.ch == '='))
return false;
index += 2;
matcher = (attr_matcher)a.ch;
return true;
}
css_attribute_selector parse_attribute_selector(const css_token& block)
{
css_attribute_selector selector;
const css_token_vector& tokens = block.value;
int index = 0;
skip_whitespace(tokens, index);
wq_name wq_name = parse_wq_name(tokens, index);
if (wq_name.name == "") return {};
auto prefix = lowcase(wq_name.prefix);
auto name = lowcase(wq_name.name);
skip_whitespace(tokens, index);
if (index == (int)tokens.size()) {
selector.type = select_attr;
selector.prefix = _id(prefix);
selector.name = _id(name);
selector.matcher = attribute_exists;
return selector;
}
skip_whitespace(tokens, index);
attr_matcher matcher;
if (!parse_attr_matcher(tokens, index, matcher))
return {};
skip_whitespace(tokens, index);
const css_token& value = at(tokens, index);
if (value.type != STRING && value.type != IDENT)
return {};
index++;
skip_whitespace(tokens, index);
char modifier = 0;
const css_token& tok = at(tokens, index);
if (tok.type == IDENT)
{
if (tok.ident() == "s") modifier = 's';
else if (tok.ident() == "i") modifier = 'i';
else return {}; index++;
}
skip_whitespace(tokens, index);
if (index != (int)tokens.size())
return {};
static string_vector special_attributes = {
"accept",
"accept-charset",
"align",
"alink",
"axis",
"bgcolor",
"charset",
"checked",
"clear",
"codetype",
"color",
"compact",
"declare",
"defer",
"dir",
"direction",
"disabled",
"enctype",
"face",
"frame",
"hreflang",
"http-equiv",
"lang",
"language",
"link",
"media",
"method",
"multiple",
"nohref",
"noresize",
"noshade",
"nowrap",
"readonly",
"rel",
"rev",
"rules",
"scope",
"scrolling",
"selected",
"shape",
"target",
"text",
"type",
"valign",
"valuetype",
"vlink",
};
selector.type = select_attr;
selector.prefix = _id(prefix);
selector.name = _id(name);
selector.matcher = matcher;
selector.caseless_match = modifier == 'i' || (!modifier && name in special_attributes);
selector.value = selector.caseless_match ? lowcase(value.str) : value.str;
return selector;
}
struct an_b
{
int a, b;
bool valid;
an_b() : a(), b(), valid(false) {}
an_b(int a, int b) : a(a), b(b), valid(true) {}
};
bool to_int(string s, int& number)
{
if (s == "") return false;
const char* ptr = s.c_str();
char* end;
int n = strtol(ptr, &end, 10);
if (end != ptr + s.size())
return false;
number = n;
return true;
}
an_b parse_an_b(string s)
{
lcase(trim(s));
if (s == "even") return {2, 0};
if (s == "odd") return {2, 1};
int a, b;
int i = (int)s.find('n');
if (i == -1)
{
if (!to_int(s, b)) return {};
return {0, b};
}
auto str_a = s.substr(0, i);
auto str_b = s.substr(i + 1);
if (is_one_of(str_a, "", "+", "-"))
a = str_a == "-" ? -1 : 1;
else
{
if (!to_int(str_a, a)) return {};
}
trim(str_b); if (str_b != "")
{
if (str_b[0] == '+' || str_b[0] == '-')
while (is_whitespace(str_b[1])) str_b.erase(1, 1);
if (!to_int(str_b, b)) return {};
}
else
b = 0;
return {a, b};
}
int find_of_keyword(const css_token_vector& tokens)
{
for (int i = 0; i < (int)tokens.size(); i++)
{
if (tokens[i].ident() == "of")
return i;
}
return -1;
}
css_attribute_selector parse_nth_child(const css_token& token, bool of_keyword, document_mode mode)
{
css_attribute_selector selector(select_pseudo_class, lowcase(token.name));
const auto& tokens = token.value;
int i = of_keyword ? find_of_keyword(tokens) : -1;
if (i >= 0)
{
const auto& selector_tokens = slice(tokens, i + 1);
selector.selector_list = parse_selector_list(selector_tokens, forgiving_mode + forbid_pseudo_elements, mode);
}
string str = get_repr(tokens, 0, i);
an_b x = parse_an_b(str);
if (!x.valid) return {};
selector.a = x.a;
selector.b = x.b;
return selector;
}
css_attribute_selector parse_function_pseudo_class(const css_token& token, document_mode mode)
{
string name = lowcase(token.name);
if (name == "nth-child" || name == "nth-last-child")
{
return parse_nth_child(token, true, mode);
}
else if (name == "nth-of-type" || name == "nth-last-of-type")
{
return parse_nth_child(token, false, mode);
}
else if (name == "is") {
css_attribute_selector selector(select_pseudo_class, name);
selector.selector_list = parse_selector_list(token.value, forgiving_mode + forbid_pseudo_elements, mode);
return selector;
}
else if (name == "not") {
css_attribute_selector selector(select_pseudo_class, name);
selector.selector_list = parse_selector_list(token.value, strict_mode + forbid_pseudo_elements, mode);
if (selector.selector_list.empty()) return {};
return selector;
}
else if (name == "lang") {
css_attribute_selector selector(select_pseudo_class, name);
selector.value = get_repr(token.value);
return selector;
}
return {};
}
bool is_supported_simple_pseudo_class(const string& name)
{
static std::set<string> supported_simple_pseudo_classes =
{
"any-link", "link", "visited", "local-link", "target", "target-within", "scope",
"hover", "active", "focus", "focus-visible", "focus-within",
"root", "empty", "first-child", "last-child", "only-child", "first-of-type", "last-of-type", "only-of-type",
};
return supported_simple_pseudo_classes.count(lowcase(name)) == 1;
}
css_attribute_selector parse_pseudo_class(const css_token_vector& tokens, int& index, document_mode mode)
{
const auto& a = at(tokens, index);
const auto& b = at(tokens, index + 1);
if (a.ch != ':')
return {};
if (b.type == IDENT)
{
if (!is_supported_simple_pseudo_class(b.ident()))
return {};
index += 2;
return { select_pseudo_class, b.ident() };
}
if (b.type == CV_FUNCTION)
{
css_attribute_selector sel = parse_function_pseudo_class(b, mode);
if (sel) index += 2;
return sel;
}
return {};
}
css_attribute_selector parse_subclass_selector(const css_token_vector& tokens, int& index, document_mode mode)
{
css_attribute_selector selector;
const auto& tok0 = at(tokens, index);
const auto& tok1 = at(tokens, index + 1);
switch (tok0.type)
{
case HASH:
if (tok0.hash_type == css_hash_id)
{
index++;
selector.type = select_id;
string name = tok0.name;
if (mode == quirks_mode) lcase(name);
selector.name = _id(name);
return selector;
}
return {};
case '.':
if (tok1.type == IDENT)
{
index += 2;
selector.type = select_class;
string name = tok1.name;
if (mode == quirks_mode) lcase(name);
selector.name = _id(name);
return selector;
}
return {};
case SQUARE_BLOCK:
selector = parse_attribute_selector(tok0);
if (selector) index++;
return selector;
default:
return parse_pseudo_class(tokens, index, mode);
}
}
bool is_supported_simple_pseudo_element(const string& name)
{
return is_one_of(lowcase(name),
"before", "after" );
}
css_attribute_selector parse_pseudo_element(const css_token_vector& tokens, int& index)
{
const auto& a = at(tokens, index);
const auto& b = at(tokens, index + 1);
const auto& c = at(tokens, index + 2);
if (a.ch != ':')
return {};
if (b.ch != ':' && b.type != IDENT)
return {};
if (b.type == IDENT) {
if (!is_one_of(b.ident(), "before", "after")) return {};
index += 2;
return {select_pseudo_element, b.ident()};
}
if (c.type == IDENT) {
if (!is_supported_simple_pseudo_element(c.ident()))
return {};
index += 3;
return {select_pseudo_element, c.ident()};
}
return {};
}
css_element_selector::ptr parse_compound_selector(const css_token_vector& tokens, int& index, document_mode mode)
{
auto selector = make_shared<css_element_selector>();
wq_name wq_name = parse_type_selector(tokens, index);
selector->m_prefix = _id(wq_name.prefix);
selector->m_tag = _id(wq_name.name);
while (css_attribute_selector sel = parse_subclass_selector(tokens, index, mode))
selector->m_attrs.push_back(sel);
while (true)
{
auto sel = parse_pseudo_element(tokens, index);
if (!sel) break;
selector->m_attrs.push_back(sel);
while ((sel = parse_pseudo_class(tokens, index, mode)))
selector->m_attrs.push_back(sel);
}
if (selector->m_tag == empty_id && selector->m_attrs.empty())
return nullptr;
if (selector->m_tag == empty_id)
selector->m_tag = star_id;
return selector;
}
int parse_combinator(const css_token_vector& tokens, int& index)
{
bool ws = skip_whitespace(tokens, index);
const css_token& tok = at(tokens, index);
if (is_one_of(tok.ch, '>', '+', '~'))
{
index++;
skip_whitespace(tokens, index);
return tok.ch;
}
return ws ? ' ' : 0;
}
css_selector::ptr parse_complex_selector(const css_token_vector& tokens, document_mode mode)
{
int index = 0;
skip_whitespace(tokens, index);
auto sel = parse_compound_selector(tokens, index, mode);
if (!sel) return nullptr;
auto selector = make_shared<css_selector>();
selector->m_right = *sel;
while (true)
{
int combinator = parse_combinator(tokens, index);
if (index == (int)tokens.size())
return !combinator || combinator == ' ' ? selector : nullptr;
if (!combinator) return nullptr;
sel = parse_compound_selector(tokens, index, mode);
if (!sel)
return nullptr;
auto new_selector = make_shared<css_selector>();
new_selector->m_left = selector;
new_selector->m_right = *sel;
new_selector->m_combinator = (css_combinator)combinator;
selector = new_selector;
}
}
bool has_selector(const css_selector& selector, attr_select_type type, const string& name = "")
{
for (const auto& sel : selector.m_right.m_attrs)
{
if (sel.type == type && (name == "" || equal_i(_s(sel.name), name)))
return true;
}
if (selector.m_left)
return has_selector(*selector.m_left, type, name);
return false;
}
css_selector::vector parse_selector_list(const css_token_vector& tokens, int options, document_mode mode)
{
vector<css_token_vector> list_of_lists = parse_comma_separated_list(tokens);
css_selector::vector result;
for (const auto& list: list_of_lists)
{
css_selector::ptr selector = parse_complex_selector(list, mode);
if (!selector ||
((options & forbid_pseudo_elements) && has_selector(*selector, select_pseudo_element)))
{
if (options & forgiving_mode)
continue;
return {};
}
result.push_back(selector);
}
return result;
}
bool css_selector::parse(const string& text, document_mode mode)
{
auto tokens = normalize(text, f_componentize);
auto ptr = parse_complex_selector(tokens, mode);
if (!ptr) return false;
*this = *ptr;
return true;
}
}