#include "common/string_utils.h"
#include <sstream>
#include <vector>
#include "common/exception/runtime.h"
#include "function/string/functions/base_lower_upper_function.h"
#include "utf8proc_wrapper.h"
namespace lbug {
namespace common {
// Splits `input` at the first comma that is not enclosed in parentheses.
// Always returns exactly two elements: the text before that comma and the text
// after it. If no such comma exists, the first element is the whole input and
// the second is empty.
// Note: the nesting depth is an unsigned counter, so an unmatched ')' wraps it
// around and later commas are then not treated as top-level.
std::vector<std::string> StringUtils::splitComma(const std::string& input) {
    auto depth = 0u;
    auto splitAt = 0u;
    for (; splitAt < input.length(); ++splitAt) {
        const char ch = input[splitAt];
        if (ch == ',' && depth == 0) {
            break;
        }
        if (ch == '(') {
            ++depth;
        } else if (ch == ')') {
            --depth;
        }
    }
    // When the scan ran off the end there is no separator to skip over.
    const bool foundComma = splitAt != input.length();
    std::vector<std::string> parts;
    parts.push_back(input.substr(0, splitAt));
    parts.push_back(input.substr(foundComma ? splitAt + 1 : input.length()));
    return parts;
}
// Maps a closing bracket to its opening counterpart; any other character
// (including '"' and opening brackets) is returned unchanged.
static char openingBracket(char c) {
    switch (c) {
    case ')':
        return '(';
    case ']':
        return '[';
    case '}':
        return '{';
    default:
        return c;
    }
}
// Splits `input` on `splitChar`, but only at positions that are outside any
// open bracket / double-quote group and outside single quotes.
//
// Returns views into `input` (std::string_view::substr, no copies). An empty
// `input` yields an empty vector; otherwise the tail after the last split is
// always appended, so the result has at least one element.
//
// `maxNumEle` caps the number of elements: at a split point where the result
// would reach `maxNumEle` entries, the whole remainder (from the start of the
// current item) is appended as the last element and the function returns.
// Because `result.size() + 1` is never 0, passing 0 never triggers this cap.
std::vector<std::string_view> StringUtils::smartSplit(std::string_view input, char splitChar,
uint64_t maxNumEle) {
if (input.size() == 0) {
return {};
}
std::vector<std::string_view> result;
// Index of the first character of the item currently being scanned.
auto currentItem = 0u;
// Stack of currently open groups; only '{', '(', '[' and '"' are ever pushed.
std::vector<char> stk;
bool insideSingleQuote = false;
for (auto i = 0u; i < input.size(); i++) {
char c = input[i];
// The branches below are tried in order; the first match wins.
// A single quote toggles the flag. Nothing in this function pushes '\'' onto
// `stk`, so the stack-top check does not filter anything pushed here.
if (c == '\'' && (stk.size() == 0 || stk.back() != '\'')) {
insideSingleQuote = !insideSingleQuote;
} else if (c == splitChar && stk.size() == 0u && !insideSingleQuote) {
// Top-level separator.
if (result.size() + 1 == maxNumEle) {
// Element limit reached: keep the rest of the input unsplit.
result.push_back(input.substr(currentItem));
return result;
} else {
result.push_back(input.substr(currentItem, i - currentItem));
currentItem = i + 1;
}
} else if (c == '{' || c == '(' || c == '[' ||
(c == '\"' && (stk.size() == 0u || stk.back() != '\"'))) {
// Opening brackets are always pushed; a double quote is pushed only when the
// top of the stack is not already a double quote.
stk.push_back(c);
} else if (stk.size() > 0 && openingBracket(c) == stk.back()) {
// `c` closes the group on top of the stack. openingBracket returns '"' for
// '"', so a double quote reaching this branch closes an open double quote.
stk.pop_back();
}
}
// Whatever follows the last split point (possibly empty) is the final item.
result.push_back(input.substr(currentItem));
return result;
}
// Returns the position of the next split point in `input`, searching from
// `prevPos`, or std::string::npos when there is none.
//
// - Non-empty `delimiter`: the position of its next occurrence at or after
//   `prevPos` (std::string::find semantics).
// - Empty `delimiter`: splits between every character, i.e. returns
//   `prevPos + 1` while that is still a valid index into `input`.
//
// The empty-input case is checked explicitly: `input.size() - 1` would wrap
// around to the maximum value for an empty string and report a bogus split
// position instead of npos.
uint64_t findDelim(const std::string& input, const std::string& delimiter, uint64_t prevPos) {
    if (!delimiter.empty()) {
        return input.find(delimiter, prevPos);
    }
    if (input.empty() || prevPos >= input.size() - 1) {
        return std::string::npos;
    }
    return prevPos + 1;
}
// Splits `input` on every occurrence of `delimiter` (an empty delimiter is
// handled by findDelim, which then splits between characters).
//
// When `ignoreEmptyStringParts` is true, empty pieces found *before* a
// delimiter are dropped. The trailing piece after the last delimiter is always
// appended, even when it is empty, so the result is never empty.
std::vector<std::string> StringUtils::split(const std::string& input, const std::string& delimiter,
    bool ignoreEmptyStringParts) {
    std::vector<std::string> result;
    // Same width as findDelim's return type, so positions are never narrowed.
    uint64_t prevPos = 0;
    auto currentPos = findDelim(input, delimiter, prevPos);
    while (currentPos != std::string::npos) {
        const auto partLen = currentPos - prevPos;
        if (!ignoreEmptyStringParts || partLen > 0) {
            // Build the piece directly inside the vector (one substring copy only).
            result.emplace_back(input, prevPos, partLen);
        }
        prevPos = currentPos + delimiter.size();
        currentPos = findDelim(input, delimiter, prevPos);
    }
    result.push_back(input.substr(prevPos));
    return result;
}
// Breaks `input` into whitespace-separated words using stream extraction, so
// runs of whitespace never produce empty entries.
std::vector<std::string> StringUtils::splitBySpace(const std::string& input) {
    std::vector<std::string> words;
    std::istringstream stream(input);
    for (std::string word; stream >> word;) {
        words.push_back(word);
    }
    return words;
}
// Returns an upper-cased copy of `input`; the argument itself is not modified.
std::string StringUtils::getUpper(const std::string& input) {
    std::string upperCased(input);
    toUpper(upperCased);
    return upperCased;
}
// Returns an upper-cased std::string built from the characters viewed by
// `input`.
std::string StringUtils::getUpper(const std::string_view& input) {
    std::string upperCased(input);
    toUpper(upperCased);
    return upperCased;
}
// Returns a lower-cased copy of `input`; the argument itself is not modified.
std::string StringUtils::getLower(const std::string& input) {
    std::string lowerCased(input);
    toLower(lowerCased);
    return lowerCased;
}
template<bool toUpper>
static void changeCase(std::string& input) {
if (!utf8proc::Utf8Proc::isValid(input.c_str(), input.length())) {
throw RuntimeException{"Invalid UTF8-encoded string."};
}
auto resultLen = function::BaseLowerUpperFunction::getResultLen((char*)input.data(),
input.length(), toUpper);
std::string result(resultLen, '\0' );
function::BaseLowerUpperFunction::convertCase((char*)result.data(), input.length(),
input.data(), toUpper);
input = result;
}
// Lower-cases `input` in place.
void StringUtils::toLower(std::string& input) {
    constexpr bool convertToUpper = false;
    changeCase<convertToUpper>(input);
}
// Upper-cases `input` in place.
void StringUtils::toUpper(std::string& input) {
    constexpr bool convertToUpper = true;
    changeCase<convertToUpper>(input);
}
// Trims leading and trailing whitespace from the character range
// [input, input + len) without copying: `input` is advanced past leading
// whitespace and `len` is reduced to cover only the trimmed range.
// A range consisting solely of whitespace ends up with `len == 0`.
void StringUtils::removeCStringWhiteSpaces(const char*& input, uint64_t& len) {
    // isspace requires a value representable as unsigned char (or EOF); a plain
    // `char` holding a byte >= 0x80 may be negative, which is undefined behaviour.
    const auto isSpaceByte = [](char ch) {
        return isspace(static_cast<unsigned char>(ch)) != 0;
    };
    while (len > 0 && isSpaceByte(input[0])) {
        input++;
        len--;
    }
    while (len > 0 && isSpaceByte(input[len - 1])) {
        len--;
    }
}
// Replaces every non-overlapping occurrence of `search` in `str` with
// `replacement`, scanning left to right. Text inserted by a replacement is not
// rescanned, so `replacement` may itself contain `search`.
// An empty `search` leaves `str` unchanged.
void StringUtils::replaceAll(std::string& str, const std::string& search,
    const std::string& replacement) {
    // find("") matches at every position, so without this guard the loop would
    // keep inserting `replacement` forever.
    if (search.empty()) {
        return;
    }
    size_t pos = 0;
    while ((pos = str.find(search, pos)) != std::string::npos) {
        str.replace(pos, search.length(), replacement);
        // Continue after the inserted text.
        pos += replacement.length();
    }
}
// Returns the text between the first `delimiterStart` and the last
// `delimiterEnd` in `input`. With `includeDelimiter` both delimiter characters
// are part of the result; otherwise neither is.
// Returns "" when either delimiter is missing or the first start delimiter is
// not strictly before the last end delimiter.
std::string StringUtils::extractStringBetween(const std::string& input, char delimiterStart,
    char delimiterEnd, bool includeDelimiter) {
    const std::string::size_type openPos = input.find_first_of(delimiterStart);
    const std::string::size_type closePos = input.find_last_of(delimiterEnd);
    const bool bothFound = openPos != std::string::npos && closePos != std::string::npos;
    if (!bothFound || openPos >= closePos) {
        return "";
    }
    const auto from = includeDelimiter ? openPos : openPos + 1;
    const auto until = includeDelimiter ? closePos + 1 : closePos;
    return input.substr(from, until - from);
}
// Computes a 32-bit hash of `str` (widened to uint64_t on return) in which each
// character is passed through tolower before being mixed in.
// NOTE(review): tolower receives a plain `char` here, which may be negative for
// bytes >= 0x80 — confirm whether an unsigned-char cast is acceptable, since it
// would change the hash of such strings.
uint64_t StringUtils::caseInsensitiveHash(const std::string& str) {
    uint32_t acc = 0;
    // Per-character mixing.
    for (size_t idx = 0; idx < str.size(); ++idx) {
        const auto ch = str[idx];
        acc += tolower(ch);
        acc += acc << 10;
        acc ^= acc >> 6;
    }
    // Final mixing.
    acc += acc << 3;
    acc ^= acc >> 11;
    acc += acc << 15;
    return acc;
}
// Compares two strings for equality after mapping every byte through
// asciiToLowerCaseMap. Strings of different length are never equal.
bool StringUtils::caseInsensitiveEquals(std::string_view left, std::string_view right) {
    if (left.size() != right.size()) {
        return false;
    }
    auto idx = 0u;
    while (idx < left.size()) {
        const auto leftByte = static_cast<uint8_t>(left[idx]);
        const auto rightByte = static_cast<uint8_t>(right[idx]);
        if (asciiToLowerCaseMap[leftByte] != asciiToLowerCaseMap[rightByte]) {
            return false;
        }
        idx++;
    }
    return true;
}
// Concatenates all strings in `input`, placing `separator` between consecutive
// elements. An empty vector yields an empty string.
std::string StringUtils::join(const std::vector<std::string>& input, const std::string& separator) {
    const auto asString = [](const std::string& element) { return element; };
    return StringUtils::join(input, input.size(), separator, asString);
}
// Concatenates all views in `input` into one std::string, placing `separator`
// between consecutive elements. An empty span yields an empty string.
std::string StringUtils::join(const std::span<const std::string_view> input,
    const std::string& separator) {
    const auto asString = [](const std::string_view element) { return std::string{element}; };
    return StringUtils::join(input, input.size(), separator, asString);
}
// Generic join: applies `f` to the first `count` elements of `input` (accessed
// via operator[]) and concatenates the results with `separator` in between.
// Nothing is appended when `count` is not positive.
template<typename C, typename S, typename Func>
std::string StringUtils::join(const C& input, S count, const std::string& separator, Func f) {
    std::string joined;
    if (count > 0) {
        joined += f(input[0]);
    }
    size_t idx = 1;
    while (idx < count) {
        // Each remaining element is preceded by the separator.
        joined += separator;
        joined += f(input[idx]);
        idx++;
    }
    return joined;
}
// Returns a copy of `input` with every leading character for which
// characterIsNewLine is true removed.
std::string StringUtils::ltrimNewlines(const std::string& input) {
    size_t firstKept = 0;
    while (firstKept < input.size() &&
           characterIsNewLine(static_cast<unsigned char>(input[firstKept]))) {
        ++firstKept;
    }
    return input.substr(firstKept);
}
// Returns a copy of `input` with every trailing character for which
// characterIsNewLine is true removed.
std::string StringUtils::rtrimNewlines(const std::string& input) {
    size_t keptLen = input.size();
    while (keptLen > 0 && characterIsNewLine(static_cast<unsigned char>(input[keptLen - 1]))) {
        --keptLen;
    }
    return input.substr(0, keptLen);
}
// Percent-encodes `input` for use in a URL.
//
// ASCII letters, digits and the characters '_', '.', '-', '~' are copied
// verbatim. '/' is copied verbatim unless `encodeSlash` is true, in which case
// it becomes "%2F". Every other byte is emitted as '%' followed by two
// upper-case hexadecimal digits.
std::string StringUtils::encodeURL(const std::string& input, bool encodeSlash) {
    static constexpr char hexDigits[] = "0123456789ABCDEF";
    // Explicit ASCII ranges instead of std::isalnum: the classification must not
    // depend on the current locale, and std::isalnum is undefined for negative
    // `char` values (bytes >= 0x80 on platforms where char is signed).
    constexpr auto isUnreserved = [](unsigned char ch) {
        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') ||
               ch == '_' || ch == '.' || ch == '-' || ch == '~';
    };
    std::string result;
    result.reserve(input.size());
    for (const char rawCh : input) {
        const auto ch = static_cast<unsigned char>(rawCh);
        if (isUnreserved(ch) || (ch == '/' && !encodeSlash)) {
            result += rawCh;
        } else {
            // Covers '/' with encodeSlash as well: 0x2F encodes to "%2F".
            result += '%';
            result += hexDigits[ch >> 4];
            result += hexDigits[ch & 15];
        }
    }
    return result;
}
} }