#include "function/string/functions/find_function.h"
#include <cstring>
using namespace lbug::common;
namespace lbug {
namespace function {
template<class UNSIGNED>
int64_t Find::unalignedNeedleSizeFind(const uint8_t* haystack, uint32_t haystackLen,
const uint8_t* needle, uint32_t needleLen, uint32_t firstMatchCharOffset) {
if (needleLen > haystackLen) {
return -1;
}
UNSIGNED needleEntry = 0;
UNSIGNED haystackEntry = 0;
const UNSIGNED start = (sizeof(UNSIGNED) * 8) - 8;
const UNSIGNED shift = (sizeof(UNSIGNED) - needleLen) * 8;
for (auto i = 0u; i < needleLen; i++) {
needleEntry |= UNSIGNED(needle[i]) << UNSIGNED(start - i * 8);
haystackEntry |= UNSIGNED(haystack[i]) << UNSIGNED(start - i * 8);
}
for (auto offset = needleLen; offset < haystackLen; offset++) {
if (haystackEntry == needleEntry) {
return firstMatchCharOffset + offset - needleLen;
}
haystackEntry = (haystackEntry << 8) | ((UNSIGNED(haystack[offset])) << shift);
}
if (haystackEntry == needleEntry) {
return firstMatchCharOffset + haystackLen - needleLen;
}
return -1;
}
template<class UNSIGNED>
int64_t Find::alignedNeedleSizeFind(const uint8_t* haystack, uint32_t haystackLen,
const uint8_t* needle, uint32_t firstMatchCharOffset) {
if (sizeof(UNSIGNED) > haystackLen) {
return -1;
}
auto needleVal = *((UNSIGNED*)needle);
for (auto offset = 0u; offset <= haystackLen - sizeof(UNSIGNED); offset++) {
auto haystackVal = *((UNSIGNED*)(haystack + offset));
if (needleVal == haystackVal) {
return firstMatchCharOffset + offset;
}
}
return -1;
}
int64_t Find::genericFind(const uint8_t* haystack, uint32_t haystackLen, const uint8_t* needle,
uint32_t needLen, uint32_t firstMatchCharOffset) {
if (needLen > haystackLen) {
return -1;
}
auto sumsDiff = 0u;
for (auto i = 0u; i < needLen; i++) {
sumsDiff += haystack[i];
sumsDiff -= needle[i];
}
auto offset = 0u;
while (true) {
if (sumsDiff == 0 && haystack[offset] == needle[0]) {
if (memcmp(haystack + offset, needle, needLen) == 0) {
return firstMatchCharOffset + offset;
}
}
if (offset >= haystackLen - needLen) {
return -1;
}
sumsDiff -= haystack[offset];
sumsDiff += haystack[offset + needLen];
offset++;
}
}
int64_t Find::find(const uint8_t* haystack, uint32_t haystackLen, const uint8_t* needle,
uint32_t needleLen) {
auto firstMatchCharPos = (uint8_t*)memchr(haystack, needle[0], haystackLen);
if (firstMatchCharPos == nullptr) {
return -1;
}
auto firstMatchCharOffset = firstMatchCharPos - haystack;
auto numCharsToMatch = haystackLen - firstMatchCharOffset;
switch (needleLen) {
case 1:
return firstMatchCharOffset;
case 2:
return alignedNeedleSizeFind<uint16_t>(firstMatchCharPos, numCharsToMatch, needle,
firstMatchCharOffset);
case 3:
return unalignedNeedleSizeFind<uint32_t>(firstMatchCharPos, numCharsToMatch, needle, 3,
firstMatchCharOffset);
case 4:
return alignedNeedleSizeFind<uint32_t>(firstMatchCharPos, numCharsToMatch, needle,
firstMatchCharOffset);
case 5:
case 6:
case 7:
return unalignedNeedleSizeFind<uint64_t>(firstMatchCharPos, numCharsToMatch, needle,
needleLen, firstMatchCharOffset);
case 8:
return alignedNeedleSizeFind<uint64_t>(firstMatchCharPos, numCharsToMatch, needle,
firstMatchCharOffset);
default:
return genericFind(firstMatchCharPos, numCharsToMatch, needle, needleLen,
firstMatchCharOffset);
}
}
} }