#include <stdio.h>
#include <stdlib.h>
#ifndef _WIN32
#include <unistd.h>
#include <sys/time.h>
#else
#include <io.h>
#endif
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
#include <list>
#include <string>
#include <fstream>
#include <set>
#include "llvm/Config/llvm-config.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/IRBuilder.h"
#if USE_NEW_PM
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/IR/PassManager.h"
#else
#include "llvm/IR/LegacyPassManager.h"
#endif
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Pass.h"
#include "llvm/IR/Constants.h"
#ifndef O_DSYNC
#define O_DSYNC O_SYNC
#endif
#define MAX_AUTO_EXTRA 32
#define USE_AUTO_EXTRAS 4096
#define MAX_AUTO_EXTRAS (USE_AUTO_EXTRAS * 8)
#include <iostream>
#define FATAL(x...) \
do { \
fprintf(stderr, "FATAL: " x); \
exit(1); \
\
} while (0)
using namespace llvm;
namespace {
bool isIgnoreFunction(const llvm::Function *F) {
static constexpr const char *ignoreList[] = {
"asan.",
"llvm.",
"sancov.",
"__ubsan",
"ign.",
"__afl",
"_fini",
"__libc_",
"__asan",
"__msan",
"__cmplog",
"__sancov",
"__san",
"__cxx_",
"__decide_deferred",
"_GLOBAL",
"_ZZN6__asan",
"_ZZN6__lsan",
"msan.",
"LLVMFuzzerM",
"LLVMFuzzerC",
"LLVMFuzzerI",
"maybe_duplicate_stderr",
"discard_output",
"close_stdout",
"dup_and_close_stderr",
"maybe_close_fd_mask",
"ExecuteFilesOnyByOne"
};
for (auto const &ignoreListFunc : ignoreList) {
if (F->getName().startswith(ignoreListFunc)) { return true; }
}
static constexpr const char *ignoreSubstringList[] = {
"__asan", "__msan", "__ubsan", "__lsan",
"__san", "__sanitize", "__cxx", "_GLOBAL__",
"DebugCounter", "DwarfDebug", "DebugLoc"
};
for (auto const &ignoreListFunc : ignoreSubstringList) {
if (StringRef::npos != F->getName().find(ignoreListFunc)) { return true; }
}
return false;
}
#if USE_NEW_PM
class AutoTokensPass : public PassInfoMixin<AutoTokensPass> {
public:
AutoTokensPass() {
#else
class AutoTokensPass : public ModulePass {
public:
static char ID;
AutoTokensPass() : ModulePass(ID) {
#endif
}
#if USE_NEW_PM
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
#else
bool runOnModule(Module &M) override;
#endif
protected:
private:
std::vector<std::string> dictionary;
};
}
#if USE_NEW_PM
extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
llvmGetPassPluginInfo() {
return {LLVM_PLUGIN_API_VERSION, "AutoTokensPass", "v0.1",
[](PassBuilder &PB) {
#if LLVM_VERSION_MAJOR <= 13
using OptimizationLevel = typename PassBuilder::OptimizationLevel;
#endif
PB.registerOptimizerLastEPCallback(
[](ModulePassManager &MPM, OptimizationLevel OL) {
MPM.addPass(AutoTokensPass());
});
}};
}
#else
char AutoTokensPass::ID = 0;
#endif
void dict2file(int fd, uint8_t *mem, uint32_t len) {
uint32_t i, j, binary = 0;
char line[MAX_AUTO_EXTRA * 8], tmp[8];
strcpy(line, "\"");
j = 1;
for (i = 0; i < len; i++) {
if (isprint(mem[i]) && mem[i] != '\\' && mem[i] != '"') {
line[j++] = mem[i];
} else {
if (i + 1 != len || mem[i] != 0 || binary || len == 4 || len == 8) {
line[j] = 0;
sprintf(tmp, "\\x%02x", (uint8_t)mem[i]);
strcat(line, tmp);
j = strlen(line);
}
binary = 1;
}
}
line[j] = 0;
strcat(line, "\"\n");
if (write(fd, line, strlen(line)) <= 0) {
FATAL("Could not write to the dictionary file");
}
#ifndef _WIN32
fsync(fd);
#endif
}
#if USE_NEW_PM
PreservedAnalyses AutoTokensPass::run(Module &M, ModuleAnalysisManager &MAM) {
#else
bool AutoTokensPass::runOnModule(Module &M) {
#endif
DenseMap<Value *, std::string *> valueMap;
char *ptr;
int fd, found = 0;
bool use_file = true;
setvbuf(stdout, NULL, _IONBF, 0);
ptr = getenv("AFL_LLVM_DICT2FILE");
if (!ptr || *ptr != '/') {
use_file = false;
}
if (use_file) {
#ifndef _WIN32
if ((fd = open(ptr, O_WRONLY | O_APPEND | O_CREAT | O_DSYNC, 0644)) < 0)
#else
if ((fd = open(ptr, O_WRONLY | O_APPEND | O_CREAT, 0644)) < 0)
#endif
FATAL("Could not open/create %s.", ptr);
}
for (auto &F : M) {
if (isIgnoreFunction(&F)) { continue; }
for (auto &BB : F) {
for (auto &IN : BB) {
CallInst *callInst = nullptr;
CmpInst *cmpInst = nullptr;
if ((cmpInst = dyn_cast<CmpInst>(&IN))) {
Value *op = cmpInst->getOperand(1);
ConstantInt *ilen = dyn_cast<ConstantInt>(op);
if (ilen && ilen->uge(0xffffffffffffffff) == false) {
uint64_t val2 = 0, val = ilen->getZExtValue();
uint32_t len = 0;
if (val > 0x10000 && val < 0xffffffff) { len = 4; }
if (val > 0x100000001 && val < 0xffffffffffffffff) { len = 8; }
if (len) {
auto c = cmpInst->getPredicate();
switch (c) {
case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLE: case CmpInst::ICMP_SLE: case CmpInst::ICMP_SGT:
if ((len == 4 && (val & 80000000)) ||
(len == 8 && (val & 8000000000000000))) {
if ((val & 0xffff) != 1) { val2 = val - 1; }
break;
}
case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULE: case CmpInst::ICMP_UGT: case CmpInst::ICMP_ULE:
if ((val & 0xffff) != 0xfffe) val2 = val + 1;
break;
case CmpInst::FCMP_OLT: case CmpInst::FCMP_OGE: case CmpInst::ICMP_SLT: case CmpInst::ICMP_SGE:
if ((len == 4 && (val & 80000000)) ||
(len == 8 && (val & 8000000000000000))) {
if ((val & 0xffff) != 1) val2 = val - 1;
break;
}
case CmpInst::FCMP_ULT: case CmpInst::FCMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_UGE:
if ((val & 0xffff) != 1) val2 = val - 1;
break;
default:
val2 = 0;
}
if (use_file) {
dict2file(fd, (uint8_t *)&val, len);
} else {
dictionary.push_back(std::string((char *)&val, len));
}
found++;
if (val2) {
if (use_file) {
dict2file(fd, (uint8_t *)&val2, len);
} else {
dictionary.push_back(std::string((char *)&val2, len));
}
found++;
}
}
}
}
if ((callInst = dyn_cast<CallInst>(&IN))) {
bool isStrcmp = true;
bool isMemcmp = true;
bool isStrncmp = true;
bool isStrcasecmp = true;
bool isStrncasecmp = true;
bool isIntMemcpy = true;
bool isStdString = true;
bool addedNull = false;
size_t optLen = 0;
Function *Callee = callInst->getCalledFunction();
if (!Callee) continue;
if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
std::string FuncName = Callee->getName().str();
isStrcmp &= !FuncName.compare("strcmp");
isMemcmp &=
(!FuncName.compare("memcmp") || !FuncName.compare("bcmp"));
isStrncmp &= !FuncName.compare("strncmp");
isStrcasecmp &= !FuncName.compare("strcasecmp");
isStrncasecmp &= !FuncName.compare("strncasecmp");
isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
isStdString &= ((FuncName.find("basic_string") != std::string::npos &&
FuncName.find("compare") != std::string::npos) ||
(FuncName.find("basic_string") != std::string::npos &&
FuncName.find("find") != std::string::npos));
if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
!isStrncasecmp && !isIntMemcpy && !isStdString)
continue;
FunctionType *FT = Callee->getFunctionType();
isStrcmp &=
FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
FT->getParamType(0) == FT->getParamType(1) &&
FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
isStrcasecmp &=
FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
FT->getParamType(0) == FT->getParamType(1) &&
FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
isMemcmp &= FT->getNumParams() == 3 &&
FT->getReturnType()->isIntegerTy(32) &&
FT->getParamType(0)->isPointerTy() &&
FT->getParamType(1)->isPointerTy() &&
FT->getParamType(2)->isIntegerTy();
isStrncmp &= FT->getNumParams() == 3 &&
FT->getReturnType()->isIntegerTy(32) &&
FT->getParamType(0) == FT->getParamType(1) &&
FT->getParamType(0) ==
IntegerType::getInt8PtrTy(M.getContext()) &&
FT->getParamType(2)->isIntegerTy();
isStrncasecmp &= FT->getNumParams() == 3 &&
FT->getReturnType()->isIntegerTy(32) &&
FT->getParamType(0) == FT->getParamType(1) &&
FT->getParamType(0) ==
IntegerType::getInt8PtrTy(M.getContext()) &&
FT->getParamType(2)->isIntegerTy();
isStdString &= FT->getNumParams() >= 2 &&
FT->getParamType(0)->isPointerTy() &&
FT->getParamType(1)->isPointerTy();
if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
!isStrncasecmp && !isIntMemcpy && !isStdString)
continue;
Value *Str1P = callInst->getArgOperand(0),
*Str2P = callInst->getArgOperand(1);
std::string Str1, Str2;
StringRef TmpStr;
bool HasStr1;
getConstantStringInfo(Str1P, TmpStr);
if (TmpStr.empty()) {
HasStr1 = false;
} else {
HasStr1 = true;
Str1 = TmpStr.str();
}
bool HasStr2;
getConstantStringInfo(Str2P, TmpStr);
if (TmpStr.empty()) {
HasStr2 = false;
} else {
HasStr2 = true;
Str2 = TmpStr.str();
}
if (!HasStr2) {
auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
if (Ptr && Ptr->getOpcode() == Instruction::GetElementPtr) {
if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
if (Var->hasInitializer()) {
if (auto *Array =
dyn_cast<ConstantDataArray>(Var->getInitializer())) {
HasStr2 = true;
Str2 = Array->getRawDataValues().str();
}
}
}
}
}
if (isIntMemcpy == true) {
if (HasStr2 == true) {
Value *op2 = callInst->getArgOperand(2);
ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
if (ilen) {
uint64_t literalLength = Str2.length();
uint64_t optLength = ilen->getZExtValue();
if (literalLength + 1 == optLength) {
Str2.append("\0", 1); }
if (optLength > Str2.length()) { optLength = Str2.length(); }
}
valueMap[Str1P] = new std::string(Str2);
continue;
}
continue;
}
if (!HasStr2) {
std::string *strng = valueMap[Str2P];
if (strng && !strng->empty()) {
Str2 = *strng;
HasStr2 = true;
}
}
if (!HasStr1) {
auto Ptr = dyn_cast<ConstantExpr>(Str1P);
if (Ptr && Ptr->getOpcode() == Instruction::GetElementPtr) {
if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
if (Var->hasInitializer()) {
if (auto *Array =
dyn_cast<ConstantDataArray>(Var->getInitializer())) {
HasStr1 = true;
Str1 = Array->getRawDataValues().str();
}
}
}
}
}
if (!HasStr1) {
std::string *strng = valueMap[Str1P];
if (strng && !strng->empty()) {
Str1 = *strng;
HasStr1 = true;
}
}
if (!(HasStr1 ^ HasStr2)) continue;
std::string thestring;
if (HasStr1)
thestring = Str1;
else
thestring = Str2;
optLen = thestring.length();
if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
if (isMemcmp || isStrncmp || isStrncasecmp) {
Value *op2 = callInst->getArgOperand(2);
ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
if (ilen) {
uint64_t literalLength = optLen;
optLen = ilen->getZExtValue();
if (optLen > thestring.length()) { optLen = thestring.length(); }
if (optLen < 2) { continue; }
if (literalLength + 1 == optLen) { thestring.append("\0", 1);
addedNull = true;
}
}
}
if (!isMemcmp) {
if (addedNull == false && thestring[optLen - 1] != '\0') {
thestring.append("\0", 1); optLen++;
}
if (!isStdString) {
size_t offset = thestring.find('\0', 0);
if (offset + 1 < optLen) optLen = offset + 1;
thestring = thestring.substr(0, optLen);
}
}
if (optLen != thestring.length()) optLen = thestring.length();
if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA;
if (optLen < 3) continue;
ptr = (char *)thestring.c_str();
if (use_file) {
dict2file(fd, (uint8_t *)ptr, optLen);
} else {
dictionary.push_back(thestring.substr(0, optLen));
}
found++;
}
}
}
}
if (use_file) {
close(fd);
#if USE_NEW_PM
auto PA = PreservedAnalyses::all();
return PA;
#else
return true;
#endif
}
LLVMContext &Ctx = M.getContext();
if (dictionary.size()) {
size_t memlen = 0, count = 0, offset = 0;
std::sort(dictionary.begin(), dictionary.end());
auto last = std::unique(dictionary.begin(), dictionary.end());
dictionary.erase(last, dictionary.end());
for (auto token : dictionary) {
memlen += token.length();
count++;
}
if (count) {
auto ptrhld = std::unique_ptr<char[]>(new char[memlen + count]);
count = 0;
for (auto token : dictionary) {
if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) {
ptrhld.get()[offset++] = (uint8_t)token.length();
memcpy(ptrhld.get() + offset, token.c_str(), token.length());
offset += token.length();
count++;
}
}
ArrayType *arrayTy = ArrayType::get(IntegerType::get(Ctx, 8), offset);
GlobalVariable *dict = new GlobalVariable(
M, arrayTy, true, GlobalVariable::ExternalLinkage,
ConstantDataArray::get(Ctx,
*(new ArrayRef<char>(ptrhld.get(), offset))),
"libafl_dictionary_" + M.getName());
#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || \
defined(__OpenBSD__) || defined(__DragonFly__)
dict->setSection("libafl_token");
#elif defined(__APPLE__)
dict->setSection("__DATA,__libafl_token");
#endif
}
}
#if USE_NEW_PM
auto PA = PreservedAnalyses::all();
return PA;
#else
return true;
#endif
}
#if USE_NEW_PM
#else
static void registerAutoTokensPass(const PassManagerBuilder &,
legacy::PassManagerBase &PM) {
PM.add(new AutoTokensPass());
}
static RegisterPass<AutoTokensPass> X("autotokens",
"autotokens instrumentation pass", false,
false);
static RegisterStandardPasses RegisterAutoTokensPass(
PassManagerBuilder::EP_OptimizerLast, registerAutoTokensPass);
static RegisterStandardPasses RegisterAutoTokensPass0(
PassManagerBuilder::EP_EnabledOnOptLevel0, registerAutoTokensPass);
#endif