#include <cstdio>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>

#include "gtest/gtest.h"
#include "src/Lexicon.hpp"
#include "src/MarisaDict.hpp"
#include "src/UTF8Util.hpp"
#include "tools/cpp/runfiles/runfiles.h"
using bazel::tools::cpp::runfiles::Runfiles;
namespace opencc {
// Opens the file at `path` (a UTF-8 encoded path) for binary reading and
// returns the resulting stream handle, or a null pointer if it cannot be
// opened. The path is first converted with UTF8Util::GetPlatformString so
// that MSVC builds can go through the wide-character _wfopen API.
static FILE* OpenFile(const std::string& path) {
  const auto platformPath = UTF8Util::GetPlatformString(path);
#ifdef _MSC_VER
  FILE* handle = _wfopen(platformPath.c_str(), L"rb");
#else
  FILE* handle = fopen(platformPath.c_str(), "rb");
#endif
  return handle;
}
// Value-parameterized fixture whose parameter is a dictionary base name
// (without extension). It owns a suite-wide Runfiles instance that the tests
// use to resolve data file locations.
class DictionaryTest : public ::testing::Test,
                       public ::testing::WithParamInterface<std::string> {
protected:
  // Creates the shared Runfiles instance once for the whole suite. The error
  // text reported by Runfiles::CreateForTest is attached to the assertion so
  // that a misconfigured test environment explains itself instead of only
  // reporting a null pointer.
  static void SetUpTestSuite() {
    std::string error;
    runfiles_.reset(Runfiles::CreateForTest(&error));
    ASSERT_NE(nullptr, runfiles_) << "Runfiles::CreateForTest failed: "
                                  << error;
  }

  // Shared by every test in the suite; set in SetUpTestSuite().
  static std::unique_ptr<Runfiles> runfiles_;
};

std::unique_ptr<Runfiles> DictionaryTest::runfiles_;
// Plain (non-parameterized) fixture that owns a suite-wide Runfiles instance
// used by the tests to resolve data file locations.
class DictionaryRunfilesTest : public ::testing::Test {
protected:
  // Creates the shared Runfiles instance once for the whole suite. The error
  // text reported by Runfiles::CreateForTest is attached to the assertion so
  // that a misconfigured test environment explains itself instead of only
  // reporting a null pointer.
  static void SetUpTestSuite() {
    std::string error;
    runfiles_.reset(Runfiles::CreateForTest(&error));
    ASSERT_NE(nullptr, runfiles_) << "Runfiles::CreateForTest failed: "
                                  << error;
  }

  // Shared by every test in the suite; set in SetUpTestSuite().
  static std::unique_ptr<Runfiles> runfiles_;
};

std::unique_ptr<Runfiles> DictionaryRunfilesTest::runfiles_;
// Instantiates DictionaryTest once per dictionary base name listed below.
// The instantiation prefix is intentionally left empty, and the name
// generator uses the parameter itself as the test-name suffix.
INSTANTIATE_TEST_SUITE_P(
    , DictionaryTest,
    ::testing::Values(
        "HKVariants",
        "HKVariantsRev",
        "HKVariantsRevPhrases",
        "JPShinjitaiCharacters",
        "JPShinjitaiPhrases",
        "JPVariants",
        "JPVariantsRev",
        "STCharacters",
        "STPhrases",
        "TSCharacters",
        "TSPhrases",
        "TWPhrases",
        "TWPhrasesRev",
        "TWVariants",
        "TWVariantsRev",
        "TWVariantsRevPhrases"),
    [](const ::testing::TestParamInfo<std::string>& paramInfo)
        -> std::string {
      return paramInfo.param;
    });
// Checks that the text dictionary named by the test parameter has no
// duplicated keys and that its entries are sorted.
TEST_P(DictionaryTest, UniqueSortedTest) {
  const std::string dictionaryFileName =
      runfiles_->Rlocation("_main/data/dictionary/" + GetParam() + ".txt");
  // The unique_ptr owns the stream so it is closed on every exit path,
  // including the early return taken by a failing ASSERT_*.
  // NOTE(review): assumes ParseLexiconFromFile only reads from the handle and
  // does not close it itself -- confirm against Lexicon.
  std::unique_ptr<FILE, int (*)(FILE*)> fp(OpenFile(dictionaryFileName),
                                          &fclose);
  ASSERT_NE(fp.get(), nullptr) << "Cannot open " << dictionaryFileName;
  LexiconPtr lexicon = Lexicon::ParseLexiconFromFile(fp.get());
  EXPECT_TRUE(lexicon->IsUnique()) << GetParam() << " has duplicated keys.";
  EXPECT_TRUE(lexicon->IsSorted()) << GetParam() << " is not sorted.";
}
// Checks that the compiled .ocd2 dictionary named by the test parameter holds
// the same number of entries as the .txt dictionary of the same name.
TEST_P(DictionaryTest, BinaryTest) {
  // Both streams are owned by unique_ptr so they are closed on every exit
  // path, including the early returns taken by failing ASSERT_* checks.
  // NOTE(review): assumes MarisaDict::NewFromFile and ParseLexiconFromFile
  // only read from the handle and do not close it themselves -- confirm.
  using FileHandle = std::unique_ptr<FILE, int (*)(FILE*)>;

  const std::string binaryDictionaryFileName =
      runfiles_->Rlocation("_main/data/dictionary/" + GetParam() + ".ocd2");
  FileHandle fp_bin(OpenFile(binaryDictionaryFileName), &fclose);
  ASSERT_NE(fp_bin.get(), nullptr)
      << "Cannot open " << binaryDictionaryFileName;
  MarisaDictPtr dict = MarisaDict::NewFromFile(fp_bin.get());
  ASSERT_NE(dict, nullptr);

  const std::string textDictionaryFileName =
      runfiles_->Rlocation("_main/data/dictionary/" + GetParam() + ".txt");
  FileHandle fp_txt(OpenFile(textDictionaryFileName), &fclose);
  ASSERT_NE(fp_txt.get(), nullptr) << "Cannot open " << textDictionaryFileName;
  LexiconPtr txt_lexicon = Lexicon::ParseLexiconFromFile(fp_txt.get());

  EXPECT_EQ(dict->GetLexicon()->Length(), txt_lexicon->Length());
}
// Checks that TWPhrases.txt and TWPhrasesRev.txt mirror each other: for every
// key -> value pair in one file, the other file must contain value -> key.
// Any exception raised while loading or comparing is reported as a test
// failure.
TEST_F(DictionaryRunfilesTest, TWPhrasesReverseMapping) {
  // Maps a key to the set of all values listed for it.
  using PhraseMap =
      std::unordered_map<std::string, std::unordered_set<std::string>>;

  const std::string twPhrasesFile =
      runfiles_->Rlocation("_main/data/dictionary/TWPhrases.txt");
  const std::string twPhrasesRevFile =
      runfiles_->Rlocation("_main/data/dictionary/TWPhrasesRev.txt");

  // Parses the text dictionary at `path`. Records a non-fatal failure and
  // returns a null pointer when the file cannot be opened.
  auto loadLexicon = [](const std::string& path) -> LexiconPtr {
    // Owning the stream guarantees it is closed once parsing finishes or
    // throws.
    // NOTE(review): assumes ParseLexiconFromFile does not close the handle
    // itself -- confirm against Lexicon.
    std::unique_ptr<FILE, int (*)(FILE*)> fp(OpenFile(path), &fclose);
    EXPECT_NE(fp.get(), nullptr) << path;
    if (fp == nullptr) {
      return LexiconPtr();
    }
    return Lexicon::ParseLexiconFromFile(fp.get());
  };

  // Collects every entry of `lexicon` into a key -> value-set map. A null
  // lexicon yields an empty map.
  auto buildMap = [](const LexiconPtr& lexicon) -> PhraseMap {
    PhraseMap map;
    if (!lexicon) {
      return map;
    }
    for (size_t i = 0; i < lexicon->Length(); ++i) {
      const DictEntry* entry = lexicon->At(i);
      auto& values = map[entry->Key()];
      for (const auto& value : entry->Values()) {
        values.insert(value);
      }
    }
    return map;
  };

  // Expects that each key -> value pair of `forward` is present in `reverse`
  // as value -> key. Used once in each direction below.
  auto expectMirrored = [](const PhraseMap& forward,
                           const PhraseMap& reverse) {
    for (const auto& entry : forward) {
      const std::string& key = entry.first;
      for (const auto& value : entry.second) {
        const auto it = reverse.find(value);
        EXPECT_TRUE(it != reverse.end() && it->second.count(key) > 0)
            << "Missing reverse mapping: " << key << " -> " << value;
      }
    }
  };

  try {
    LexiconPtr twPhrases = loadLexicon(twPhrasesFile);
    LexiconPtr twPhrasesRev = loadLexicon(twPhrasesRevFile);
    ASSERT_NE(twPhrases, nullptr);
    ASSERT_NE(twPhrasesRev, nullptr);

    const PhraseMap twMap = buildMap(twPhrases);
    const PhraseMap twRevMap = buildMap(twPhrasesRev);

    expectMirrored(twMap, twRevMap);
    expectMirrored(twRevMap, twMap);
  } catch (const Exception& ex) {
    FAIL() << "Exception: " << ex.what();
  } catch (const std::exception& ex) {
    FAIL() << "std::exception: " << ex.what();
  } catch (...) {
    FAIL() << "Unknown exception thrown during reverse mapping check.";
  }
}
}