opencc-sys 0.4.1+1.3.0

OpenCC bindings for Rust
Documentation
/*
 * Open Chinese Convert
 *
 * Copyright 2024-2026 Carbo Kuo and contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gtest/gtest.h"

#include <cstdio>
#include <memory>
#include <unordered_map>
#include <unordered_set>

#include "src/Lexicon.hpp"
#include "src/MarisaDict.hpp"
#include "src/UTF8Util.hpp"
#include "tools/cpp/runfiles/runfiles.h"

using bazel::tools::cpp::runfiles::Runfiles;

namespace opencc {

// Opens `path` for binary reading ("rb") and returns the resulting stream.
//
// The path is first passed through UTF8Util::GetPlatformString. MSVC builds
// then open it with the wide-character _wfopen; all other builds use fopen.
// The result of the open call is returned unchanged, so it is null when the
// file could not be opened. The caller owns the returned stream.
static FILE* OpenFile(const std::string& path) {
  const auto platformPath = UTF8Util::GetPlatformString(path);
#ifdef _MSC_VER
  FILE* stream = _wfopen(platformPath.c_str(), L"rb");
#else
  FILE* stream = fopen(platformPath.c_str(), "rb");
#endif
  return stream;
}

class DictionaryTest : public ::testing::Test,
                       public ::testing::WithParamInterface<std::string> {
protected:
  static void SetUpTestSuite() {
    runfiles_.reset(Runfiles::CreateForTest());
    ASSERT_NE(nullptr, runfiles_);
  }

  static std::unique_ptr<Runfiles> runfiles_;
};

std::unique_ptr<Runfiles> DictionaryTest::runfiles_;

class DictionaryRunfilesTest : public ::testing::Test {
protected:
  static void SetUpTestSuite() {
    runfiles_.reset(Runfiles::CreateForTest());
    ASSERT_NE(nullptr, runfiles_);
  }

  static std::unique_ptr<Runfiles> runfiles_;
};

std::unique_ptr<Runfiles> DictionaryRunfilesTest::runfiles_;

// Instantiates DictionaryTest once per dictionary base name listed below.
// The instantiation prefix (first macro argument) is left empty, and the name
// generator returns the parameter itself as the test-name suffix.
INSTANTIATE_TEST_SUITE_P(
    , DictionaryTest,
    ::testing::Values("HKVariants",
                      "HKVariantsRev",
                      "HKVariantsRevPhrases",
                      "JPShinjitaiCharacters",
                      "JPShinjitaiPhrases",
                      "JPVariants",
                      "JPVariantsRev",
                      "STCharacters",
                      "STPhrases",
                      "TSCharacters",
                      "TSPhrases",
                      "TWPhrases",
                      "TWPhrasesRev",
                      "TWVariants",
                      "TWVariantsRev",
                      "TWVariantsRevPhrases"),
    [](const ::testing::TestParamInfo<DictionaryTest::ParamType>& paramInfo) {
      return paramInfo.param;
    });

// For the dictionary named by the test parameter, parses
// "_main/data/dictionary/<param>.txt" and expects the resulting lexicon to
// report IsUnique() and IsSorted().
TEST_P(DictionaryTest, UniqueSortedTest) {
  const std::string dictionaryFileName =
      runfiles_->Rlocation("_main/data/dictionary/" + GetParam() + ".txt");
  // This test opens the stream, so it also owns it: the deleter closes the
  // file on every exit path (fatal assertion, exception, normal return). It
  // is not invoked when the pointer is null.
  const std::unique_ptr<FILE, int (*)(FILE*)> fp(OpenFile(dictionaryFileName),
                                                 &fclose);
  ASSERT_NE(fp.get(), nullptr) << "Cannot open " << dictionaryFileName;
  LexiconPtr lexicon = Lexicon::ParseLexiconFromFile(fp.get());
  // Guard the dereferences below.
  ASSERT_NE(lexicon, nullptr);
  EXPECT_TRUE(lexicon->IsUnique()) << GetParam() << " has duplicated keys.";
  EXPECT_TRUE(lexicon->IsSorted()) << GetParam() << " is not sorted.";
}

// For the dictionary named by the test parameter, loads the binary
// "<param>.ocd2" file as a MarisaDict and the "<param>.txt" file as a
// lexicon, then expects both to report the same Length().
TEST_P(DictionaryTest, BinaryTest) {
  // Streams opened here are owned here: the deleter closes each file on every
  // exit path, including the early returns taken by the ASSERT_* macros. It
  // is not invoked for a null pointer.
  using FilePtr = std::unique_ptr<FILE, int (*)(FILE*)>;

  const std::string binaryDictionaryFileName =
      runfiles_->Rlocation("_main/data/dictionary/" + GetParam() + ".ocd2");
  const FilePtr fp_bin(OpenFile(binaryDictionaryFileName), &fclose);
  ASSERT_NE(fp_bin.get(), nullptr)
      << "Cannot open " << binaryDictionaryFileName;
  MarisaDictPtr dict = MarisaDict::NewFromFile(fp_bin.get());
  ASSERT_NE(dict, nullptr);

  const std::string textDictionaryFileName =
      runfiles_->Rlocation("_main/data/dictionary/" + GetParam() + ".txt");
  const FilePtr fp_txt(OpenFile(textDictionaryFileName), &fclose);
  ASSERT_NE(fp_txt.get(), nullptr) << "Cannot open " << textDictionaryFileName;
  LexiconPtr txt_lexicon = Lexicon::ParseLexiconFromFile(fp_txt.get());
  // Guard the dereference below.
  ASSERT_NE(txt_lexicon, nullptr);

  EXPECT_EQ(dict->GetLexicon()->Length(), txt_lexicon->Length());
}

// Verifies that TWPhrases.txt and TWPhrasesRev.txt mirror each other: for
// every (key, value) pair in one file, the other file must contain an entry
// keyed by `value` whose values include `key`. Any exception raised while
// loading or checking is reported as a test failure.
TEST_F(DictionaryRunfilesTest, TWPhrasesReverseMapping) {
  // key -> set of all values listed for that key.
  using PhraseMap =
      std::unordered_map<std::string, std::unordered_set<std::string>>;

  const std::string twPhrasesFile =
      runfiles_->Rlocation("_main/data/dictionary/TWPhrases.txt");
  const std::string twPhrasesRevFile =
      runfiles_->Rlocation("_main/data/dictionary/TWPhrasesRev.txt");

  // Opens and parses the lexicon at `path`. Records a non-fatal failure and
  // returns an empty pointer when the file cannot be opened.
  auto loadLexicon = [](const std::string& path) -> LexiconPtr {
    // The stream is owned here so it is closed on normal return and also when
    // parsing throws; the deleter is not invoked for a null pointer.
    const std::unique_ptr<FILE, int (*)(FILE*)> fp(OpenFile(path), &fclose);
    EXPECT_NE(fp.get(), nullptr) << path;
    if (fp == nullptr) {
      return LexiconPtr();
    }
    return Lexicon::ParseLexiconFromFile(fp.get());
  };

  // Flattens a lexicon into a PhraseMap. Values of entries sharing a key are
  // merged into one set. An empty pointer yields an empty map.
  auto buildMap = [](const LexiconPtr& lexicon) -> PhraseMap {
    PhraseMap map;
    if (!lexicon) {
      return map;
    }
    for (size_t i = 0; i < lexicon->Length(); ++i) {
      const DictEntry* entry = lexicon->At(i);
      auto& values = map[entry->Key()];
      for (const auto& value : entry->Values()) {
        values.insert(value);
      }
    }
    return map;
  };

  try {
    LexiconPtr twPhrases = loadLexicon(twPhrasesFile);
    LexiconPtr twPhrasesRev = loadLexicon(twPhrasesRevFile);
    ASSERT_NE(twPhrases, nullptr);
    ASSERT_NE(twPhrasesRev, nullptr);

    const PhraseMap twMap = buildMap(twPhrases);
    const PhraseMap twRevMap = buildMap(twPhrasesRev);

    // Direction 1: each TWPhrases pair must be mirrored in TWPhrasesRev.
    for (const auto& entry : twMap) {
      const std::string& key = entry.first;
      for (const auto& value : entry.second) {
        auto it = twRevMap.find(value);
        EXPECT_TRUE(it != twRevMap.end() && it->second.count(key) > 0)
            << "Missing reverse mapping: " << key << " -> " << value;
      }
    }

    // Direction 2: each TWPhrasesRev pair must be mirrored in TWPhrases.
    for (const auto& entry : twRevMap) {
      const std::string& key = entry.first;
      for (const auto& value : entry.second) {
        auto it = twMap.find(value);
        EXPECT_TRUE(it != twMap.end() && it->second.count(key) > 0)
            << "Missing reverse mapping: " << key << " -> " << value;
      }
    }
  } catch (const Exception& ex) {
    FAIL() << "Exception: " << ex.what();
  } catch (const std::exception& ex) {
    FAIL() << "std::exception: " << ex.what();
  } catch (...) {
    FAIL() << "Unknown exception thrown during reverse mapping check.";
  }
}

} // namespace opencc