libsimple 0.9.0

Rust bindings to simple, a SQLite3 FTS5 tokenizer that supports Chinese and Pinyin.
#ifndef SIMPLE_TOKENIZER_H_
#define SIMPLE_TOKENIZER_H_

#include <memory>
#include <string>

#ifdef USE_JIEBA
#include "cppjieba/Jieba.hpp"
#endif
#include "pinyin.h"
#include "sqlite3ext.h"

// Pointer to a token-sink callback: takes (void *, int, const char *, int, int, int)
// and returns int. It is the type of the `xToken` argument accepted by
// fts5_simple_xTokenize and SimpleTokenizer::tokenize.
// NOTE(review): the shape matches the xToken callback of the SQLite FTS5
// tokenizer API (context, token flags, token bytes, token length, start offset,
// end offset) — confirm against the FTS5 documentation.
using xTokenFn = int (*)(void *, int, const char *, int, int, int);

namespace simple_tokenizer {

#ifdef USE_JIEBA
// Declaration only (the definition lives in another translation unit); visible
// only when the build defines USE_JIEBA.
// NOTE(review): presumably the filesystem location of the cppjieba dictionary
// files — confirm against the definition and its callers.
extern std::string jieba_dict_path;
#endif

// Category tag handed to SimpleTokenizer::append_result together with a piece
// of text. The numeric values are the same ones the compiler would assign
// implicitly (0..3); they are spelled out only for readability.
// NOTE(review): the enumerator names suggest a per-character classification
// (whitespace / ASCII letters / digits / everything else) — confirm in the
// tokenizer implementation.
enum class TokenCategory {
  SPACE = 0,
  ASCII_ALPHABETIC = 1,
  DIGIT = 2,
  OTHER = 3,
};

// Tokenizer type of this library. Only declarations are visible in this header;
// every member function is defined in another translation unit, so behavioural
// notes below are marked as assumptions where the header alone cannot prove them.
class SimpleTokenizer {
 public:
  // Builds a tokenizer from `nArg` C-string arguments stored in `zaArg`.
  // NOTE(review): presumably the tokenizer arguments received by
  // fts5_simple_xCreate — confirm in the definition.
  SimpleTokenizer(const char **zaArg, int nArg);

  // Tokenizes `textLen` bytes starting at `text`, taking an opaque context
  // pointer, a flags value and a token callback. Does not modify this object
  // (const member). Returns an int.
  // NOTE(review): presumably calls `xToken(pCtx, ...)` once per token and
  // returns an SQLite result code — confirm in the definition.
  int tokenize(void *pCtx, int flags, const char *text, int textLen, xTokenFn xToken) const;

  // Produces a string from `textLen` bytes of `text`; `flags` defaults to 1.
  // NOTE(review): presumably the result is an FTS5 MATCH expression and `flags`
  // toggles pinyin expansion — confirm in the definition.
  static std::string tokenize_query(const char *text, int textLen, int flags = 1);

  // Takes the path of a pinyin file; returns a bool and may write to `err`.
  // NOTE(review): presumably true means the dictionary was loaded and `err`
  // carries the failure reason otherwise — confirm in the definition.
  static bool set_pinyin_dict(const std::string &pinyin_file_path, std::string &err);

#ifdef USE_JIEBA
  // Same parameter list as tokenize_query; only declared when USE_JIEBA is defined.
  // NOTE(review): presumably segments the text with cppjieba first — confirm.
  static std::string tokenize_jieba_query(const char *text, int textLen, int flags = 1);
#endif

 private:
  // Returns a shared handle to a PinYin object.
  // NOTE(review): presumably a lazily created process-wide instance — confirm.
  static std::shared_ptr<PinYin> get_pinyin();

  // Helper that receives the output string by reference plus one text part (by
  // value), its category, an offset and the flags value.
  // NOTE(review): presumably appends the query fragment for `part` to `result` — confirm.
  static void append_result(std::string &result, std::string part, TokenCategory category, int offset, int flags);

  // Per-instance switch, on by default.
  // NOTE(review): presumably cleared by a constructor argument — confirm.
  bool enable_pinyin{true};
};

}  // namespace simple_tokenizer

// C-linkage entry points exported by this library. They are declared inside one
// extern "C" block; for function declarations this is equivalent to prefixing
// each declaration with `extern "C"`.
extern "C" {

// Tokenizer "create" hook: receives an opaque pointer, `nArg` argument strings
// in `azArg`, and an out-parameter for the new tokenizer handle.
// NOTE(review): the signature matches fts5_tokenizer::xCreate in the SQLite
// FTS5 API; the first parameter is a plain `void *` that merely happens to be
// named like the `sqlite3` type — confirm how the definition uses it.
int fts5_simple_xCreate(void *sqlite3, const char **azArg, int nArg, Fts5Tokenizer **ppOut);

// Tokenizer "tokenize" hook: takes the tokenizer handle, an opaque context,
// flags, `nText` bytes of `pText`, and the token callback.
// NOTE(review): the signature matches fts5_tokenizer::xTokenize; presumably
// forwards to SimpleTokenizer::tokenize — confirm in the definition.
int fts5_simple_xTokenize(Fts5Tokenizer *tokenizer_ptr, void *pCtx, int flags, const char *pText, int nText,
                          xTokenFn xToken);

// Tokenizer "delete" hook for a handle produced via `ppOut` above.
// NOTE(review): the signature matches fts5_tokenizer::xDelete — confirm that it
// releases the object allocated by fts5_simple_xCreate.
void fts5_simple_xDelete(Fts5Tokenizer *tokenizer_ptr);

// Entry point with the parameter list of an SQLite loadable-extension init
// routine (database handle, error-message out-parameter, API routine table).
// NOTE(review): presumably registers the tokenizer and helper SQL functions on
// `db` — confirm in the definition.
int sqlite3_simple_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi);

}  // extern "C"

#endif  // SIMPLE_TOKENIZER_H_