#include "Conversion.hpp"
#include "PrefixMatch.hpp"
#include "Segments.hpp"
#include "UTF8Util.hpp"
using namespace opencc;
// Builds a Conversion over the dictionary `_dict`.
// Stores the dictionary handle in `dict` and allocates a PrefixMatch that is
// constructed from the same dictionary; that matcher is what
// AppendConverted() queries for every input position.
Conversion::Conversion(DictPtr _dict)
: dict(_dict), prefixMatch(new PrefixMatch(_dict)) {}
// Converts the NUL-terminated string `phrase` and returns the result as a
// freshly built std::string. All of the work is done by AppendConverted(),
// which is handed an initially empty buffer.
std::string Conversion::Convert(const char* phrase) const {
  std::string converted;
  AppendConverted(phrase, &converted);
  return converted;
}
void Conversion::AppendConverted(const char* phrase, std::string* output) const {
const char* phraseEnd = phrase;
while (*phraseEnd != '\0') {
phraseEnd++;
}
const size_t phraseLength = phraseEnd - phrase;
output->reserve(output->size() + phraseLength + phraseLength / 5);
for (const char* pstr = phrase; *pstr != '\0';) {
size_t remainingLength = phraseEnd - pstr;
const PrefixMatch::Match matched =
prefixMatch->MatchPrefix(pstr, remainingLength);
size_t matchedLength;
if (!matched.matched) {
matchedLength = UTF8Util::NextCharLength(pstr);
if (matchedLength > remainingLength) {
matchedLength = remainingLength;
}
output->append(pstr, matchedLength);
} else {
matchedLength = matched.keyLength;
if (matchedLength > remainingLength) {
matchedLength = remainingLength;
}
output->append(*matched.value);
}
pstr += matchedLength;
}
}
// Converts `phrase` and returns the converted text.
// Delegates to the `const char*` overload using the string's C
// representation, so conversion stops at the first NUL byte: anything after
// an embedded '\0' in `phrase` is not part of the result.
std::string Conversion::Convert(const std::string& phrase) const {
  const char* const rawPhrase = phrase.c_str();
  return Convert(rawPhrase);
}
// Converts every segment of `input` independently and returns a newly
// allocated Segments holding the converted strings in iteration order.
// `input` is dereferenced without a null check.
SegmentsPtr Conversion::Convert(const SegmentsPtr& input) const {
  SegmentsPtr converted(new Segments);
  auto& source = *input;
  for (const char* piece : source) {
    // Each piece goes through the `const char*` overload; the resulting
    // std::string temporary is handed straight to AddSegment().
    converted->AddSegment(Convert(piece));
  }
  return converted;
}