#include "MaxMatchSegmentation.hpp"
using namespace opencc;
/**
 * Splits `text` into a sequence of segments driven by dictionary prefix
 * matches.
 *
 * The text is scanned left to right. At each position the dictionary is
 * asked for a prefix match on the remaining bytes:
 *   - on a hit, any pending run of unmatched bytes is emitted as one segment,
 *     then the matched entry's key is emitted as its own segment;
 *   - on a miss, the current character (length as reported by
 *     UTF8Util::NextCharLength) is appended to the pending unmatched run.
 * Whatever is still pending when the scan ends is emitted as a last segment.
 *
 * Scanning stops at the first NUL byte of the underlying C string.
 *
 * @param text Input string to segment.
 * @return Newly allocated Segments holding the pieces in input order.
 */
SegmentsPtr MaxMatchSegmentation::Segment(const std::string& text) const {
  SegmentsPtr result(new Segments);

  const char* const begin = text.c_str();
  const char* const end = begin + text.length();

  // Pending run of unmatched bytes: [pendingStart, pendingStart + pendingLength).
  const char* pendingStart = begin;
  size_t pendingLength = 0;

  // Emits the pending unmatched run (if any) as a single segment and resets
  // its length; pendingStart is repositioned by the caller after a match.
  const auto flushPending = [&]() {
    if (pendingLength == 0) {
      return;
    }
    result->AddSegment(UTF8Util::FromSubstr(pendingStart, pendingLength));
    pendingLength = 0;
  };

  const char* cursor = begin;
  while (*cursor != '\0') {
    const size_t bytesLeft = end - cursor;
    const Optional<const DictEntry*>& hit =
        dict->MatchPrefix(cursor, bytesLeft);

    if (!hit.IsNull()) {
      // Dictionary hit: close the unmatched run first so output order
      // follows input order, then emit the matched key.
      flushPending();
      const size_t keyBytes = hit.Get()->KeyLength();
      result->AddSegment(hit.Get()->Key());
      cursor += keyBytes;
      // The next unmatched run, if any, begins right after the match.
      pendingStart = cursor;
      continue;
    }

    // No dictionary hit: extend the unmatched run by one character.
    size_t charBytes = UTF8Util::NextCharLength(cursor);
    if (charBytes > bytesLeft) {
      // Never step past the end of the buffer, even if the lead byte
      // announces a longer sequence than what is actually left.
      charBytes = bytesLeft;
    }
    pendingLength += charBytes;
    cursor += charBytes;
  }

  flushPending();
  return result;
}