from pathlib import Path
from lindera import Tokenizer, load_dictionary, load_user_dictionary
# Directory two levels above this file's resolved location; main() joins the
# relative "resources/..." path onto it to locate the user dictionary CSV.
project_root = Path(__file__).resolve().parent.parent
def main():
    """Tokenize a sample sentence with IPADIC plus a user dictionary.

    Loads the ``embedded://ipadic`` dictionary, loads the user dictionary
    CSV found at ``resources/ipadic_simple_userdic.csv`` under
    ``project_root`` (passing the dictionary's metadata), builds a
    ``Tokenizer`` in ``"normal"`` mode with both, prints the input text,
    and then prints the surface form of every token on its own line.
    """
    # Locate the user dictionary CSV relative to the project root.
    userdic_csv = project_root / Path("./resources/ipadic_simple_userdic.csv")

    base_dict = load_dictionary("embedded://ipadic")
    # The user dictionary loader takes the path as a string together with
    # the metadata object obtained from the base dictionary.
    user_dict = load_user_dictionary(str(userdic_csv), base_dict.metadata())

    tokenizer = Tokenizer(base_dict, mode="normal", user_dictionary=user_dict)

    text = "関西国際空港限定トートバッグを東京スカイツリーの最寄り駅であるとうきょうスカイツリー駅で買う"
    print(f"text: {text}\n")

    # Emit one surface form per line.
    for token in tokenizer.tokenize(text):
        print(token.surface)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()