import fileinput
import json
import logging
import sys
from argparse import ArgumentParser
from argparse import Namespace as ArgumentNamespace
from typing import Any, Dict, NewType
# Module-level logger, named after this module so records can be filtered by
# origin.
log = logging.getLogger(__name__)
# Distinct static type for a JSON-decoded comment object; at runtime it is a
# plain ``dict``.
CommentDict = NewType("CommentDict", Dict[str, Any])


def _pop_annotations(comment: Dict[str, Any], suffix: str) -> Dict[str, Any]:
    """Pop ``assigned_<suffix>`` / ``dismissed_<suffix>`` out of *comment*.

    Returns the values that were present and not ``None``, keyed by
    ``"assigned"`` / ``"dismissed"``.
    """
    found = {}
    for status in ("assigned", "dismissed"):
        # Pop unconditionally so a legacy key holding null does not linger
        # inside the nested comment body.
        value = comment.pop(status + "_" + suffix, None)
        if value is not None:
            found[status] = value
    return found


def convert_comment(old_comment: CommentDict) -> CommentDict:
    """Convert a comment from the flat legacy layout to the nested one.

    The legacy keys ``assigned_labels``, ``dismissed_labels``,
    ``assigned_entities`` and ``dismissed_entities`` are lifted out of the
    comment body. Everything else is kept under the ``"comment"`` key.

    Args:
        old_comment: Comment in the flat format. It is not modified; the
            conversion works on a shallow copy.

    Returns:
        A new dict with:
          * ``"comment"``: the remaining fields of ``old_comment``;
          * ``"labelling"``: ``{"assigned": ..., "dismissed": ...}``, present
            only if at least one label key had a non-null value;
          * ``"entities"``: same shape for the entity keys, present only if
            at least one entity key had a non-null value.
    """
    # Shallow copy: the caller's dict must survive the conversion unchanged.
    comment = dict(old_comment)

    labellings = _pop_annotations(comment, "labels")
    entities = _pop_annotations(comment, "entities")

    new_comment = CommentDict({"comment": comment})
    # An empty list is still a value worth carrying over; only a group with
    # no keys at all is omitted.
    if labellings:
        new_comment["labelling"] = labellings
    if entities:
        new_comment["entities"] = entities
    return new_comment
def run(args: ArgumentNamespace) -> None:
    """Convert every comment in ``args.file`` and print it to stdout.

    The input is read line by line, each non-blank line is parsed as JSON,
    passed through ``convert_comment``, and written to stdout as one JSON
    document per line.

    Args:
        args: Parsed command-line arguments; ``args.file`` is the path handed
            to ``fileinput.input`` (``"-"`` selects stdin).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # The context manager closes the underlying file even when a line fails
    # to parse.
    with fileinput.input(args.file) as lines:
        for line in lines:
            # Blank separator lines are not JSON documents; skip them rather
            # than crash in json.loads.
            if not line.strip():
                continue
            converted = convert_comment(json.loads(line))
            sys.stdout.write(json.dumps(converted))
            sys.stdout.write("\n")
            # Flush after each record so downstream consumers of a pipe see
            # output as it is produced.
            sys.stdout.flush()
def configure_parser(parser: ArgumentParser) -> None:
    """Attach this tool's description and arguments to *parser*.

    Sets ``parser.description`` and registers a single positional argument,
    ``file``, holding the input path.

    Args:
        parser: The parser to configure; it is modified in place.
    """
    # The key names listed here match what the converter actually emits:
    # `comment`, `labelling` and `entities`.
    parser.description = """
    Converts comments with annotations from the old format (flat) to the one
    used by the CLI with top level keys: `comment`, `labelling`, `entities`.
    """
    parser.add_argument(
        "file",
        type=str,
        action="store",
        metavar="PATH",
        help=(
            "Path to a jsonl file with comments in old format. "
            "Pass - to use stdin"
        ),
    )
def main() -> None:
    """Command-line entry point.

    Builds a fresh argument parser, configures it, parses the process
    arguments and hands the result to ``run``.
    """
    cli = ArgumentParser()
    configure_parser(cli)
    run(cli.parse_args())
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()