def _append_new_words(candidates, unique_words, seen, target_count, max_length=None):
    """Append unseen candidates to *unique_words* until it holds *target_count* words.

    Args:
        candidates: Iterable of candidate strings, consumed in order.
        unique_words: List extended in place with the accepted candidates.
        seen: Set of words already accepted; updated in place.
        target_count: Stop as soon as ``unique_words`` reaches this length.
        max_length: If given, candidates longer than this are skipped.
    """
    for candidate in candidates:
        if len(unique_words) >= target_count:
            return
        if candidate in seen:
            continue
        if max_length is not None and len(candidate) > max_length:
            continue
        seen.add(candidate)
        unique_words.append(candidate)


def main(
    input_path='data/natural_readable_word_list_65k.txt',
    output_path='data/human_readable_word_list_65k.txt',
    target_count=65536,
):
    """Build a de-duplicated, alphabetic-only word list and write it to disk.

    The input file is read one word per line (blank lines ignored). Words that
    are duplicates or fail ``str.isalpha()`` are dropped, keeping first-seen
    order. If fewer than ``target_count`` words remain, the list is padded
    with generated words, in this order: suffixed forms of the first 10000
    words, prefixed forms of the same words (both limited to 12 characters),
    and finally colour+object compounds. The result is truncated to
    ``target_count`` entries and written newline-separated, without a
    trailing newline.

    Args:
        input_path: Path of the source word list.
        output_path: Path the resulting word list is written to.
        target_count: Number of words the output should contain.

    Raises:
        ValueError: If ``target_count`` is negative.
        OSError: If the input cannot be read or the output cannot be written.
    """
    if target_count < 0:
        raise ValueError(f"target_count must be non-negative, got {target_count}")

    with open(input_path, 'r', encoding='utf-8') as f:
        words = [word for word in (line.strip() for line in f) if word]
    print(f"Original word count: {len(words)}")

    seen = set()
    unique_words = []
    for word in words:
        # Recording the word and appending it must share one condition;
        # appending unconditionally lets duplicates and non-alphabetic
        # entries through and defeats the whole filter.
        if word not in seen and word.isalpha():
            seen.add(word)
            unique_words.append(word)
    print(f"Unique words: {len(unique_words)}")

    if len(unique_words) < target_count:
        needed = target_count - len(unique_words)
        print(f"Need {needed} more words")

        # Snapshot the bases before extending so generated words are never
        # themselves used as bases.
        base_words = unique_words[:10000]

        suffixes = ('ly', 'ful', 'less', 'ness', 'ment', 'able', 'ible', 'ish', 'ize', 'ify')
        _append_new_words(
            (base + suffix for base in base_words for suffix in suffixes),
            unique_words, seen, target_count, max_length=12,
        )

        prefixes = ('re', 'un', 'pre', 'post', 'over', 'under', 'out', 'up', 'down', 'anti')
        _append_new_words(
            (prefix + base for base in base_words for prefix in prefixes),
            unique_words, seen, target_count, max_length=12,
        )

        colors = ('red', 'blue', 'green', 'black', 'white', 'yellow', 'pink', 'brown', 'gray', 'orange')
        objects = ('box', 'ball', 'hat', 'bag', 'cup', 'pen', 'book', 'door', 'key', 'star')
        _append_new_words(
            (color + obj for color in colors for obj in objects),
            unique_words, seen, target_count,
        )

    unique_words = unique_words[:target_count]

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(unique_words))

    print(f"Final word count: {len(unique_words)}")
    if len(unique_words) < target_count:
        # All padding strategies were exhausted; make the shortfall visible.
        print(f"Warning: only {len(unique_words)} of {target_count} words available")
    print("Dictionary created successfully!")

    # Sanity checks on the written list: uniqueness and alphabetic-only content.
    final_check = len(set(unique_words))
    print(f"Unique words in final dictionary: {final_check}")
    non_alpha = [w for w in unique_words if not w.isalpha()]
    if non_alpha:
        print(f"Warning: {len(non_alpha)} non-alphabetic words found")
        print(f"First few: {non_alpha[:5]}")
# Run the word-list build only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()