Various scripts for playing around with natural language processing/generation

json_to_txt.py 554B

123456789101112131415161718192021
  1. # Converts a JSON Twitter dump to a raw text file.
  2. import codecs
  3. import json
  4. import sys
  5. def get_text_from_json(filename):
  6. with codecs.open(filename, 'r', 'utf-8') as f:
  7. return [item['text'] for item in json.loads(f.read())]
  8. def write_text_to_file(filename, text_array, delimiter=' '):
  9. text_to_write = delimiter.join(text_array)
  10. with codecs.open(filename, 'w', 'utf-8') as f:
  11. f.write(text_to_write)
  12. if __name__ == '__main__':
  13. text_array = get_text_from_json(sys.argv[1])
  14. write_text_to_file(sys.argv[2], text_array)