32 lines
845 B
Python
32 lines
845 B
Python
import nltk
|
|
import random
|
|
import string
|
|
import sys
|
|
|
|
|
|
def main(text, length=155):
    """Print pseudo-random text generated from a bigram model of *text*.

    The input is tokenized with ``nltk.word_tokenize``; tokens consisting
    solely of ``string.punctuation`` characters are dropped, and a
    ``nltk.ConditionalFreqDist`` is built from the bigrams of what remains.
    Starting from the head of a randomly chosen bigram, each next word is
    drawn uniformly from the distinct followers of the current word; on
    every third step (0, 3, 6, ...) the draw is limited to the three most
    frequent followers.

    Args:
        text: The corpus, either as UTF-8 encoded bytes or as already
            decoded text.
        length: Number of words to emit (defaults to 155).

    Returns:
        None. The words are written to standard output separated by single
        spaces and terminated by a newline. Nothing is written when the
        corpus yields no bigrams or ``length`` is not positive.
    """
    # Accept raw bytes (what a Python 2 read or a binary read yields) as
    # well as text that has already been decoded.
    if isinstance(text, bytes):
        text = text.decode('utf8')

    punctuation = set(string.punctuation)
    # Keep a token only if it has at least one non-punctuation character.
    tokens = [token for token in nltk.word_tokenize(text)
              if set(token) - punctuation]
    bigrams = list(nltk.bigrams(tokens))
    if not bigrams or length <= 0:
        # Fewer than two usable tokens: there is no model to sample from.
        return

    cfdist = nltk.ConditionalFreqDist(bigrams)
    word = random.choice(bigrams)[0]
    words = []
    for step in range(length):
        words.append(word)
        if step % 3:
            # Iterating a frequency distribution yields each distinct
            # follower once, so this is a uniform draw over all of them.
            candidates = tuple(cfdist[word])
        else:
            # Peel off the (up to) three most frequent followers from a
            # copy so the model itself is left intact.
            remaining = cfdist[word].copy()
            candidates = []
            for _ in range(3):
                if not remaining:
                    break
                candidates.append(remaining.max())
                del remaining[candidates[-1]]
        if candidates:
            word = random.choice(candidates)
        else:
            # Dead end: the word was never seen with a follower (e.g. it is
            # only the final token of the corpus). Restart from a random
            # bigram head instead of failing on an empty choice.
            word = random.choice(bigrams)[0]

    # Single write keeps the output format of the original print loop
    # (space-separated words, trailing newline) on both Python 2 and 3.
    sys.stdout.write(' '.join(words) + '\n')
|
|
|
|
if __name__ == '__main__':
    # The first command-line argument is the path of the corpus file; any
    # further arguments are ignored.
    if len(sys.argv) < 2:
        sys.exit('usage: %s CORPUS_FILE' % sys.argv[0])
    # Read raw bytes: main() is handed undecoded UTF-8 data, and a binary
    # read yields a decodable bytes object on every Python version.
    with open(sys.argv[1], 'rb') as corpus:
        main(corpus.read())
|