|
@@ -0,0 +1,26 @@
|
|
1
|
+import nltk
|
|
2
|
+import random
|
|
3
|
+from stat_parser import Parser
|
|
4
|
+
|
|
5
|
+
|
|
6
|
class PoemGenerator(object):
    """Generate short pseudo-poems by walking a bigram chain over a corpus.

    The generator indexes one corpus file as a list of bigrams plus a
    conditional frequency distribution, then produces text by repeatedly
    sampling a follower of the current word.
    """

    def __init__(self, corpus, fileid='austen-emma.txt'):
        """Index ``fileid`` from ``corpus`` for later generation.

        Args:
            corpus: Object exposing ``sents(fileid)`` and ``words(fileid)``
                (the script entry point passes ``nltk.corpus.gutenberg``).
            fileid: Name of the corpus file to index. Defaults to the file
                that used to be hard-coded here.
        """
        self.sents = corpus.sents(fileid)
        self.bigrams = list(nltk.bigrams(corpus.words(fileid)))
        # Maps each word to the frequency distribution of its followers.
        self.cfd = nltk.ConditionalFreqDist(self.bigrams)
        # Kept as public attributes for existing callers; generate_poem()
        # itself does not read the sentences, the parser or the history.
        self.parser = Parser()
        self.history = []

    def generate_poem(self, length=15):
        """Return a poem of ``length`` space-separated words.

        The first word is the head of a randomly chosen bigram; every later
        word is sampled from the followers of the previous word, weighted by
        how often each follower occurred.

        Args:
            length: Number of words to produce. Defaults to 15.

        Returns:
            The generated words joined by single spaces, or an empty string
            when ``length`` is not positive or no bigrams are indexed.
        """
        if length <= 0 or not self.bigrams:
            return ''
        word = self._random_start()
        words = [word]
        while len(words) < length:
            word = self._next_word(word)
            words.append(word)
        return ' '.join(words)

    def _random_start(self):
        """Return the first word of a uniformly chosen bigram."""
        return random.choice(self.bigrams)[0]

    def _next_word(self, word):
        """Return a follower of ``word`` sampled by (integer) frequency.

        Falls back to a fresh random start when ``word`` has no recorded
        follower, so the chain never gets stuck.
        """
        # Test membership before indexing so that looking up an unseen word
        # cannot add a new (empty) condition to the distribution.
        followers = self.cfd[word] if word in self.cfd else {}
        candidates = [(nxt, count) for nxt, count in followers.items()
                      if count > 0]
        if not candidates:
            return self._random_start()
        # Weighted pick: draw a position in [0, total) and walk the counts.
        remaining = random.randrange(sum(count for _, count in candidates))
        for nxt, count in candidates:
            if remaining < count:
                return nxt
            remaining -= count
        # Not reachable with positive integer counts; kept as a safe default.
        return candidates[-1][0]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
if __name__ == '__main__':
    # Script entry point: index NLTK's Gutenberg corpus and print whatever
    # generate_poem() returns. The parenthesised single-argument print runs
    # on both Python 2 and Python 3.
    generator = PoemGenerator(nltk.corpus.gutenberg)
    print(generator.generate_poem())
|