progress, haiku, and shakespeare

This commit is contained in:
Tyler Hallada 2015-07-14 00:03:05 -04:00
parent f1a295ab3a
commit 3142cf0fb3

View File

@ -1,26 +1,79 @@
import nltk import nltk
import random import random
from stat_parser import Parser import re
from textstat.textstat import textstat
#from stat_parser import Parser
class PoemGenerator(): class PoemGenerator():
def __init__(self, corpus): def __init__(self, corpus):
self.sents = corpus.sents('austen-emma.txt') #self.sents = corpus.sents('austen-emma.txt')
self.bigrams = list(nltk.bigrams(corpus.words('austen-emma.txt'))) self.bigrams = list(nltk.bigrams(corpus.words('shakespeare-hamlet.txt')))
self.only_punctuation = re.compile(r'[^\w\s]+$')
self.all_words = [bigram[0] for bigram in self.bigrams
if not self.only_punctuation.match(bigram[0])]
self.cfd = nltk.ConditionalFreqDist(self.bigrams) self.cfd = nltk.ConditionalFreqDist(self.bigrams)
self.parser = Parser() #self.parser = Parser()
self.history = [] self.history = []
def generate_poem(self): def markov(self, word, n):
sent = random.choice(self.sents) if n > 0:
parsed = self.parser.parse(' '.join(sent))
word = random.choice(self.bigrams)[0]
for i in range(15):
print word, print word,
for gram in self.cfd[word].items(): n = n - 1
import ipdb; ipdb.set_trace() # BREAKPOINT self.markov(random.choice(self.cfd[word].items())[0], n)
else:
print ''
def generate_poem(self):
#sent = random.choice(self.sents)
#parsed = self.parser.parse(' '.join(sent))
word = random.choice(self.bigrams)[0]
self.markov(word, 15)
def haiku_line(self, line, current_syllables, next_words,
target_syllables):
if next_words == []:
# this branch failed
return None
else:
word = random.choice(next_words)
new_line = line[:]
new_line.append(word)
new_syllables = textstat.syllable_count(' '.join(new_line))
if new_syllables == target_syllables:
return new_line
elif new_syllables > target_syllables:
new_next_words = next_words[:]
new_next_words.remove(word)
return self.haiku_line(line, current_syllables, new_next_words,
target_syllables)
else:
new_next_words = [freq[0] for freq in self.cfd[word].items()
if not self.only_punctuation.match(freq[0])]
branch = self.haiku_line(new_line, new_syllables, new_next_words,
target_syllables)
if branch:
return branch
else:
new_next_words = next_words[:]
new_next_words.remove(word)
return self.haiku_line(line, current_syllables, new_next_words,
target_syllables)
def generate_haiku(self):
first = self.haiku_line([], 0, self.all_words, 5)
print ' '.join(first)
next_words = [freq[0] for freq in self.cfd[first[-1]].items()
if not self.only_punctuation.match(freq[0])]
second = self.haiku_line([], 0, next_words, 7)
print ' '.join(second)
next_words = [freq[0] for freq in self.cfd[first[-1]].items()
if not self.only_punctuation.match(freq[0])]
third = self.haiku_line([], 0, next_words, 5)
print ' '.join(third)
if __name__ == '__main__': if __name__ == '__main__':
generator = PoemGenerator(nltk.corpus.gutenberg) generator = PoemGenerator(nltk.corpus.gutenberg)
print generator.generate_poem() #generator.generate_poem()
generator.generate_haiku()