diff --git a/app.py b/app.py
index 6e1a3e5..430c9a8 100644
--- a/app.py
+++ b/app.py
@@ -10,7 +10,7 @@
 def home():
     return redirect('http://git.hallada.net/nlp/')
 
-@app.route("/buzzfeed-haiku-generator/")
+@app.route("/nlp/buzzfeed_haiku_generator/")
 def buzzfeed_haiku_generator():
     haiku = generator.generate_haiku()
     return render_template('buzzfeed-haiku-generator.html', haiku=haiku)
diff --git a/generate_poem.py b/generate_poem.py
index 106cc27..ebab7c2 100644
--- a/generate_poem.py
+++ b/generate_poem.py
@@ -33,31 +33,31 @@ class PoemGenerator():
         self.words = []
         self.all_words = []
         self.inflect_engine = inflect.engine()
-        # with open('/var/www/buzzfeed-haiku-generator/buzzfeed_facebook_statuses.csv', newline='', encoding='utf-8') as statuses:
-        #     reader = csv.reader(statuses, delimiter=',')
-        #     for row in reader:
-        #         if 'via buzzfeed ' not in row[1].lower():  # only English
-        #             # split title into a list of words and punctuation
-        #             title = self.spaces_and_punctuation.findall(row[2])
-        #             # spell out digits into ordinal words for syllable counting
-        #             title = [string.capwords(
-        #                 self.inflect_engine.number_to_words(int(word)))
-        #                 if word.isdigit() else word for word in title]
-        #             self.sents.append(title)
-        #             self.words.extend(title)
-        #             # all_words only contains words, no punctuation
-        #             self.all_words.extend([word for word in title
-        #                                    if not
-        #                                    self.only_punctuation.match(word)])
-        with codecs.open('trump.txt', 'r', 'utf-8') as corpus:
-            text = corpus.read()
-            sents = nltk.tokenize.sent_tokenize(text)
-            words = nltk.tokenize.word_tokenize(text)
-            self.sents.extend(sents)
-            self.words.extend(words)
-            self.all_words.extend([word for word in words
-                                   if not
-                                   self.only_punctuation.match(word)])
+        with open('buzzfeed_facebook_statuses.csv', newline='', encoding='utf-8') as statuses:
+            reader = csv.reader(statuses, delimiter=',')
+            for row in reader:
+                if 'via buzzfeed ' not in row[1].lower():  # only English
+                    # split title into a list of words and punctuation
+                    title = self.spaces_and_punctuation.findall(row[2])
+                    # spell out digits into ordinal words for syllable counting
+                    title = [string.capwords(
+                        self.inflect_engine.number_to_words(int(word)))
+                        if word.isdigit() else word for word in title]
+                    self.sents.append(title)
+                    self.words.extend(title)
+                    # all_words only contains words, no punctuation
+                    self.all_words.extend([word for word in title
+                                           if not
+                                           self.only_punctuation.match(word)])
+        # with codecs.open('trump.txt', 'r', 'utf-8') as corpus:
+        #     text = corpus.read()
+        #     sents = nltk.tokenize.sent_tokenize(text)
+        #     words = nltk.tokenize.word_tokenize(text)
+        #     self.sents.extend(sents)
+        #     self.words.extend(words)
+        #     self.all_words.extend([word for word in words
+        #                            if not
+        #                            self.only_punctuation.match(word)])
         self.bigrams = list(nltk.bigrams(self.words))
         self.cfd = nltk.ConditionalFreqDist(self.bigrams)
         #self.parser = Parser()
@@ -142,5 +142,5 @@ if __name__ == '__main__':
     generator = PoemGenerator()
     #generator.generate_poem()
     haiku = generator.generate_haiku()
-    print haiku
+    print(haiku)
     #generator.generate_endless_poem(None)
diff --git a/nlp.py b/nlp.py
new file mode 100644
index 0000000..3a43937
--- /dev/null
+++ b/nlp.py
@@ -0,0 +1,4 @@
+from app import app
+
+if __name__ == '__main__':
+    app.run()
diff --git a/nlp.wsgi b/nlp.wsgi
deleted file mode 100755
index 6e49f78..0000000
--- a/nlp.wsgi
+++ /dev/null
@@ -1,8 +0,0 @@
-import os
-import sys
-sys.path.append('/var/www/nlp')
-
-import logging
-logging.basicConfig(stream=sys.stderr)
-
-from app import app as application
diff --git a/requirements.txt b/requirements.txt
index 3b40a61..1e118a4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,12 @@
 click==6.7
 Flask==0.12.2
+gunicorn==19.7.1
 inflect==0.2.5
 itsdangerous==0.24
 Jinja2==2.9.6
 MarkupSafe==1.0
 nltk==3.2.4
-pyStatParser==0.0.1
+# pyStatParser==0.0.1 manually install from git repo (python2 only)
 six==1.10.0
 tqdm==4.14.0
 Werkzeug==0.12.2