Browse Source

Track existing code

Tyler Hallada 8 years ago
commit
f1a295ab3a
4 changed files with 58 additions and 0 deletions
  1. 5 0
      .gitignore
  2. 11 0
      code_random_text.py
  3. 26 0
      generate_poem.py
  4. 16 0
      generate_random.py

+ 5 - 0
.gitignore

@@ -0,0 +1,5 @@
1
+[._]*.s[a-w][a-z]
2
+[._]s[a-w][a-z]
3
+*.pyc
4
+lib
5
+pyStatParser

+ 11 - 0
code_random_text.py

@@ -0,0 +1,11 @@
1
+import nltk
2
+# Natural Language Toolkit: code_random_text
3
+
4
def generate_model(cfdist, word, num=15):
    """Print a chain of up to ``num`` words, each the likeliest successor of the last.

    Starting from ``word``, every step emits the current word and then moves
    to ``cfdist[word].max()``. The words are written to standard output on a
    single line, separated by spaces.

    Args:
        cfdist: Mapping from a word to an object exposing ``max()``; in this
            script an ``nltk.ConditionalFreqDist`` built from bigrams.
        word: First word of the chain.
        num: Maximum number of words to emit.
    """
    chain = []
    for step in range(num):
        chain.append(word)
        if step == num - 1:
            # The last word needs no successor, so skip a lookup that could
            # only fail.
            break
        followers = cfdist[word]
        if not followers:
            # Dead end: an empty distribution has no maximum to move to, so
            # stop early instead of letting ``max()`` raise.
            break
        word = followers.max()
    if chain:
        # One parenthesised call is valid both as the Python 2 print
        # statement and as the Python 3 print function.
        print(' '.join(chain))
8
+
9
# Tokens of the King James Genesis text from the NLTK corpus collection.
text = nltk.corpus.genesis.words('english-kjv.txt')
# nltk.bigrams yields its pairs lazily in NLTK 3; materialise them so that
# ``bigrams`` is still usable after ConditionalFreqDist has iterated over it.
bigrams = list(nltk.bigrams(text))
# Maps each word to the frequency distribution of the words that follow it.
cfd = nltk.ConditionalFreqDist(bigrams) # [_bigram-condition]

+ 26 - 0
generate_poem.py

@@ -0,0 +1,26 @@
1
+import nltk
2
+import random
3
+from stat_parser import Parser
4
+
5
+
6
class PoemGenerator():
    """Generate short word chains from the bigrams of ``austen-emma.txt``.

    Args:
        corpus: Corpus reader providing ``sents(fileid)`` and
            ``words(fileid)``; the script passes ``nltk.corpus.gutenberg``.
    """

    def __init__(self, corpus):
        # Sentences of the novel; stored but not consulted by generate_poem.
        self.sents = corpus.sents('austen-emma.txt')
        # Materialised as a list so it can feed the frequency table and
        # still be sampled from with random.choice.
        self.bigrams = list(nltk.bigrams(corpus.words('austen-emma.txt')))
        # Maps each word to the distribution of the words that follow it.
        self.cfd = nltk.ConditionalFreqDist(self.bigrams)
        # Statistical parser instance; stored but not consulted by
        # generate_poem.
        self.parser = Parser()
        self.history = []

    def generate_poem(self):
        """Return a space-separated chain of up to 15 words.

        The first word is the leading element of a randomly chosen bigram;
        each following word is the most frequent successor of the previous
        one according to ``self.cfd``.

        Returns:
            str: The generated words joined by single spaces.
        """
        word = random.choice(self.bigrams)[0]
        chain = []
        for _ in range(15):
            chain.append(word)
            # .get avoids inserting an empty entry for an unseen word.
            followers = self.cfd.get(word)
            if not followers:
                # Dead end: nothing is recorded as following this word.
                break
            word = followers.max()
        return ' '.join(chain)
22
+
23
+
24
if __name__ == '__main__':
    generator = PoemGenerator(nltk.corpus.gutenberg)
    # Parenthesised so the line is valid as both the Python 2 print
    # statement and the Python 3 print function.
    print(generator.generate_poem())

+ 16 - 0
generate_random.py

@@ -0,0 +1,16 @@
1
+import nltk
2
+import random
3
+
4
# Tokens of the King James Genesis text, loaded once at import time.
TEXT = nltk.corpus.genesis.words('english-kjv.txt')


def main():
    """Print a 15-word chain starting from a randomly chosen word of TEXT.

    Each following word is the most frequent successor of the previous one
    according to the bigram frequencies of TEXT. The chain is written to
    standard output on a single line.
    """
    # nltk.bigrams yields lazily in NLTK 3; a list is needed because the
    # pairs are both counted and sampled with random.choice.
    bigrams = list(nltk.bigrams(TEXT))
    cfdist = nltk.ConditionalFreqDist(bigrams)
    word = random.choice(bigrams)[0]
    chain = []
    for _ in range(15):
        chain.append(word)
        # .get avoids inserting an empty entry for an unseen word.
        followers = cfdist.get(word)
        if not followers:
            # Dead end: nothing is recorded as following this word.
            break
        word = followers.max()
    # One parenthesised call is valid both as the Python 2 print statement
    # and as the Python 3 print function.
    print(' '.join(chain))


if __name__ == '__main__':
    main()