|
@@ -212,42 +212,14 @@
|
212
|
212
|
"\n",
|
213
|
213
|
"We can partition by threes too:\n",
|
214
|
214
|
"\n",
|
215
|
|
- "(<span style=\"color:blue\">The</span> <span style=\"color:red\">quick brown</span>) (quick brown fox) ... (<span style=\"color:blue\">the</span> <span style=\"color:red\">lazy dog</span>)\n"
|
216
|
|
- ]
|
217
|
|
- },
|
218
|
|
- {
|
219
|
|
- "cell_type": "markdown",
|
220
|
|
- "metadata": {
|
221
|
|
- "slideshow": {
|
222
|
|
- "slide_type": "fragment"
|
223
|
|
- }
|
224
|
|
- },
|
225
|
|
- "source": [
|
|
215
|
+ "(<span style=\"color:blue\">The</span> <span style=\"color:red\">quick brown</span>) (quick brown fox) ... (<span style=\"color:blue\">the</span> <span style=\"color:red\">lazy dog</span>)\n",
|
|
216
|
+ "\n",
|
226
|
217
|
"Or, the condition can be two words (`condition = 'the lazy'`):\n",
|
227
|
218
|
"\n",
|
228
|
|
- "(The quick brown) (quick brown fox) ... (<span style=\"color:blue\">the lazy</span> <span span=\"color:red\">dog</span>)"
|
229
|
|
- ]
|
230
|
|
- },
|
231
|
|
- {
|
232
|
|
- "cell_type": "markdown",
|
233
|
|
- "metadata": {
|
234
|
|
- "slideshow": {
|
235
|
|
- "slide_type": "fragment"
|
236
|
|
- }
|
237
|
|
- },
|
238
|
|
- "source": [
|
|
219
|
+ "(The quick brown) (quick brown fox) ... (<span style=\"color:blue\">the lazy</span> <span style=\"color:red\">dog</span>)\n",
|
|
220
|
+ "\n",
|
|
221
|
+ "These are **trigrams**.\n",
|
239
|
222
|
"\n",
|
240
|
|
- "These are **trigrams**."
|
241
|
|
- ]
|
242
|
|
- },
|
243
|
|
- {
|
244
|
|
- "cell_type": "markdown",
|
245
|
|
- "metadata": {
|
246
|
|
- "slideshow": {
|
247
|
|
- "slide_type": "fragment"
|
248
|
|
- }
|
249
|
|
- },
|
250
|
|
- "source": [
|
251
|
223
|
"We can partition any number **N** of words together as **ngrams**."
|
252
|
224
|
]
|
253
|
225
|
},
|
|
@@ -343,7 +315,7 @@
|
343
|
315
|
"source": [
|
344
|
316
|
"words = ('The quick brown fox jumped over the '\n",
|
345
|
317
|
" 'lazy dog and the quick cat').split(' ')\n",
|
346
|
|
- "print words"
|
|
318
|
+ "print(words)"
|
347
|
319
|
]
|
348
|
320
|
},
|
349
|
321
|
{
|
|
@@ -409,6 +381,17 @@
|
409
|
381
|
"{k: dict(v) for k, v in dict(cfd).items()}"
|
410
|
382
|
]
|
411
|
383
|
},
|
|
384
|
+ {
|
|
385
|
+ "cell_type": "markdown",
|
|
386
|
+ "metadata": {
|
|
387
|
+ "slideshow": {
|
|
388
|
+ "slide_type": "slide"
|
|
389
|
+ }
|
|
390
|
+ },
|
|
391
|
+ "source": [
|
|
392
|
+ "## Conditional Frequency Distributions (CFDs) ##"
|
|
393
|
+ ]
|
|
394
|
+ },
|
412
|
395
|
{
|
413
|
396
|
"cell_type": "markdown",
|
414
|
397
|
"metadata": {
|
|
@@ -501,9 +484,9 @@
|
501
|
484
|
"word = random.choice(TEXT)\n",
|
502
|
485
|
"# generate 15 more words\n",
|
503
|
486
|
"for i in range(15):\n",
|
504
|
|
- " print word,\n",
|
|
487
|
+ " print(word + ' ', end='')\n",
|
505
|
488
|
" if word in cfd:\n",
|
506
|
|
- " word = random.choice(cfd[word].keys())\n",
|
|
489
|
+ " word = random.choice(list(cfd[word].keys()))\n",
|
507
|
490
|
" else:\n",
|
508
|
491
|
" break"
|
509
|
492
|
]
|
|
@@ -604,10 +587,12 @@
|
604
|
587
|
"cell_type": "markdown",
|
605
|
588
|
"metadata": {
|
606
|
589
|
"slideshow": {
|
607
|
|
- "slide_type": "fragment"
|
|
590
|
+ "slide_type": "slide"
|
608
|
591
|
}
|
609
|
592
|
},
|
610
|
593
|
"source": [
|
|
594
|
+ "# Syllables\n",
|
|
595
|
+ "\n",
|
611
|
596
|
"* poet: /ˈpoʊət/\n",
|
612
|
597
|
"* does: /ˈdʌz/\n",
|
613
|
598
|
"\n",
|
|
@@ -806,7 +791,7 @@
|
806
|
791
|
"source": [
|
807
|
792
|
"from stat_parser import Parser\n",
|
808
|
793
|
"parsed = Parser().parse('The quick brown fox jumps over the lazy dog.')\n",
|
809
|
|
- "print parsed"
|
|
794
|
+ "print(parsed)"
|
810
|
795
|
]
|
811
|
796
|
},
|
812
|
797
|
{
|
|
@@ -917,6 +902,18 @@
|
917
|
902
|
"[https://spacy.io/docs/api/#speed-comparison](https://spacy.io/docs/api/#speed-comparison)"
|
918
|
903
|
]
|
919
|
904
|
},
|
|
905
|
+ {
|
|
906
|
+ "cell_type": "markdown",
|
|
907
|
+ "metadata": {
|
|
908
|
+ "slideshow": {
|
|
909
|
+ "slide_type": "slide"
|
|
910
|
+ }
|
|
911
|
+ },
|
|
912
|
+ "source": [
|
|
913
|
+ "![Screenshot of displaCy, a dependency visualizer for spaCy](images/displacy.png)\n",
|
|
914
|
+ "[https://demos.explosion.ai/displacy/](https://demos.explosion.ai/displacy/)"
|
|
915
|
+ ]
|
|
916
|
+ },
|
920
|
917
|
{
|
921
|
918
|
"cell_type": "markdown",
|
922
|
919
|
"metadata": {
|
|
@@ -962,6 +959,20 @@
|
962
|
959
|
"[http://karpathy.github.io/2015/05/21/rnn-effectiveness/](http://karpathy.github.io/2015/05/21/rnn-effectiveness/)"
|
963
|
960
|
]
|
964
|
961
|
},
|
|
962
|
+ {
|
|
963
|
+ "cell_type": "markdown",
|
|
964
|
+ "metadata": {
|
|
965
|
+ "slideshow": {
|
|
966
|
+ "slide_type": "slide"
|
|
967
|
+ }
|
|
968
|
+ },
|
|
969
|
+ "source": [
|
|
970
|
+ "![Screenshot of word-rnn readme on Github](images/word-rnn.png)\n",
|
|
971
|
+ "[word-rnn](https://github.com/larspars/word-rnn)\n",
|
|
972
|
+ "\n",
|
|
973
|
+ "[word-rnn-tensorflow](https://github.com/hunkim/word-rnn-tensorflow)"
|
|
974
|
+ ]
|
|
975
|
+ },
|
965
|
976
|
{
|
966
|
977
|
"cell_type": "markdown",
|
967
|
978
|
"metadata": {
|
|
@@ -973,28 +984,30 @@
|
973
|
984
|
"source": [
|
974
|
985
|
"# The end #\n",
|
975
|
986
|
"\n",
|
976
|
|
- "Questions?"
|
|
987
|
+ "Questions?\n",
|
|
988
|
+ "\n",
|
|
989
|
+ "Full write-up at: [hallada.net/blog](http://www.hallada.net/2017/07/11/generating-random-poems-with-python.html)"
|
977
|
990
|
]
|
978
|
991
|
}
|
979
|
992
|
],
|
980
|
993
|
"metadata": {
|
981
|
994
|
"celltoolbar": "Slideshow",
|
982
|
995
|
"kernelspec": {
|
983
|
|
- "display_name": "Python 2",
|
|
996
|
+ "display_name": "Python 3",
|
984
|
997
|
"language": "python",
|
985
|
|
- "name": "python2"
|
|
998
|
+ "name": "python3"
|
986
|
999
|
},
|
987
|
1000
|
"language_info": {
|
988
|
1001
|
"codemirror_mode": {
|
989
|
1002
|
"name": "ipython",
|
990
|
|
- "version": 2
|
|
1003
|
+ "version": 3
|
991
|
1004
|
},
|
992
|
1005
|
"file_extension": ".py",
|
993
|
1006
|
"mimetype": "text/x-python",
|
994
|
1007
|
"name": "python",
|
995
|
1008
|
"nbconvert_exporter": "python",
|
996
|
|
- "pygments_lexer": "ipython2",
|
997
|
|
- "version": "2.7.12"
|
|
1009
|
+ "pygments_lexer": "ipython3",
|
|
1010
|
+ "version": "3.5.2"
|
998
|
1011
|
},
|
999
|
1012
|
"livereveal": {
|
1000
|
1013
|
"scroll": true,
|