-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
152 lines (132 loc) · 4.61 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
# from nltk.corpus import uni
from rhymetagger import RhymeTagger
import re
import pronouncing
from string import punctuation
import numpy as np
# Pronouncing dictionary, loaded once at import time and shared by the
# functions below. As used in this file: d[word] is a list of pronunciations,
# each a sequence of phoneme strings; phonemes ending in a digit are the ones
# counted as syllables ('0' unstressed, '1'/'2' treated as stressed).
# NOTE(review): presumably keys are lowercase and the NLTK "cmudict" corpus
# must already be downloaded -- confirm against the NLTK documentation.
d = nltk.corpus.cmudict.dict()
def count_syllables(word):
    """Count number of syllables in a word.

    The word is lowercased once and that form is used for both the
    dictionary lookup and the fallback vowel scan, so capitalised input
    ("Apple") is handled the same way as lowercase input ("apple").

    Args:
        word: The word to measure.

    Returns:
        If the lowercased word is in the pronouncing dictionary ``d``, the
        largest syllable count among its pronunciations. Otherwise an
        estimate obtained by counting groups of consecutive vowels, minus
        one for a trailing 'e' when more than one group was found.
    """
    key = word.lower()
    if key in d:
        # Phonemes that end in a digit carry a stress marker, i.e. one per
        # syllable; take the pronunciation variant with the most syllables.
        return max(
            sum(1 for phoneme in pronunciation if phoneme[-1].isdigit())
            for pronunciation in d[key]
        )

    # The word is not in the dictionary: fall back to counting vowel groups.
    vowels = "aeiouy"
    count = 0
    prev_char_was_vowel = False
    for char in key:
        is_vowel = char in vowels
        if is_vowel and not prev_char_was_vowel:
            count += 1
        prev_char_was_vowel = is_vowel

    # Adjust for silent 'e' at the end
    if key.endswith('e') and count > 1:
        count -= 1
    print(f"BACKUPPPPPPPPPP The number of syllables in '{word}' is: {count}")
    return count
def word_rhyme(poem):
    """Return the rhyme scheme of the poem.

    Creates a RhymeTagger, loads its 'en' model, tags ``poem`` with
    ``output_format=3``, prints the tagger's result and returns it.
    """
    tagger = RhymeTagger()
    tagger.load_model(model='en')
    scheme = tagger.tag(poem, output_format=3)
    print(scheme)
    return scheme
# rt2 = RhymeTagger.new_model(lang = "en")
# rt2.add_to_model()
def get_scansion_line(line):
    """Gets the scansion of the given line.

    The line is split on whitespace and every token is turned into a run of
    '1' (stressed) and '0' (unstressed) markers, one per syllable.

    Dictionary lookups use a normalised form of each token: lowercased, and
    with surrounding punctuation stripped when the lowercased token itself is
    not an entry. Without this, line-initial capitalised words and words with
    attached punctuation ("day,") would never match the dictionary.

    Args:
        line: One line of the poem as a string.

    Returns:
        A flat list of '0'/'1' strings covering the whole line.
    """
    tokens = line.split()
    # The part-of-speech tagger is given the raw, un-normalised tokens.
    pos_tags = pos_tag(tokens)
    print(tokens)
    print(pos_tags)
    # Parts of speech to prioritize: one-syllable words with any other tag
    # are forced to unstressed.
    keep_list = ['NN', 'NNP', 'NNPS', 'NNS', 'VB', 'VBG', 'VBN', 'VBP', 'VBZ']
    scansion = []
    for token, tagged in zip(tokens, pos_tags):
        key = token.lower()
        if key not in d:
            # Only strip when the token as written is not an entry, so
            # entries that themselves contain punctuation still match.
            key = key.strip(punctuation)
        if not key:
            # Nothing but punctuation: contributes no syllables.
            continue

        if key in d:
            # Use the first listed pronunciation. Stress digit '0' is
            # unstressed; '1' and '2' both count as stressed. Phonemes
            # without one of these digits are not syllables.
            stresses = ''.join(
                '0' if phoneme[-1] == '0' else '1'
                for phoneme in d[key][0]
                if phoneme[-1] in ('0', '1', '2')
            )
            if tagged[1] not in keep_list and len(stresses) == 1:
                stresses = '0'
        else:
            # Unknown word: assume stress on the first syllable only.
            stresses = '1' + '0' * (count_syllables(key) - 1)

        # extend() adds one entry per syllable and nothing for an empty run.
        scansion.extend(stresses)
    print(scansion)
    return scansion
def scansion_diff(scansion, correct):
    """Counts the number of syllables that are different between scansion and given meter.

    Positions present in both sequences are compared pairwise. When the two
    sequences have different lengths, every missing or extra syllable is
    counted as one difference, so a scansion longer than the meter no longer
    raises IndexError and a shorter one is no longer under-counted.

    Args:
        scansion: Sequence of stress markers for one line.
        correct: Sequence of stress markers for the reference meter.

    Returns:
        The number of differing positions plus the difference in length.
    """
    mismatches = sum(
        1 for actual, expected in zip(scansion, correct) if actual != expected
    )
    return mismatches + abs(len(scansion) - len(correct))
def meter_detector(poem, tolerance=0.34):
    """Detect the type of meter in the poem, if any.

    Every line is scanned with ``get_scansion_line``. The syllable count of
    the first line selects the candidate meters (6, 8 or 10 syllables), and a
    candidate is accepted when no line differs from its pattern in more than
    ``tolerance`` of the positions, measured against the first line's length.
    Iambic patterns are tried before trochaic ones of the same length.

    Args:
        poem: Iterable of lines (strings).
        tolerance: Largest allowed fraction of differing syllables per line.
            Defaults to 0.34.

    Returns:
        The name of the first matching meter, or '' when the poem is empty,
        the first line has an unsupported length, or no candidate matches.
    """
    # Insertion order matters: it decides which meter wins when several of
    # the same length would match.
    meters = {
        'iambic trimeter': ['0', '1', '0', '1', '0', '1'],
        'iambic tetrameter': ['0', '1', '0', '1', '0', '1', '0', '1'],
        'iambic pentameter': ['0', '1', '0', '1', '0', '1', '0', '1', '0', '1'],
        'trochaic tetrameter': ['1', '0', '1', '0', '1', '0', '1', '0'],
        'trochaic pentameter': ['1', '0', '1', '0', '1', '0', '1', '0', '1', '0'],
    }

    scansions = [get_scansion_line(line) for line in poem]
    if not scansions:
        # No first line to take the syllable count from.
        return ''

    size = len(scansions[0])
    for name, pattern in meters.items():
        if len(pattern) != size:
            continue
        # all() stops at the first line that exceeds the tolerance.
        if all(
            scansion_diff(scansion, pattern) / size <= tolerance
            for scansion in scansions
        ):
            print("Could be", name)
            return name
    return ''