# cognateLanguage_AutoSentence.py
# automatically generates sentences based on source words
# from output_shortlist.txt
# with a=action (verb), d=descriptor (adjective/adverb), t=thing (noun/pronoun), c=connector (preposition)
import random
# Word list consumed by getWord(): one comma-separated entry per line, where
# the first field is the word, the second field is its translation, and the
# last field is the word-type label.
filename1 = 'output_shortlist.txt'

# Read the whole word list once at import time; every line keeps its trailing
# newline, exactly as it appears in the file.
with open(filename1, 'r') as f1:
    data = list(f1)
def justTwoInitSylls(word):
    """Return `word` shortened to its first two syllables.

    A syllable is approximated by a lowercase vowel (a, e, i, o, u). The
    result runs from the start of the word up to and including the second
    vowel, plus the single character that follows it when there is one.

    Words containing fewer than two lowercase vowels -- including vowel-less
    words and the empty string -- are returned unchanged. (The previous
    implementation raised UnboundLocalError when no vowel was present.)
    """
    vowelPositions = [i for i, char in enumerate(word) if char in 'aeiou']
    if len(vowelPositions) < 2:
        return word
    secondVowelIndex = vowelPositions[1]
    # "+ 2" keeps the second vowel itself and one trailing character.
    return word[:secondVowelIndex + 2]
def countVowels(word):
    """Return how many characters of `word` are vowels (a, e, i, o, u).

    The comparison is case-insensitive: the word is lowercased first.
    """
    return sum(1 for letter in word.lower() if letter in 'aeiou')
def getWord(wordType='d', fullWords=True):
    """Pick a random word of the given type from the module-level `data` list.

    Parameters:
        wordType: single-letter label to look for. For any value other than
            'c', a line qualifies when its last comma-separated field (with
            surrounding whitespace stripped) equals `wordType`. For 'c', the
            line is taken from a fixed list of line indices instead.
        fullWords: when falsy, the word is shortened with justTwoInitSylls().

    Returns:
        A (word, translation) tuple: the first and second comma-separated
        fields of the chosen line.

    Raises:
        ValueError: if no line in `data` carries the requested label (the
            previous implementation looped forever in that case).
        IndexError: if `wordType` is 'c' and the chosen connector index lies
            beyond the end of `data`.
    """
    # `data` is only read here, so no `global` declaration is needed.
    if wordType == 'c':
        # NOTE(review): presumably the 0-based line positions of the connector
        # words inside output_shortlist.txt -- confirm, and keep in sync
        # whenever that file changes.
        connectorIndices = [7,8,231,232,233,234,235,236,237,238,239,240,241,242,243,312,369,370,371,377,421,449,491,493,631,642,643]
        line = data[random.choice(connectorIndices)]
    else:
        # Gather every matching line first so a missing label is detected
        # instead of retrying random picks indefinitely. Each matching line
        # is still equally likely to be chosen.
        candidates = [
            entry for entry in data
            if entry.split(',')[-1].strip() == wordType
        ]
        if not candidates:
            raise ValueError('no word with type %r found in data' % (wordType,))
        line = random.choice(candidates)

    fields = line.split(',')
    word, translation = fields[0], fields[1]
    # Truthiness test, matching how buildSentence() interprets fullWords.
    if not fullWords:
        word = justTwoInitSylls(word)
    return word, translation
def buildSentence(pattern, fullWords=True):
    """Build a random sentence and its translation from a word-type pattern.

    Parameters:
        pattern: iterable of word-type letters (e.g. 'tat'); one word is drawn
            with getWord() for each letter, in order.
        fullWords: forwarded to getWord(); when falsy the shortened word forms
            are used.

    Returns:
        A (sentence, translation) tuple. Words are separated by single spaces.
        The translation always has its first letter capitalized (the rest
        lowercased by str.capitalize) and ends with a period. The sentence
        receives the same treatment only when `fullWords` is truthy; with
        shortened words it is returned as the plain space-joined words.
    """
    picks = [getWord(wordType=letter, fullWords=fullWords) for letter in pattern]

    sentence = ' '.join(word for word, _ in picks).strip()
    translation = ' '.join(meaning for _, meaning in picks).strip()

    if fullWords:
        sentence = sentence.capitalize() + '.'
    return sentence, translation.capitalize() + '.'
# Demo run, executed only when this file is launched directly as a script:
# prints a handful of randomly generated sentences with their translations.
if __name__ == '__main__':
    print('')
    print('WARNING: Words are chosen at random within word types and may produce unexpected sentences.')
    print('')

    # Each entry: (heading text before the pattern, pattern,
    #              heading text after the pattern, use full words?)
    demoRuns = [
        ('AUTO-GENERATED SENTENCE 1: ***1***, with pattern "', 'tat', '":', False),
        ('AUTO-GENERATED SENTENCE 2: ***2***, with pattern "', 'tact', '":', False),
        ('AUTO-GENERATED SENTENCE 3: ***3***, with pattern "', 'dtadtcdt', '":', False),
        ('AUTO-GENERATED SENTENCE 4: ***1***, with pattern "', 'tat', '" and full words:', True),
    ]

    for headingStart, pattern, headingEnd, useFullWords in demoRuns:
        print(headingStart + pattern + headingEnd)
        sentence, translation = buildSentence(pattern, useFullWords)
        print(sentence)
        print(translation)
        print('')