-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextractor.py
40 lines (30 loc) · 895 Bytes
/
extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import sys

from nlp import get_tagged_words
from reader import read_file
from airtable import send_to_airtable
# Maps the part-of-speech tag found on a tagged word (its ``upos`` attribute)
# to the human-readable label stored in the record's "POS" field.
posDict = {
    "ADV": "Adverb",
    "ADJ": "Adjective",
    "NOUN": "Noun",
}
def main(args):
    """Tag the words of a text file and send each distinct word to Airtable.

    The file named by ``args[1]`` is read with ``read_file``, tagged with
    ``get_tagged_words``, and every word not yet sent during this run is
    passed to ``send_to_airtable`` as a record with "Word", "POS" and
    "Source" fields. Progress is printed for every word.

    Args:
        args: Argument list in ``sys.argv`` form; ``args[1]`` is the path of
            the text file to process.

    Raises:
        SystemExit: If no input file path is supplied.
        KeyError: If a word's ``upos`` tag has no entry in ``posDict``
            (presumably ``get_tagged_words`` only yields ADV/ADJ/NOUN words;
            verify against the ``nlp`` module).
    """
    if len(args) < 2:
        sys.exit("usage: extractor.py <text-file>")

    path = args[1]
    # Label records with the bare file name: drop directories and only the
    # final extension. Splitting on the first "." would produce "" for a path
    # such as "./notes.txt" and cut "ch.1.txt" down to "ch".
    source = os.path.splitext(os.path.basename(path))[0]

    text = read_file(path)
    tagged_words = get_tagged_words(text)
    total = len(tagged_words)

    # TODO construct visited words here by using fetch_from_airtable
    sent_words = set()
    for position, word in enumerate(tagged_words, start=1):
        # Deduplicate on the same lowercased form that is stored in "Word",
        # so "The" and "the" do not produce two identical records.
        normalized = word.text.lower()
        if normalized in sent_words:
            print("Skipped repeated word (%d/%d)" % (position, total))
            continue

        print("Sending %s %s (%d/%d)" % (word.upos, word.text, position, total))
        data = {
            "fields": {
                "Word": normalized,
                "POS": posDict[word.upos],
                "Source": source,
            }
        }
        send_to_airtable(data)
        sent_words.add(normalized)
# Script entry point: run the extractor only when executed directly, passing
# the raw command-line argument list through to main().
if __name__ == "__main__":
    main(sys.argv)