Skip to content

Commit

Permalink
Merge pull request #77 from AmyOlex/development
Browse files Browse the repository at this point in the history
Improvements to Chrono since SemEval 2018
  • Loading branch information
AmyOlex authored Aug 1, 2018
2 parents 9504ca6 + 6fa2069 commit e9f2942
Show file tree
Hide file tree
Showing 56 changed files with 15,255 additions and 2,969 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
anaforatools
kfoldoutput*
Chrono_TempEval2018_Post-Competition_Submissions/*
Chrono_SemEval2018_PostEvalSubmission_RF_NewswireClinicalModel_072918/*
Chrono_TempEval2018_Submission_NN/*
resultsEvaluation/*
resultsDebug/*
Expand Down Expand Up @@ -70,3 +71,5 @@ runit.sh
*.xml
Chrono_SemEval2018_PostEvalSubmission_NB_NewswireModel_060818/Chrono_TempEval2018_PostEvalSubmission_NB_060818.zip
*.tiff
*.dct
SemEval-OfficialTrain/*
31 changes: 22 additions & 9 deletions Chrono.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@
import pickle

from chronoML import DecisionTree as DTree
from chronoML import RF_classifier as RandomForest
from chronoML import NB_nltk_classifier as NBclass, ChronoKeras
from chronoML import SVM_classifier as SVMclass
from Chrono import TimePhrase_to_Chrono
from Chrono import BuildEntities
from Chrono import referenceToken
from Chrono import utils
from keras.models import load_model
Expand Down Expand Up @@ -90,6 +91,13 @@
classifier, feats = DTree.build_dt_model(args.d, args.c)
with open('DT_model.pkl', 'wb') as mod:
pickle.dump([classifier, feats], mod)

if(args.m == "RF" and args.M is None):
## Train the random forest classifier and save in the classifier variable
# print("Got RF")
classifier, feats = RandomForest.build_model(args.d, args.c)
with open('RF_model.pkl', 'wb') as mod:
pickle.dump([classifier, feats], mod)

elif(args.m == "NN" and args.M is None):
#print("Got NN")
Expand Down Expand Up @@ -137,22 +145,27 @@
if(debug) : print(doctime)

## parse out reference tokens
text, tokens, spans, tags = utils.getWhitespaceTokens(infiles[f]+args.x)
text, tokens, spans, tags, sents = utils.getWhitespaceTokens(infiles[f]+args.x)
#my_refToks = referenceToken.convertToRefTokens(tok_list=tokens, span=spans, remove_stopwords="./Chrono/stopwords_short2.txt")
my_refToks = referenceToken.convertToRefTokens(tok_list=tokens, span=spans, pos=tags)
my_refToks = referenceToken.convertToRefTokens(tok_list=tokens, span=spans, pos=tags, sent_boundaries=sents)

if(debug) :
print("REFERENCE TOKENS:\n")
for tok in my_refToks : print(tok)


## mark all ref tokens if they are numeric or temporal
chroList = utils.markTemporal(my_refToks)

if(debug) :
print("REFERENCE TOKENS:\n")
for tok in chroList : print(tok)

tempPhrases = utils.getTemporalPhrases(chroList, doctime)

#for c in chroList:
# print(c)
if(debug):
for c in tempPhrases:
print(c)

chrono_master_list, my_chrono_ID_counter = TimePhrase_to_Chrono.buildChronoList(tempPhrases, my_chrono_ID_counter, chroList, (classifier, args.m), feats, doctime)

chrono_master_list, my_chrono_ID_counter = BuildEntities.buildChronoList(tempPhrases, my_chrono_ID_counter, chroList, (classifier, args.m), feats, doctime)

print("Number of Chrono Entities: " + str(len(chrono_master_list)))
utils.write_xml(chrono_list=chrono_master_list, outfile=outfiles[f])
Expand Down
Loading

0 comments on commit e9f2942

Please sign in to comment.