Skip to content

Commit

Permalink
Merge branch 'master' of github.com:AmyOlex/Chrono
Browse files Browse the repository at this point in the history
  • Loading branch information
AmyOlex committed Jun 8, 2018
2 parents 64729ee + 4504635 commit 91ec7b3
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 33 deletions.
58 changes: 28 additions & 30 deletions Chrono/temporalTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,19 +176,18 @@ def has24HourTime(text):
text_list = text_norm.split(' ')


if len(text_list)>0:
#loop through list looking for expression
for text in text_list:
if len(text) == 4:
num = utils.getNumberFromText(text)
if num is not None:
hour = utils.getNumberFromText(text[:2])
minute = utils.getNumberFromText(text[2:])
if (hour is not None) and (minute is not None):
if (minute > 60) or (hour > 24):
return False
else:
return True
#loop through list looking for expression
for text in text_list:
if len(text) == 4:
num = utils.getNumberFromText(text)
if num is not None:
hour = utils.getNumberFromText(text[:2])
minute = utils.getNumberFromText(text[2:])
if (hour is not None) and (minute is not None):
if (minute >= 60) or (hour >= 24):
return False
else:
return True

return False

Expand All @@ -208,23 +207,22 @@ def hasDateOrTime(text):
text_list = text_norm.split(' ')


if len(text_list)>0:
#loop through list looking for expression
for text in text_list:
if len(text) == 4:
num = utils.getNumberFromText(text)
if (num >= 1800) and (num <= 2050):
## for 4 digit years, but not all 4 digit numbers will be temporal. I set a specific range for 4-digit years.
return True
if len(text) == 6:
## could be yymmdd or mmddyy
## possible ranges for the year: 00 - 99
## possible ranges for the month: 01-12
## possible ranges for the day: 01-31
## It will be hard to narrow down these ranges at this point without context.
return True
if len(text) == 8:
#loop through list looking for expression
for text in text_list:
if len(text) == 4:
num = utils.getNumberFromText(text)
if (num >= 1800) and (num <= 2050):
## for 4 digit years, but not all 4 digit numbers will be temporal. I set a specific range for 4-digit years.
return True
if len(text) == 6:
## could be yymmdd or mmddyy
## possible ranges for the year: 00 - 99
## possible ranges for the month: 01-12
## possible ranges for the day: 01-31
## It will be hard to narrow down these ranges at this point without context.
return True
if len(text) == 8:
return True

return False

Expand Down Expand Up @@ -401,4 +399,4 @@ def hasModifierText(text):
return False
####
#END_MODULE
####
####
6 changes: 3 additions & 3 deletions Chrono/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@
import nltk
from nltk.tokenize import WhitespaceTokenizer
from nltk.stem.snowball import SnowballStemmer
from Chrono import chronoEntities as t6
# from Chrono import chronoEntities as t6
from Chrono import temporalTest as tt
import dateutil.parser
import datetime
from Chrono import TimePhrase_to_Chrono
# import datetime
# from Chrono import TimePhrase_to_Chrono
from Chrono import TimePhraseEntity as tp
import re
import csv
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Navigate to the Chrono folder. For a description of all available options use:
Prior to running Chrono you must have:

> 1) The input text files organized into the Anafora XML Directory Structure.
> 2) A machine learning (ML) training matrix and class information.
The ML matrix files utilized by Chrono in the SemEval 2018 Task 6 challenge are included in the "sample_files" directory provided with this system. You may use these, or create your own using the "Create ML Matrix" instructions below.
Expand Down Expand Up @@ -87,6 +88,7 @@ The evaluation can be customized to focus on specific entities. Read the Anafora
The machine learning methods require two files to operate: a data matrix and a class file. We provide a file that utilizes a window size of 5 in the "sample_files" directory. You can also create your own training file with different window sizes and on different subsets of training data. To create your own training file do the following:

> 1) Ensure all the gold standard data you want to utilize for training is in a directory structure separate from your testing data.
> 2) Run the Python script Chrono_createMLTrainingMatrix.py as follows (assuming your input text files and the gold standard XML files are in the same directory named "./data/my_input"):
```bash
Expand Down
1 change: 1 addition & 0 deletions _config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
theme: jekyll-theme-hacker

0 comments on commit 91ec7b3

Please sign in to comment.