diff --git a/build.xml b/build.xml index cb982bce6..533d00e98 100644 --- a/build.xml +++ b/build.xml @@ -13,6 +13,7 @@ + @@ -77,7 +78,7 @@ - + @@ -180,22 +181,27 @@ - - - - - + + + + - - - - - - + + + + + + + + + - - + + + + + diff --git a/java7.settings.xml b/java7.settings.xml new file mode 100644 index 000000000..55057accb --- /dev/null +++ b/java7.settings.xml @@ -0,0 +1,4 @@ + + + diff --git a/pom.xml b/pom.xml index 76effdc04..48cd86c4b 100644 --- a/pom.xml +++ b/pom.xml @@ -163,6 +163,17 @@ + + net.sf.jwordnet + jwnl + 1.4_rc3 + + + commons-logging + commons-logging + + + net.sf.opencsv @@ -239,6 +250,12 @@ true + + com.google.guava + guava + 17.0 + + org.apache.odftoolkit odfdom-java @@ -317,17 +334,6 @@ 3.2 - - net.sf.jwordnet - jwnl - 1.4_rc3 - - - commons-logging - commons-logging - - - org.linkgrammar linkgrammar @@ -349,12 +355,6 @@ - - - com.google.guava - guava-jdk5 - 17.0 - @@ -371,6 +371,13 @@ + + + com.google.guava + guava-jdk5 + 17.0 + + diff --git a/src/java_test/relex/test/RelExCases.java b/src/java_test/relex/test/RelExCases.java new file mode 100644 index 000000000..a2ad6eff8 --- /dev/null +++ b/src/java_test/relex/test/RelExCases.java @@ -0,0 +1,125 @@ +package relex.test; + +import java.io.InputStreamReader; +import java.net.URL; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import au.com.bytecode.opencsv.CSVReader; +import au.com.bytecode.opencsv.CSVWriter; + +import com.google.common.base.Function; +import com.google.common.base.Optional; +import com.google.common.base.Preconditions; +import com.google.common.collect.FluentIterable; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +/** + * @author ceefour + * + */ +public class RelExCases { + + private static final Logger log = LoggerFactory + .getLogger(RelExCases.class); + + static class RelExCase { + String sentence; + Set relations; + Optional description; + + RelExCase(String sentence, Set relations, Optional description) { + super(); + this.description = description; + this.sentence = sentence; + this.relations = relations; + } + } + + static class RelExCaseToObjectArray implements Function { + public Object[] apply(RelExCase input) { + return new Object[] { input.sentence, input.relations, input.description }; + } + } + + protected static ImmutableList parseTsv(String casesPath) { + URL casesTsv = Preconditions.checkNotNull(RelExCases.class.getResource(casesPath), + "Cannot load '%s' from classpath", casesPath); + log.info("Loading '{}'...", casesTsv); + try { + CSVReader reader = new CSVReader(new InputStreamReader(casesTsv.openStream()), + '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER, CSVWriter.NO_ESCAPE_CHARACTER); + try { + Optional curDescription = Optional.absent(); + Optional curSentence = Optional.absent(); + ImmutableSet.Builder curRelations = ImmutableSet.builder(); + ImmutableList.Builder cases = ImmutableList.builder(); + reader.readNext(); // skip header line + while (true) { + String[] row = reader.readNext(); + if (row == null) { + break; + } + if (row.length == 0) { + continue; + } + if (row[0].startsWith("//")) { + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new RelExCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + curDescription = Optional.of(row[0].substring(2).trim()); + continue; + } + if (!row[0].trim().isEmpty()) { + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new RelExCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + // sentence row + curSentence = Optional.of(row[0].trim()); + } else if (row.length >= 2 && !row[1].trim().isEmpty()) { + // relation row + curRelations.add(row[1].trim()); + } + } + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new RelExCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + final ImmutableList caseList = cases.build(); + log.info("Got {} cases from '{}'", caseList.size(), casesPath); + return caseList; + } finally { + reader.close(); + } + } catch (Exception e) { + throw new RuntimeException("Cannot read " + casesTsv, e); + } + } + + public static Object[] provideComparatives() { + return FluentIterable.from(parseTsv("/relex-comparatives.tsv")) + .transform(new RelExCaseToObjectArray()).toArray(Object[].class); + } + + public static Object[] provideConjunction() { + return FluentIterable.from(parseTsv("/relex-conjunction.tsv")) + .transform(new RelExCaseToObjectArray()).toArray(Object[].class); + } + + public static Object[] provideExtraposition() { + return FluentIterable.from(parseTsv("/relex-extraposition.tsv")) + .transform(new RelExCaseToObjectArray()).toArray(Object[].class); + } + +} \ No newline at end of file diff --git a/src/java_test/relex/test/StanfordCases.java b/src/java_test/relex/test/StanfordCases.java new file mode 100644 index 000000000..40b6d2dbd --- /dev/null +++ b/src/java_test/relex/test/StanfordCases.java @@ -0,0 +1,120 @@ +package relex.test; + +import java.io.InputStreamReader; +import java.net.URL; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import au.com.bytecode.opencsv.CSVReader; +import au.com.bytecode.opencsv.CSVWriter; + +import com.google.common.base.Function; +import com.google.common.base.Optional; +import com.google.common.base.Preconditions; +import com.google.common.collect.FluentIterable; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +/** + * @author ceefour + * + */ +public class StanfordCases { + + private static final Logger log = LoggerFactory + .getLogger(StanfordCases.class); + + static class StanfordCase { + String sentence; + Set relations; + Optional description; + + StanfordCase(String sentence, Set relations, Optional description) { + super(); + this.description = description; + this.sentence = sentence; + this.relations = relations; + } + } + + static class StanfordCaseToObjectArray implements Function { + public Object[] apply(StanfordCase input) { + return new Object[] { input.sentence, input.relations, input.description }; + } + } + + protected static ImmutableList parseTsv(String casesPath) { + URL casesTsv = Preconditions.checkNotNull(StanfordCases.class.getResource(casesPath), + "Cannot load '%s' from classpath", casesPath); + log.info("Loading '{}'...", casesTsv); + try { + CSVReader reader = new CSVReader(new InputStreamReader(casesTsv.openStream()), + '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER, CSVWriter.NO_ESCAPE_CHARACTER); + try { + Optional curDescription = Optional.absent(); + Optional curSentence = Optional.absent(); + ImmutableSet.Builder curRelations = ImmutableSet.builder(); + ImmutableList.Builder cases = ImmutableList.builder(); + reader.readNext(); // skip header line + while (true) { + String[] row = reader.readNext(); + if (row == null) { + break; + } + if (row.length == 0) { + continue; + } + if (row[0].startsWith("//")) { + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new StanfordCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + curDescription = Optional.of(row[0].substring(2).trim()); + continue; + } + if (!row[0].trim().isEmpty()) { + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new StanfordCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + // sentence row + curSentence = Optional.of(row[0].trim()); + } else if (row.length >= 2 && !row[1].trim().isEmpty()) { + // relation row + curRelations.add(row[1].trim()); + } + } + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new StanfordCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + final ImmutableList caseList = cases.build(); + log.info("Got {} cases from '{}'", caseList.size(), casesPath); + return caseList; + } finally { + reader.close(); + } + } catch (Exception e) { + throw new RuntimeException("Cannot read " + casesTsv, e); + } + } + + public static Object[] provideUntagged() { + return FluentIterable.from(parseTsv("/stanford-untagged.tsv")) + .transform(new StanfordCaseToObjectArray()).toArray(Object[].class); + } + + public static Object[] provideTagged() { + return FluentIterable.from(parseTsv("/stanford-tagged.tsv")) + .transform(new StanfordCaseToObjectArray()).toArray(Object[].class); + } + +} \ No newline at end of file diff --git a/src/java_test/relex/test/TestRelEx.java b/src/java_test/relex/test/TestRelEx.java index cce439784..cb5eb60f6 100644 --- a/src/java_test/relex/test/TestRelEx.java +++ b/src/java_test/relex/test/TestRelEx.java @@ -1,863 +1,123 @@ -/* - * Copyright 2009 Linas Vepstas - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package relex.test; - -import java.util.ArrayList; -import java.util.Collections; - -import org.junit.BeforeClass; -import org.junit.Test; - -import relex.ParsedSentence; -import relex.RelationExtractor; -import relex.Sentence; -import relex.output.SimpleView; - -public class TestRelEx -{ - private static RelationExtractor re; - private int pass; - private int fail; - private int subpass; - private int subfail; - private static ArrayList sentfail= new ArrayList(); - - @BeforeClass - public static void setUpClass() { - re = new RelationExtractor(); - } - - public TestRelEx() - { - pass = 0; - fail = 0; - subpass = 0; - subfail = 0; - } - - public ArrayList split(String a) - { - String[] sa = a.split("\n"); - ArrayList saa = new ArrayList(); - for (String s : sa) { - saa.add(s); - } - Collections.sort (saa); - return saa; - } - - /** - * First argument is the sentence. - * Second argument is a list of the relations that RelEx - * should be generating. - * Return true if RelEx generates the same dependencies - * as the second argument. - */ - public boolean test_sentence (String sent, String sf) - { - re.do_penn_tagging = false; - re.setMaxParses(1); - Sentence sntc = re.processSentence(sent); - ParsedSentence parse = sntc.getParses().get(0); - String rs = SimpleView.printBinaryRelations(parse); - String urs = SimpleView.printUnaryRelations(parse); - - ArrayList exp = split(sf); - ArrayList brgot = split(rs); - ArrayList urgot = split(urs); - - //add number of binary relations from parser-output, to total number of relationships got - int sizeOfGotRelations= brgot.size(); - //check expected binary and unary relations - //the below for-loop checks whether all expected binary relations are - //contained in the parser-binary-relation-output arrayList "brgot". - //if any unary relations are expected in the output it checks the - //parser-unary-relation-output arrayList "urgot" for unary relationships - for (int i=0; i< exp.size(); i++) - { - if(!brgot.contains(exp.get(i))) - { - if(!urgot.contains(exp.get(i))) - { - System.err.println("Error: content miscompare:\n" + - "\tExpected = " + exp + "\n" + - "\tGot Binary Relations = " + brgot + "\n" + - "\tGot Unary Relations = " + urgot + "\n" + - "\tSentence = " + sent); - subfail ++; - fail ++; - sentfail.add(sent); - return false; - } - //add the unary relation, count to totoal number of binary relations - sizeOfGotRelations++; - } - - } - //The size checking of the expected relationships vs output relationships - //is done here purposefully, to accommodate if there is any unary relationships present - //in the expected output(see above for-loop also). - //However it only checks whether parser-output resulted more relationships(binary+unary) than expected relations - //If the parser-output resulted less relationships(binary+unary) than expected it would - //catch that in the above for-loop - if (exp.size() < sizeOfGotRelations) - { - System.err.println("Error: size miscompare:\n" + - "\tExpected = " + exp + "\n" + - "\tGot Binary Relations = " + brgot + "\n" + - "\tGot Unary Relations = " + urgot + "\n" + - "\tSentence = " + sent); - subfail ++; - fail ++; - sentfail.add(sent); - return false; - } - - subpass ++; - pass ++; - return true; - } - - public void report(boolean rc, String subsys) - { - if (rc) { - System.err.println(subsys + ": Tested " + pass + " sentences, test passed OK"); - } else { - System.err.println(subsys + ": Test failed\n\t" + - fail + " sentences failed\n\t" + - pass + " sentences passed"); - } - subpass = 0; - subfail = 0; - } - - public boolean test_comparatives() - { - boolean rc = true; - rc &= test_sentence ("Some people like pigs less than dogs.", - "_advmod(like, less)\n" + - "_obj(like, pig)\n" + - "_quantity(people, some)\n" + - "_subj(like, people)\n" + - "than(pig, dog)\n"); - - rc &= test_sentence ("Some people like pigs more than dogs.", - "_advmod(like, more)\n" + - "_obj(like, pig)\n" + - "_quantity(people, some)\n" + - "_subj(like, people)\n" + - "than(pig, dog)\n"); - //Non-equal Gradable : Two entities one feature "more/less" - - rc &= test_sentence ("He is more intelligent than John.", - "than(he, John)\n" + - "_comparative(intelligent, he)\n" + - "degree(intelligent, comparative)\n"+ - "_predadj(he, intelligent)\n"); - - rc &= test_sentence ("He is less intelligent than John.", - "than(he, John)\n" + - "_comparative(intelligent, he)\n" + - "degree(intelligent, comparative)\n"+ - "_advmod(intelligent, less)\n"+ - "_predadj(he, intelligent)\n"); - - rc &= test_sentence ("He runs more quickly than John.", - "_advmod(run, quickly)\n"+ - "_advmod(quickly, more)\n"+ - "_subj(run, he)\n" + - "than(he, John)\n" + - "_comparative(quickly, run)\n" + - "degree(quickly, comparative)\n"); - - rc &= test_sentence ("He runs less quickly than John.", - "_advmod(run, quickly)\n" + - "_subj(run, he)\n" + - "_advmod(quickly, less)\n"+ - "than(he, John)\n" + - "_comparative(quickly, run)\n" + - "degree(quickly, comparative)\n"); - - rc &= test_sentence ("He runs more quickly than John does.", - "_advmod(run, quickly)\n" + - "_advmod(quickly, more)\n"+ - "_subj(run, he)\n" + - "_subj(do, John)\n"+ - "than(he, John)\n" + - "_comparative(quickly, run)\n" + - "degree(quickly, comparative)\n"); - - //This sentence is ungrammatical but commonly used by non-native English speakers - rc &= test_sentence ("He runs less quickly than John does.", - "_advmod(run, quickly)\n" + - "_subj(run, he)\n" + - "_subj(do, John)\n"+ - "_advmod(quickly, less)\n"+ - "than(he, John)\n" + - "_comparative(quickly, run)\n" + - "degree(quickly, comparative)\n"); - - rc &= test_sentence ("He runs slower than John does.", - "_advmod(run, slow)\n" + - "_subj(run, he)\n" + - "_subj(do, John)\n"+ - "than(he, John)\n" + - "_comparative(slow, run)\n" + - "degree(slow, comparative)\n"); - - rc &= test_sentence ("He runs more than John.", - "_obj(run, more)\n" + - "_subj(run, he)\n" + - "than(he, John)\n"+ - "_comparative(more, run)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("He runs less than John.", - "_obj(run, less)\n" + - "_subj(run, he)\n" + - "than(he, John)\n"+ - "_comparative(less, run)\n"+ - "degree(less, comparative)\n"); - - rc &= test_sentence ("He runs faster than John.", - "than(he, John)\n" + - "_comparative(fast, run)\n" + - "_subj(run, he)\n"+ - "_advmod(run, fast)\n"+ - "degree(fast, comparative)\n"); - - rc &= test_sentence ("He runs more slowly than John.", - "than(he, John)\n" + - "_subj(run, he)\n" + - "_advmod(slowly, more)\n"+ - "_comparative(slowly, run)\n"+ - "_advmod(run, slowly)\n"+ - "degree(slowly, comparative)\n"); - - rc &= test_sentence ("He runs less slowly than John.", - "than(he, John)\n" + - "_subj(run, he)\n" + - "_comparative(slowly, run)\n"+ - "_advmod(run, slowly)\n"+ - "_advmod(slowly, less)\n"+ - "degree(slowly, comparative)\n"); - - rc &= test_sentence ("He runs more miles than John does.", - "than(he, John)\n" + - "_subj(run, he)\n" + - "_subj(do, John)\n"+ - "_obj(run, mile)\n"+ - "_comparative(mile, run)\n"+ - "_quantity(mile, more)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("He runs less miles than John does.", - "than(he, John)\n" + - "_subj(run, he)\n" + - "_subj(do, John)\n"+ - "_obj(run, mile)\n"+ - "_comparative(mile, run)\n"+ - "_quantity(mile, less)\n"+ - "degree(less, comparative)\n"); - - rc &= test_sentence ("He runs many more miles than John does.", - "than(he, John)\n" + - "_comparative(mile, run)\n"+ - "_obj(run, mile)\n"+ - "_subj(run, he)\n" + - "_subj(do, John)\n" + - "_quantity(mile, many)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("He runs fewer miles than John does.", - "than(he, John)\n" + - "_comparative(mile, run)\n"+ - "_obj(run, mile)\n"+ - "_subj(run, he)\n" + - "_subj(do, John)\n" + - "_quantity(mile, fewer)\n"+ - "degree(fewer, comparative)\n"); - - rc &= test_sentence ("He runs ten more miles than John.", - "_obj(run, mile)\n"+ - "_subj(run, he)\n" + - "_quantity(mile, more)\n"+ - "than(he, John)\n" + - "_comparative(mile, run)\n"+ - "_num_quantity(miles, ten)\n" + - "degree(more, comparative)\n"); - - rc &= test_sentence ("He runs almost ten more miles than John does.", - "_obj(run, mile)\n"+ - "_subj(run, he)\n"+ - "_comparative(mile, run)\n"+ - "_subj(do, John)\n"+ - "than(he, John)\n"+ - "_quantity_mod(ten, almost)\n"+ - "_num_quantity(miles, ten)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("He runs more often than John.", - "_subj(run, he)\n"+ - "_advmod(often, more)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(he, John)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("He runs less often than John.", - "_subj(run, he)\n"+ - "_advmod(often, less)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(he, John)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("He runs here more often than John.", - "_advmod(run, here)\n"+ - "_advmod(often, more)\n"+ - "_advmod(run, often)\n"+ - "_subj(run, he)\n"+ - "_comparative(often, run)\n"+ - "than(he, John)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("He runs here less often than John.", - "_advmod(run, here)\n"+ - "_advmod(often, less)\n"+ - "_advmod(run, often)\n"+ - "_subj(run, he)\n"+ - "_comparative(often, run)\n"+ - "than(he, John)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("He is faster than John.", - "than(he, John)\n"+ - "_predadj(he, fast)\n"+ - "_comparative(fast, be)\n"+ - "degree(fast, comparative)\n"); - - rc &= test_sentence ("He is faster than John is.", - "than(he, John)\n"+ - "_predadj(he, fast)\n"+ - "_subj(be, John)\n"+ - "_comparative(fast, be)\n"+ - "degree(fast, comparative)\n"); - - rc &= test_sentence ("His speed is faster than John's.", - "than(speed, be)\n"+ - "_predadj(speed, fast)\n"+ - "_poss(speed, him)\n"+ - "_comparative(fast, be)\n"+ - "degree(fast, comparative)\n"); - - rc &= test_sentence ("I run more than Ben.", - "_subj(run, I)\n"+ - "_obj(run, more)\n"+ - "_comparative(more, run)\n"+ - "than(I, Ben)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("I run less than Ben.", - "_subj(run, I)\n"+ - "_obj(run, less)\n"+ - "_comparative(less, run)\n"+ - "than(I, Ben)\n"+ - "degree(less, comparative)\n"); - - rc &= test_sentence ("I run more miles than Ben.", - "_subj(run, I)\n"+ - "_obj(run, mile)\n"+ - "_quantity(mile, more)\n"+ - "_comparative(mile, run)\n"+ - "than(I, Ben)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("I run fewer miles than Ben.", - "_subj(run, I)\n"+ - "_obj(run, mile)\n"+ - "_quantity(mile, fewer)\n"+ - "_comparative(mile, run)\n"+ - "than(I, Ben)\n"+ - "degree(fewer, comparative)\n"); - - rc &= test_sentence ("I run 10 more miles than Ben.", - "_subj(run, I)\n"+ - "_obj(run, mile)\n"+ - "_num_quantity(mile, 10)\n"+ - "_quantity_mod(10, more)\n"+ - "_comparative(mile, run)\n"+ - "than(I, Ben)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("I run 10 fewer miles than Ben.", - "_subj(run, I)\n"+ - "_obj(run, mile)\n"+ - "_num_quantity(mile, 10)\n"+ - "_quantity_mod(10, fewer)\n"+ - "_comparative(mile, run)\n"+ - "than(I, Ben)\n"+ - "degree(fewer, comparative)\n"); - - rc &= test_sentence ("I run more often than Ben.", - "_subj(run, I)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "degree(often, comparative)\n"+ - "_advmod(often, more)\n"); - - rc &= test_sentence ("I run less often than Ben.", - "_subj(run, I)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "degree(often, comparative)\n"+ - "_advmod(often, less)\n"); - - rc &= test_sentence ("I run more often than Ben does.", - "_subj(run, I)\n"+ - "_subj(do, Ben)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "degree(often, comparative)\n"+ - "_advmod(often, more)\n"); - - rc &= test_sentence ("I run less often than Ben does.", - "_subj(run, I)\n"+ - "_subj(do, Ben)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "degree(often, comparative)\n"+ - "_advmod(often, less)\n"); - - rc &= test_sentence ("I run more often than Ben climbs.", - "_subj(run, I)\n"+ - "_subj(climb, Ben)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "than1(run, climb)\n"+ - "degree(often, comparative)\n"+ - "_advmod(run, often)\n"+ - "_advmod(often, more)\n"); - - rc &= test_sentence ("I run less often than Ben climbs.", - "_subj(run, I)\n"+ - "_subj(climb, Ben)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "than1(run, climb)\n"+ - "degree(often, comparative)\n"+ - "_advmod(run, often)\n"+ - "_advmod(often, less)\n"); - - rc &= test_sentence ("I run more races than Ben wins contests.", - "_subj(run, I)\n"+ - "_obj(run, race)\n"+ - "_subj(win, Ben)\n"+ - "_obj(win, contest)\n"+ - "_quantity(race, more)\n"+ - "_comparative(race, run)\n"+ - "than(I, Ben)\n"+ - "than1(run, climb)\n"+ - "than2(race, contest)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("I run fewer races than Ben wins contests.", - "_subj(run, I)\n"+ - "_obj(run, race)\n"+ - "_subj(win, Ben)\n"+ - "_obj(win, contest)\n"+ - "_quantity(race, fewer)\n"+ - "_comparative(race, run)\n"+ - "than(I, Ben)\n"+ - "than1(run, climb)\n"+ - "than2(race, contest)\n"+ - "degree(fewer, comparative)\n"); - - rc &= test_sentence ("I have more chairs than Ben.", - "_obj(have, chair)\n"+ - "_subj(have, I)\n"+ - "than(I, Ben)\n"+ - "_comparative(chair, have)\n"+ - "_quantity(chair, more)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("I have fewer chairs than Ben.", - "_obj(have, chair)\n"+ - "_subj(have, I)\n"+ - "than(I, Ben)\n"+ - "_comparative(chair, have)\n"+ - "_quantity(chair, fewer)\n"+ - "degree(fewer, comparative)\n"); - - rc &= test_sentence ("He earns much more money than I do.", - "_obj(earn, money)\n"+ - "_subj(do, I)\n"+ - "_subj(earn, he)\n"+ - "than(he,I)\n"+ - "_comparative(money,earn)\n"+ - "_quantity(money, more)\n"+ - "_advmod(more, much)\n"+ - "degree(more,comparative)\n"); - - rc &= test_sentence ("He earns much less money than I do.", - "_obj(earn, money)\n"+ - "_subj(do, I)\n"+ - "_subj(earn, he)\n"+ - "than(he, I)\n"+ - "_comparative(money, earn)\n"+ - "_quantity(money, less)\n"+ - "_advmod(less, much)\n"+ - "degree(less, comparative)\n"); - - rc &= test_sentence ("She comes here more often than her husband.", - "_advmod(come, here)\n"+ - "_advmod(often, more)\n"+ - "_advmod(come, often)\n"+ - "_subj(come, she)\n"+ - "_poss(husband, her)\n"+ - "_comparative(often, come)\n"+ - "than(she, husband)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("She comes here less often than her husband.", - "_advmod(come, here)\n"+ - "_advmod(often, less)\n"+ - "_advmod(come, often)\n"+ - "_subj(come, she)\n"+ - "_poss(husband, her)\n"+ - "_comparative(often, come)\n"+ - "than(she, husband)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("Russian grammar is more difficult than English grammar.", - "_comparative(difficult, grammar)\n"+ - "than(grammar, grammar)\n"+ - "_amod(grammar, Russian)\n"+ //When link-grammar uses A, relex should use _amod it will use A instead of AN; will be updated in next linkgrammer version - "_predadj(grammar, difficult)\n"+ - "_amod(grammar, English)\n"+ - "degree(difficult, comparative)\n"); - - rc &= test_sentence ("Russian grammar is less difficult than English grammar.", - "_comparative(difficult, grammar)\n"+ - "than(grammar, grammar)\n"+ - "_amod(grammar, Russian)\n"+ - "_predadj(grammar, difficult)\n"+ - "_amod(grammar, English)\n"+ - "_advmod(difficult, less)\n"+ - "degree(difficult, comparative)\n"); - - rc &= test_sentence ("My sister is much more intelligent than me.", - "_amod(much, intelligent)\n"+ - "_predadj(sister, intelligent)\n"+ - "_poss(sister, me)\n"+ - "than(sister, me)\n"+ - "_comparative(intelligent, sister)\n"+ - "degree(intelligent, comparative)\n"); - - rc &= test_sentence ("My sister is much less intelligent than me.", - "_amod(much, intelligent)\n"+ - "_predadj(sister, intelligent)\n"+ - "_poss(sister, me)\n"+ - "than(sister, me)\n"+ - "_comparative(intelligent, sister)\n"+ - "_advmod(intelligent, less)\n"+ - "degree(intelligent, comparative)\n"); - - rc &= test_sentence ("I find maths lessons more enjoyable than science lessons.", - "_iobj(find, maths)\n"+ - "_obj(find, lesson)\n"+ - "_subj(find, I)\n"+ - "_amod(lesson, enjoyable)\n"+ - "_nn(lesson, science)\n"+ - "than(maths, science)\n"+ - "_comparative(enjoyable, maths)\n"+ - "degree(enjoyable, comparative)\n"); - - rc &= test_sentence ("I find maths lessons less enjoyable than science lessons.", - "_iobj(find, maths)\n"+ - "_obj(find, lesson)\n"+ - "_subj(find, I)\n"+ - "_amod(lesson, enjoyable)\n"+ - "_nn(lesson, science)\n"+ - "than(maths, science)\n"+ - "_comparative(enjoyable, maths)\n"+ - "_advmod(enjoyable, less)\n"+ - "degree(enjoyable, comparative)\n"); - report(rc, "Comparatives"); - return rc; - } - public boolean test_Conjunction() - { - boolean rc = true; - //conjoined verbs - rc &= test_sentence ("Scientists make observations and ask questions.", - "_obj(make, observation)\n" + - "_obj(ask, question)\n" + - "_subj(make, scientist)\n" + - "_subj(ask, scientist)\n" + - "conj_and(make, ask)\n"); - //conjoined nouns - rc &= test_sentence ("She is a student and an employee.", - "_obj(be, student)\n" + - "_obj(be, employee)\n" + - "_subj(be, she)\n" + - "conj_and(student, employee)\n"); - //conjoined adjectives - rc &= test_sentence ("I hailed a black and white taxi.", - "_obj(hail, taxi)\n" + - "_subj(hail, I)\n" + - "_amod(taxi, black)\n" + - "_amod(taxi, white)\n" + - "conj_and(black, white)\n"); - //conjoined adverbs - rc &= test_sentence ("She ran quickly and quietly.", - "_advmod(run, quickly)\n" + - "_advmod(run, quietly)\n" + - "_subj(run, she)\n" + - "conj_and(quickly, quietly)\n"); - //adjectival modifiers on conjoined subject - rc &= test_sentence ("The big truck and the little car collided.", - "_amod(car, little)\n" + - "_amod(truck, big)\n" + - "_subj(collide, truck)\n" + - "_subj(collide, car)\n" + - "conj_and(truck, car)\n"); - //verbs with modifiers - rc &= test_sentence ( "We ate dinner at home and went to the movies.", - "_obj(eat, dinner)\n" + - "conj_and(eat, go)\n" + - "at(eat, home)\n" + - "_subj(eat, we)\n" + - "to(go, movie)\n" + - "_subj(go, we)\n"); - //verb with more modifiers - rc &= test_sentence ("We ate a late dinner at home and went out to the movies afterwards.", - "_obj(eat, dinner)\n" + - "conj_and(eat, go_out)\n" + - "at(eat, home)\n" + - "_subj(eat, we)\n" + - "to(go_out, movie)\n" + - "_advmod(go_out, afterwards)\n" + - "_subj(go_out, we)\n" + - "_amod(dinner, late)\n"); - - //conjoined ditransitive verbs - rc &= test_sentence ("She baked him a cake and sang him a song.", - "_iobj(sing, him)\n" + - "_obj(sing, song)\n" + - "_subj(sing, she)\n" + - "_iobj(bake, him)\n" + - "_obj(bake, cake)\n" + - "conj_and(bake, sing)\n" + - "_subj(bake, she)\n"); - //conjoined adverbs with modifiers - rc &= test_sentence ("she ran very quickly and extremely quietly.", - "_advmod(run, quickly)\n" + - "_advmod(run, quietly)\n" + - "_subj(run, she)\n" + - "_advmod(quietly, extremely)\n" + - "conj_and(quickly, quietly)\n" + - "_advmod(quickly, very)\n"); - //conjoined adverbs with out modifiers - rc &= test_sentence ("She handled it quickly and gracefully.", - "_obj(handle, quickly)\n" + - "_obj(handle, gracefully)\n" + - "_advmod(handle, quickly)\n" + - "_advmod(handle, gracefully)\n" + - "_subj(handle, she)\n" + - "conj_and(quickly, gracefully)\n"); - //modifiers on conjoined adjectives - rc &= test_sentence ("He had very long and very white hair.", - "_obj(have, hair)\n" + - "_subj(have, he)\n" + - "_amod(hair, long)\n" + - "_amod(hair, white)\n" + - "_advmod(white, very)\n" + - "conj_and(long, white)\n" + - "_advmod(long, very)\n"); - //adjectival modifiers on conjoined object - rc &= test_sentence ("The collision was between the little car and the big truck.", - "_pobj(between, car)\n" + - "_pobj(between, truck)\n" + - "_psubj(between, collision)\n" + - "_amod(truck, big)\n" + - "_amod(car, little)\n" + - "conj_and(car, truck)\n"); - //Names Modifiers and conjunction - rc &= test_sentence ("Big Tom and Angry Sue went to the movies.", - "to(go, movie)\n" + - "_subj(go, Big_Tom)\n" + - "_subj(go, Angry_Sue)\n" + - "conj_and(Big_Tom, Angry_Sue)\n"); - - report(rc, "Conjunction"); - return rc; - } - public boolean test_extraposition() - { - boolean rc = true; - rc &= test_sentence ("The woman who lives next door is a registered nurse.", - "_obj(be, nurse)\n" + - "_subj(be, woman)\n" + - "_amod(nurse, registered)\n" + - "_advmod(live, next_door)\n" + - "_subj(live, woman)\n" + - "who(woman, live)\n"); - - rc &= test_sentence ("A player who is injured has to leave the field.", - "_to-do(have, leave)\n" + - "_subj(have, player)\n" + - "_obj(leave, field)\n" + - "_predadj(player, injured)\n" + - "who(player, injured)\n" ); - - rc &= test_sentence ("Pizza, which most people love, is not very healthy.", - "_advmod(very, not)\n" + - "_advmod(healthy, very)\n" + - "_obj(love, Pizza)\n" + - "_quantity(people, most)\n" + - "which(Pizza, love)\n" + - "_subj(love, people)\n" + - "_predadj(Pizza, healthy)\n" ); - - rc &= test_sentence ("The restaurant which belongs to my aunt is very famous.", - "_advmod(famous, very)\n" + - "to(belong, aunt)\n" + - "_subj(belong, restaurant)\n" + - "_poss(aunt, me)\n" + - "which(restaurant, belong)\n" + - "_predadj(restaurant, famous)\n"); - - rc &= test_sentence ("The books which I read in the library were written by Charles Dickens.", - "_obj(write, book)\n" + - "by(write, Charles_Dickens)\n" + - "_obj(read, book)\n" + - "in(read, library)\n" + - "_subj(read, I)\n" + - "which(book, read)\n"); - - rc &= test_sentence("This is the book whose author I met in a library.", - "_obj(be, book)\n" + - "_subj(be, this)\n" + - "_obj(meet, author)\n" + - "in(meet, library)\n" + - "_subj(meet, I)\n" + - "whose(book, author)\n"); - - rc &= test_sentence("The book that Jack lent me is very boring.", - "_advmod(boring, very)\n" + - "_iobj(lend, book)\n" + - "_obj(lend, me)\n" + - "_subj(lend, Jack)\n" + - "that(book, lend)\n" + - "_predadj(book, boring)\n"); - - rc &= test_sentence("They ate a special curry which was recommended by the restaurant’s owner.", - "_obj(eat, curry)\n" + - "_subj(eat, they)\n" + - "_obj(recommend, curry)\n" + - "by(recommend, owner)\n" + - "_poss(owner, restaurant)\n" + - "which(curry, recommend)\n" + - "_amod(curry, special)\n"); - - rc &= test_sentence("The dog who Jack said chased me was black.", - "_obj(chase, me)\n" + - "_subj(chase, dog)\n" + - "_subj(say, Jack)\n" + - "_predadj(dog, black)\n" + - "who(dog, chase)\n"); - - rc &= test_sentence("Jack, who hosted the party, is my cousin.", - "_obj(be, cousin)\n" + - "_subj(be, Jack)\n" + - "_poss(cousin, me)\n" + - "_obj(host, party)\n" + - "_subj(host, Jack)\n" + - "who(Jack, host)\n"); - - rc &= test_sentence("Jack, whose name is in that book, is the student near the window.", - "near(be, window)\n" + - "_obj(be, student)\n" + - "_subj(be, Jack)\n" + - "_pobj(in, book)\n" + - "_psubj(in, name)\n" + - "_det(book, that)\n" + - "whose(Jack, name)\n"); - - rc &= test_sentence("Jack stopped the police car that was driving fast.", - "_obj(stop, car)\n" + - "_subj(stop, Jack)\n" + - "_advmod(drive, fast)\n" + - "_subj(drive, car)\n" + - "that(car, drive)\n" + - "_nn(car, police)\n"); - - rc &= test_sentence("Just before the crossroads, the car was stopped by a traffic sign that stood on the street.", - "_obj(stop, car)\n" + - "by(stop, sign)\n" + - "_advmod(stop, just)\n" + - "on(stand, street)\n" + - "_subj(stand, sign)\n" + - "that(sign, stand)\n" + - "_nn(sign, traffic)\n" + - "before(just, crossroads)\n"); - - report(rc, "Extrapostion"); - return rc; - } - - - public static void main(String[] args) - { - setUpClass(); - TestRelEx ts = new TestRelEx(); - ts.runTests(); - } - - @Test - public void runTests() { - TestRelEx ts = this; - boolean rc = true; - - rc &= ts.test_comparatives(); - rc &= ts.test_extraposition(); - rc &= ts.test_Conjunction(); - - if (rc) { - System.err.println("Tested " + ts.pass + " sentences, test passed OK"); - } else { - System.err.println("Test failed\n\t" + - ts.fail + " sentences failed\n\t" + - ts.pass + " sentences passed"); - } - - System.err.println("******************************"); - System.err.println("Failed test sentences on Relex"); - System.err.println("******************************"); - if(sentfail.isEmpty()) - System.err.println("All test sentences passed"); - for(String temp : sentfail){ - System.err.println(temp); - } - System.err.println("******************************\n"); - } -} +/* + * Copyright 2009 Linas Vepstas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package relex.test; + +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.hasSize; +import static org.junit.Assert.assertThat; + +import java.util.List; +import java.util.Set; + +import junitparams.JUnitParamsRunner; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; + +import relex.ParsedSentence; +import relex.RelationExtractor; +import relex.Sentence; +import relex.output.SimpleView; + +import com.google.common.base.Optional; +import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableList; + +@RunWith(JUnitParamsRunner.class) +public class TestRelEx { + private static RelationExtractor re; + + @BeforeClass + public static void setUpClass() { + re = new RelationExtractor(); + } + + /** + * Succeds if RelEx generates the same relations as {@code sf}. + * @param sentence Sentence + * @param sf set of the relations that RelEx should be generating. + * @param description Description + */ + protected void testSentence(String sentence, Set sf, Optional description) { + re.do_penn_tagging = false; + re.setMaxParses(1); + Sentence sntc = re.processSentence(sentence); + ParsedSentence parse = sntc.getParses().get(0); + String rs = SimpleView.printBinaryRelations(parse); + String urs = SimpleView.printUnaryRelations(parse); + + List exp = ImmutableList.copyOf(sf); + List brgot = Splitter.on("\n").omitEmptyStrings().splitToList(rs); + List urgot = Splitter.on("\n").omitEmptyStrings().splitToList(urs); + + //add number of binary relations from parser-output, to total number of relationships got + int sizeOfGotRelations= brgot.size(); + //check expected binary and unary relations + //the below for-loop checks whether all expected binary relations are + //contained in the parser-binary-relation-output arrayList "brgot". + //if any unary relations are expected in the output it checks the + //parser-unary-relation-output arrayList "urgot" for unary relationships + for (int i=0; i< exp.size(); i++) + { + if(!brgot.contains(exp.get(i))) + { + assertThat("content miscompare:\n" + + "\tExpected = " + exp + "\n" + + "\tGot Binary Relations = " + brgot + "\n" + + "\tGot Unary Relations = " + urgot + "\n" + + "\tSentence = " + sentence, + urgot, hasItem(exp.get(i))); + //add the unary relation, count to total number of binary relations + sizeOfGotRelations++; + } + + } + //The size checking of the expected relationships vs output relationships + //is done here purposefully, to accommodate if there is any unary relationships present + //in the expected output(see above for-loop also). + //However it only checks whether parser-output resulted more relationships(binary+unary) than expected relations + //If the parser-output resulted less relationships(binary+unary) than expected it would + //catch that in the above for-loop + assertThat("size miscompare:\n" + + "\tExpected = " + exp + "\n" + + "\tGot Binary Relations = " + brgot + "\n" + + "\tGot Unary Relations = " + urgot + "\n" + + "\tSentence = " + sentence, + exp, hasSize(greaterThanOrEqualTo(sizeOfGotRelations))); + } + + @Test + @junitparams.Parameters(source=RelExCases.class, method="provideComparatives") + public void comparatives(String sentence, Set expected, Optional description) { + testSentence(sentence, expected, description); + } + + @Test + @junitparams.Parameters(source=RelExCases.class, method="provideConjunction") + public void conjunction(String sentence, Set expected, Optional description) { + testSentence(sentence, expected, description); + } + + @Test + @junitparams.Parameters(source=RelExCases.class, method="provideExtraposition") + public void extraposition(String sentence, Set expected, Optional description) { + testSentence(sentence, expected, description); + } + +} diff --git a/src/java_test/relex/test/TestStanford.java b/src/java_test/relex/test/TestStanford.java index 5db4b33a3..3822d08c9 100644 --- a/src/java_test/relex/test/TestStanford.java +++ b/src/java_test/relex/test/TestStanford.java @@ -1,596 +1,102 @@ -/* - * Copyright 2009 Linas Vepstas - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package relex.test; - -import java.util.ArrayList; -import java.util.Collections; - -import org.junit.BeforeClass; -import org.junit.Test; - -import relex.ParsedSentence; -import relex.RelationExtractor; -import relex.Sentence; -import relex.output.StanfordView; - -public class TestStanford -{ - private static RelationExtractor re; - private int pass; - private int fail; - private static ArrayList sentfail= new ArrayList(); - private static ArrayList sentfailpostag= new ArrayList(); - - @BeforeClass - public static void setUpClass() { - re = new RelationExtractor(); - } - - public TestStanford() - { - re.do_stanford = true; - pass = 0; - fail = 0; - } - - public ArrayList split(String a) - { - String[] sa = a.split("\n"); - ArrayList saa = new ArrayList(); - for (String s : sa) - { - saa.add(s); - } - Collections.sort (saa); - return saa; - } - - /** - * First argument is the sentence. - * Second argument is a list of the relations that the - * Stanford parser generates. - * Return true if relex generates that same dependencies - * as the second argument. - */ - public boolean test_sentence (String sent, String sf) - { - re.do_penn_tagging = false; - Sentence sntc = re.processSentence(sent); - ParsedSentence parse = sntc.getParses().get(0); - String rs = StanfordView.printRelations(parse, false); - - ArrayList sfa = split(sf); - ArrayList rsa = split(rs); - if (sfa.size() != rsa.size()) - { - System.err.println("Error: size miscompare:\n" + - "\tStanford = " + sfa + "\n" + - "\tRelEx = " + rsa + "\n" + - "\tSentence = " + sent); - fail ++; - sentfail.add(sent); - return false; - } - for (int i=0; i< sfa.size(); i++) - { - if (!sfa.get(i).equals (rsa.get(i))) - { - System.err.println("Error: content miscompare:\n" + - "\tStanford = " + sfa + "\n" + - "\tRelEx = " + rsa + "\n" + - "\tSentence = " + sent); - fail ++; - sentfail.add(sent); - return false; - } - } - - pass ++; - return true; - } - - public boolean test_tagged_sentence (String sent, String sf) - { - re.do_penn_tagging = true; - Sentence sntc = re.processSentence(sent); - ParsedSentence parse = sntc.getParses().get(0); - String rs = StanfordView.printRelations(parse, true); - - ArrayList sfa = split(sf); - ArrayList rsa = split(rs); - if (sfa.size() != rsa.size()) - { - System.err.println("Error: size miscompare:\n" + - "\tStanford = " + sfa + "\n" + - "\tRelEx = " + rsa + "\n" + - "\tSentence = " + sent); - fail ++; - sentfailpostag.add(sent); - return false; - } - for (int i=0; i< sfa.size(); i++) - { - if (!sfa.get(i).equals (rsa.get(i))) - { - System.err.println("Error: content miscompare:\n" + - "\tStanford = " + sfa + "\n" + - "\tRelEx = " + rsa + "\n" + - "\tSentence = " + sent); - fail ++; - sentfailpostag.add(sent); - return false; - } - } - - pass ++; - return true; - } - - public static void main(String[] args) - { - setUpClass(); - TestStanford ts = new TestStanford(); - ts.runTests(); - } - - @Test - public void runTests() { - TestStanford ts = this; - boolean rc = true; - - // The parses below were compared to the Stanford parser, circa - // 2009. Since then, it is likely that Stanford has changed. - // The tests below should not be changed, unless a) they are - // changed to be comaptible with current-day Stanford, and b) they - // pass. - rc &= ts.test_sentence ("Who invented sliced bread?", - "nsubj(invented-2, who-1)\n" + - "amod(bread-4, sliced-3)\n" + - "dobj(invented-2, bread-4)"); - - rc &= ts.test_sentence ("Jim runs quickly.", - "nsubj(runs-2, Jim-1)\n" + - "advmod(runs-2, quickly-3)"); - - rc &= ts.test_sentence ("The bird, a robin, sang sweetly.", - "det(bird-2, the-1)\n" + - "nsubj(sang-7, bird-2)\n" + - "det(robin-5, a-4)\n" + - "appos(bird-2, robin-5)\n" + - "advmod(sang-7, sweetly-8)"); - - rc &= ts.test_sentence ("There is a place we can go.", - "expl(is-2, there-1)\n" + - "det(place-4, a-3)\n" + - "nsubj(is-2, place-4)\n" + - "nsubj(go-7, we-5)\n" + - "aux(go-7, can-6)"); - // wtf ?? dep is not documented .. not sure what to do here ... - // "dep(is-2, go-7)"); - - rc &= ts.test_sentence ("The linebacker gave the quarterback a push.", - "det(linebacker-2, the-1)\n" + - "nsubj(gave-3, linebacker-2)\n" + - "det(quarterback-5, the-4)\n" + - "iobj(gave-3, quarterback-5)\n" + - "det(push-7, a-6)\n" + - "dobj(gave-3, push-7)\n"); - - rc &= ts.test_sentence ("He stood at the goal line.", - "nsubj(stood-2, he-1)\n" + - "det(line-6, the-4)\n" + - "nn(line-6, goal-5)\n" + - "prep_at(stood-2, line-6)"); - - // acomp example from Stanford docs - rc &= ts.test_sentence ("She looks very beautiful.", - "nsubj(looks-2, she-1)\n" + - "advmod(beautiful-4, very-3)\n" + - "acomp(looks-2, beautiful-4)"); - - // advcl example from Stanford docs - rc &= ts.test_sentence ("The accident happened as the night was falling.", - "det(accident-2, the-1)\n" + - "nsubj(happened-3, accident-2)\n" + - "mark(falling-8, as-4)\n" + - "det(night-6, the-5)\n" + - "nsubj(falling-8, night-6)\n" + - "aux(falling-8, was-7)\n" + - "advcl(happened-3, falling-8)"); - - // advcl example from Stanford docs - rc &= ts.test_sentence ("If you know who did it, you should tell the teacher.", - "mark(know-3, if-1)\n" + - "nsubj(know-3, you-2)\n" + - "advcl(tell-10, know-3)\n" + - "nsubj(did-5, who-4)\n" + - "ccomp(know-3, did-5)\n" + - "dobj(did-5, it-6)\n" + - "nsubj(tell-10, you-8)\n" + - "aux(tell-10, should-9)\n" + - "det(teacher-12, the-11)\n" + - "dobj(tell-10, teacher-12)"); - - // agent example from Stanford docs - rc &= ts.test_sentence ("The man has been killed by the police.", - "det(man-2, the-1)\n" + - "nsubjpass(killed-5, man-2)\n" + - "aux(killed-5, has-3)\n" + - "auxpass(killed-5, been-4)\n" + - "det(police-8, the-7)\n" + - "agent(killed-5, police-8)"); - - rc &= ts.test_sentence ("Effects caused by the protein are important.", - "nsubj(important-7, effects-1)\n" + - "partmod(effects-1, caused-2)\n" + - "det(protein-5, the-4)\n" + - "agent(caused-2, protein-5)\n" + - "cop(important-7, are-6)"); - - rc &= ts.test_sentence ("Sam, my brother, has arrived.", - "nsubj(arrived-7, Sam-1)\n" + - "poss(brother-4, my-3)\n" + - "appos(Sam-1, brother-4)\n" + - "aux(arrived-7, has-6)"); - - rc &= ts.test_sentence ("What is that?", - "attr(is-2, what-1)\n" + - "nsubj(is-2, that-3)"); - - rc &= ts.test_sentence ("Reagan has died.", - "nsubj(died-3, Reagan-1)\n" + - "aux(died-3, has-2)"); - - rc &= ts.test_sentence ("He should leave.", - "nsubj(leave-3, he-1)\n" + - "aux(leave-3, should-2)"); - - rc &= ts.test_sentence ("Kennedy has been killed.", - "nsubjpass(killed-4, Kennedy-1)\n" + - "aux(killed-4, has-2)\n" + - "auxpass(killed-4, been-3)"); - - rc &= ts.test_sentence ("Kennedy was killed.", - "nsubjpass(killed-3, Kennedy-1)\n" + - "auxpass(killed-3, was-2)"); - - rc &= ts.test_sentence ("Kennedy got killed.", - "nsubjpass(killed-3, Kennedy-1)\n" + - "auxpass(killed-3, got-2)"); - - rc &= ts.test_sentence ("Bill is big.", - "nsubj(big-3, Bill-1)\n" + - "cop(big-3, is-2)\n"); - - rc &= ts.test_sentence ("Bill is an honest man.", - "nsubj(man-5, Bill-1)\n" + - "cop(man-5, is-2)\n" + - "det(man-5, an-3)\n" + - "amod(man-5, honest-4)"); - - rc &= ts.test_sentence ("What she said makes sense.", - "dobj(said-3, what-1)\n" + - "nsubj(said-3, she-2)\n" + - "csubj(makes-4, said-3)\n" + - "dobj(makes-4, sense-5)"); - - rc &= ts.test_sentence ("What she said is not true.", - "dobj(said-3, what-1)\n" + - "nsubj(said-3, she-2)\n" + - "csubj(true-6, said-3)\n" + - "cop(true-6, is-4)\n" + - "neg(true-6, not-5)"); - - rc &= ts.test_sentence ("Which book do you prefer?", - "det(book-2, which-1)\n" + - "dobj(prefer-5, book-2)\n" + - "aux(prefer-5, do-3)\n" + - "nsubj(prefer-5, you-4)"); - - rc &= ts.test_sentence ("There is a ghost in the room.", - "expl(is-2, there-1)\n" + - "det(ghost-4, a-3)\n" + - "nsubj(is-2, ghost-4)\n" + - "det(room-7, the-6)\n" + - "prep_in(is-2, room-7)"); - - rc &= ts.test_sentence ("She gave me a raise.", - "nsubj(gave-2, she-1)\n" + - "iobj(gave-2, me-3)\n" + - "det(raise-5, a-4)\n" + - "dobj(gave-2, raise-5)"); - - rc &= ts.test_sentence ("The director is 65 years old.", - "det(director-2, the-1)\n" + - "nsubj(old-6, director-2)\n" + - "cop(old-6, is-3)\n" + - "num(years-5, 65-4)\n" + - "measure(old-6, years-5)"); - - rc &= ts.test_sentence ("Sam eats 3 sheep.", - "nsubj(eats-2, Sam-1)\n" + - "num(sheep-4, 3-3)\n" + - "dobj(eats-2, sheep-4)"); - -/**************** - * I don't get it. Stanford makes a num/number distinction I can't grok. - rc &= ts.test_sentence ("I lost $ 3.2 billion.", - "nsubj(lost-2, I-1)\n" + - "dobj(lost-2, $-3)\n" + - "number($-3, 3.2-4)\n" + - "number($-3, billion-5)"); -***********/ - - rc &= ts.test_sentence ("Truffles picked during the spring are tasty.", - "nsubj(tasty-7, truffles-1)\n" + - "partmod(truffles-1, picked-2)\n" + - "det(spring-5, the-4)\n" + - "prep_during(picked-2, spring-5)\n" + - "cop(tasty-7, are-6)"); - -/**************** - * Currently fails due to xcomp generation problems - * - rc &= ts.test_sentence ("We went to their offices to get Bill's clothes.", - "nsubj(went-2, we-1)\n" + - "xsubj(get-7, we-1)\n" + - "poss(offices-5, their-4)\n" + - "prep_to(went-2, offices-5)\n" + - "aux(get-7, to-6)\n" + - "xcomp(went-2, get-7)\n" + - "poss(clothes-10, Bill-8)\n" + - "dobj(get-7, clothes-10)"); -***********/ - -/**************** - * See README-Stanford for details. - rc &= ts.test_sentence ("All the boys are here.", - "predet(boys-3, all-1)\n" + - "det(boys-3, the-2)\n" + - "nsubj(are-4, boys-3)\n" + - "advmod(are-4, here-5)"); -***********/ - -/**************** - * These are ambiguous parses. - * Stanford picks the opposite choice from Relex. - * See the README-Stanford for a discussion. - rc &= ts.test_sentence ("I saw a cat in a hat.", - "nsubj(saw-2, I-1)\n" + - "det(cat-4, a-3)\n" + - "dobj(saw-2, cat-4)\n" + - "det(hat-7, a-6)\n" + - "prep_in(cat-4, hat-7)"); - - rc &= ts.test_sentence ("I saw a cat with a telescope.", - "nsubj(saw-2, I-1)\n" + - "det(cat-4, a-3)\n" + - "dobj(saw-2, cat-4)\n" + - "det(telescope-7, a-6)\n" + - "prep_with(cat-4, telescope-7)"); -***********/ - - rc &= ts.test_sentence ("He is responsible for meals.", - "nsubj(responsible-3, he-1)\n" + - "cop(responsible-3, is-2)\n" + - "prep_for(responsible-3, meals-5)\n"); - - rc &= ts.test_sentence ("They shut down the station.", - "nsubj(shut-2, they-1)\n" + - "prt(shut-2, down-3)\n" + - "det(station-5, the-4)\n" + - "dobj(shut-2, station-5)"); - - rc &= ts.test_sentence ("About 200 people came to the party.", - "quantmod(200-2, about-1)\n" + - "num(people-3, 200-2)\n" + - "nsubj(came-4, people-3)\n" + - "det(party-7, the-6)\n" + - "prep_to(came-4, party-7)"); - - rc &= ts.test_sentence ("I saw the man who you love.", - "nsubj(saw-2, I-1)\n" + - "det(man-4, the-3)\n" + - "dobj(saw-2, man-4)\n" + - "dobj(love-7, man-4)\n" + - "rel(love-7, who-5)\n" + - "nsubj(love-7, you-6)\n" + - "rcmod(man-4, love-7)"); - - -/**************** - * - * relex is failing to generate teh following: - * Almost got it w/the B** rules but not quite ... - -rel(love-8, wife-6) -rcmod(man-4, love-8) - - rc &= ts.test_sentence ("I saw the man whose wife you love.", - "nsubj(saw-2, I-1)\n" + - "det(man-4, the-3)\n" + - "dobj(saw-2, man-4)\n" + - "poss(wife-6, whose-5)\n" + - "dobj(love-8, wife-6)\n" + - "rel(love-8, wife-6)\n" + - "nsubj(love-8, you-7)\n" + - "rcmod(man-4, love-8)"); -***********/ - - rc &= ts.test_sentence ("I am ready to leave.", - "nsubj(ready-3, I-1)\n" + - "cop(ready-3, am-2)\n" + - "aux(leave-5, to-4)\n" + - "xcomp(ready-3, leave-5)"); - - rc &= ts.test_sentence ("Tom likes to eat fish.", - "nsubj(likes-2, Tom-1)\n" + - "xsubj(eat-4, Tom-1)\n" + - "aux(eat-4, to-3)\n" + - "xcomp(likes-2, eat-4)\n" + - "dobj(eat-4, fish-5)"); - - -/**************** - rc &= ts.test_sentence ("He says that you like to swim.", - "nsubj(says-2, he-1)\n" + - "complm(like-5, that-3)\n" + - "nsubj(like-5, you-4)\n" + - "ccomp(says-2, like-5)\n" + - "nsubj(swim-7, to-6)\n" + // NFW that this can be right. - "ccomp(like-5, swim-7)"); -***********/ - - -/**************** - rc &= ts.test_sentence ("The garage is next to the house.", - "det(garage-2, the-1)\n" + - "nsubj(next-4, garage-2)\n" + - "cop(next-4, is-3)\n" + - "det(house-7, the-6)\n" + - "prep_to(next-4, house-7)"); -***********/ - - // ========================================================= - // PENN PART_OF_SPEECH TAGGING - // ========================================================= - // - rc &= ts.test_tagged_sentence ("Truffles picked during the spring are tasty.", - "nsubj(tasty-7-JJ, truffles-1-NNS)\n" + - "partmod(truffles-1-NNS, picked-2-VBN)\n" + - "det(spring-5-NN, the-4-DT)\n" + - "prep_during(picked-2-VBN, spring-5-NN)\n" + - "cop(tasty-7-JJ, are-6-VBP)"); - - rc &= ts.test_tagged_sentence ("I ate twelve truffles.", - "nsubj(ate-2-VBD, I-1-PRP)\n" + - "num(truffles-4-NNS, twelve-3-CD)\n" + - "dobj(ate-2-VBD, truffles-4-NNS)"); - - rc &= ts.test_tagged_sentence ("I have eaten twelve truffles.", - "nsubj(eaten-3-VBN, I-1-PRP)\n" + - "aux(eaten-3-VBN, have-2-VBP)\n" + - "num(truffles-5-NNS, twelve-4-CD)\n" + - "dobj(eaten-3-VBN, truffles-5-NNS)"); - - rc &= ts.test_tagged_sentence ("I had eaten twelve truffles.", - "nsubj(eaten-3-VBN, I-1-PRP)\n" + - "aux(eaten-3-VBN, had-2-VBD)\n" + - "num(truffles-5-NNS, twelve-4-CD)\n" + - "dobj(eaten-3-VBN, truffles-5-NNS)"); - - rc &= ts.test_tagged_sentence ("The truffles were eaten.", - "det(truffles-2-NNS, the-1-DT)\n" + - "nsubjpass(eaten-4-VBN, truffles-2-NNS)\n" + - "auxpass(eaten-4-VBN, were-3-VBD)"); - - - // Full disclosure: Stanford currently generates - // dep(time-4-NN, young-8-JJ) which just means it doesn't know - // the right answer (which is advcl, right?). - // It also generates advmod(young-8-JJ, when-5-WRB) in addition - // to rel(young-8-JJ, when-5-WRB) which is not quite right - // either. - rc &= ts.test_tagged_sentence ("There was a time when we were young.", - "expl(was-2-VBD, there-1-EX)\n" + - "det(time-4-NN, a-3-DT)\n" + - "nsubj(was-2-VBD, time-4-NN)\n" + - "rel(young-8-JJ, when-5-WRB)\n" + - "nsubj(young-8-JJ, we-6-PRP)\n" + - "cop(young-8-JJ, were-7-VBD)\n" + - "advcl(time-4-NN, young-8-JJ)"); - - rc &= ts.test_tagged_sentence ("Is there a better way?", - "expl(is-1-VBZ, there-2-EX)\n" + - "det(way-5-NN, a-3-DT)\n" + - "amod(way-5-NN, better-4-JJR)\n" + - "nsubj(is-1-VBZ, way-5-NN)"); - - rc &= ts.test_tagged_sentence ("Is this the largest you can find?", - "cop(largest-4-JJS, is-1-VBZ)\n" + - "nsubj(largest-4-JJS, this-2-DT)\n" + - "det(largest-4-JJS, the-3-DT)\n" + - "nsubj(find-7-VB, you-5-PRP)\n" + - "aux(find-7-VB, can-6-MD)\n" + - "rcmod(largest-4-JJS, find-7-VB)"); - - rc &= ts.test_tagged_sentence ("But my efforts to win his heart have failed.", - "poss(efforts-3-NNS, my-2-PRP$)\n" + - "nsubj(failed-9-VBN, efforts-3-NNS)\n" + - "aux(win-5-VB, to-4-TO)\n" + - "infmod(efforts-3-NNS, win-5-VB)\n" + - "poss(heart-7-NN, his-6-PRP$)\n" + - "dobj(win-5-VB, heart-7-NN)\n" + - "aux(failed-9-VBN, have-8-VBP)"); - - rc &= ts.test_tagged_sentence ("The undergrads are occasionally late.", - "det(undergrads-2-NNS, the-1-DT)\n" + - "nsubj(late-5-JJ, undergrads-2-NNS)\n" + - "cop(late-5-JJ, are-3-VBP)\n" + - "advmod(late-5-JJ, occasionally-4-RB)"); - - rc &= ts.test_tagged_sentence ("The height of Mount Everest is 8,848 metres.", - "det(height-2-NN, the-1-DT)\n" + - "nsubj(metres-8-NNS, height-2-NN)\n" + - "nn(Everest-5-NNP, Mount-4-NNP)\n" + - "prep_of(height-2-NN, Everest-5-NNP)\n" + - "cop(metres-8-NNS, is-6-VBZ)\n" + - "num(metres-8-NNS, 8,848-7-CD)"); - - rc &= ts.test_tagged_sentence ("It happened on December 3rd, 1990.", - "nsubj(happened-2-VBD, it-1-PRP)\n" + - "prep_on(happened-2-VBD, December-4-NNP)\n" + - "num(December-4-NNP, 3rd-5-CD)\n" + - "num(December-4-NNP, 1990-7-CD)"); - - - if (rc) - { - System.err.println("Tested " + ts.pass + " sentences, test passed OK"); - } - else - { - System.err.println("Test failed\n\t" + - ts.fail + " sentences failed\n\t" + - ts.pass + " sentences passed"); - } - - System.err.println("********************************************************"); - System.err.println("Failed test sentences on Stanford with POS tagging FALSE"); - System.err.println("********************************************************"); - - if(sentfail.isEmpty()) - System.err.println("All test sentences passed"); - - for(String temp : sentfail){ - System.err.println(temp); - } - System.err.println("********************************************************\n"); - - System.err.println("********************************************************"); - System.err.println("Failed test sentences on Stanford with POS tagging TRUE"); - System.err.println("********************************************************"); - - if(sentfailpostag.isEmpty()) - System.err.println("All test sentences passed"); - - for(String temp : sentfailpostag){ - System.err.println(temp); - } - System.err.println("********************************************************\n"); - } -} +/* + * Copyright 2009 Linas Vepstas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package relex.test; + +import static org.hamcrest.Matchers.hasSize; +import static org.junit.Assert.assertThat; + +import java.util.List; +import java.util.Set; + +import junitparams.JUnitParamsRunner; + +import org.hamcrest.Matchers; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; + +import relex.ParsedSentence; +import relex.RelationExtractor; +import relex.Sentence; +import relex.output.StanfordView; + +import com.google.common.base.Optional; +import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableList; + +@RunWith(JUnitParamsRunner.class) +public class TestStanford { + private static RelationExtractor re; + + @BeforeClass + public static void setUpClass() { + re = new RelationExtractor(); + re.do_stanford = true; + } + + /** + * First argument is the sentence. + * Second argument is a list of the relations that the + * Stanford parser generates. + * Return true if relex generates that same dependencies + * as the second argument. + */ + @Test + @junitparams.Parameters(source=StanfordCases.class, method="provideUntagged") + public void untaggedSentence(String sent, Set sf, Optional description) { + re.do_penn_tagging = false; + Sentence sntc = re.processSentence(sent); + ParsedSentence parse = sntc.getParses().get(0); + String rs = StanfordView.printRelations(parse, false); + + List sfa = ImmutableList.copyOf(sf); + List rsa = Splitter.on("\n").omitEmptyStrings().splitToList(rs); + assertThat("Error: size miscompare:\n" + + "\tStanford = " + sfa + "\n" + + "\tRelEx = " + rsa + "\n" + + "\tSentence = " + sent, + sfa, hasSize(rsa.size())); + assertThat("Error: content miscompare:\n" + + "\tStanford = " + sfa + "\n" + + "\tRelEx = " + rsa + "\n" + + "\tSentence = " + sent, + sfa, Matchers.containsInAnyOrder(rsa.toArray(new String[] {}))); + } + + @Test + @junitparams.Parameters(source=StanfordCases.class, method="provideTagged") + public void taggedSentence(String sent, Set sf, Optional description) { + re.do_penn_tagging = true; + Sentence sntc = re.processSentence(sent); + ParsedSentence parse = sntc.getParses().get(0); + String rs = StanfordView.printRelations(parse, true); + + List sfa = ImmutableList.copyOf(sf); + List rsa = Splitter.on("\n").omitEmptyStrings().splitToList(rs); + assertThat("Error: size miscompare:\n" + + "\tStanford = " + sfa + "\n" + + "\tRelEx = " + rsa + "\n" + + "\tSentence = " + sent, + sfa, hasSize(rsa.size())); + assertThat("Error: content miscompare:\n" + + "\tStanford = " + sfa + "\n" + + "\tRelEx = " + rsa + "\n" + + "\tSentence = " + sent, + sfa, Matchers.containsInAnyOrder(rsa.toArray(new String[] {}))); + } + +} diff --git a/src/test/resources/logback.xml b/src/test/resources/logback.xml new file mode 100644 index 000000000..caa2d35f2 --- /dev/null +++ b/src/test/resources/logback.xml @@ -0,0 +1,16 @@ + + + + + %d{HH:mm:ss.SSS} %highlight(%-5level) | %magenta(%-16.16thread) %cyan(%-32.32logger{1}) | %msg%n + + + + + + + + + + + diff --git a/src/test/resources/relex-comparatives.tsv b/src/test/resources/relex-comparatives.tsv new file mode 100644 index 000000000..e20614a0a --- /dev/null +++ b/src/test/resources/relex-comparatives.tsv @@ -0,0 +1,455 @@ +Sentence Relations +Some people like pigs less than dogs. + _advmod(like, less) + _obj(like, pig) + _quantity(people, some) + _subj(like, people) + than(pig, dog) + +Some people like pigs more than dogs. + _advmod(like, more) + _obj(like, pig) + _quantity(people, some) + _subj(like, people) + than(pig, dog) + +//Non-equal Gradable : Two entities one feature more/less +He is more intelligent than John. + than(he, John) + _comparative(intelligent, he) + degree(intelligent, comparative) + _predadj(he, intelligent) + +He is less intelligent than John. + than(he, John) + _comparative(intelligent, he) + degree(intelligent, comparative) + _advmod(intelligent, less) + _predadj(he, intelligent) + +He runs more quickly than John. + _advmod(run, quickly) + _advmod(quickly, more) + _subj(run, he) + than(he, John) + _comparative(quickly, run) + degree(quickly, comparative) + +He runs less quickly than John. + _advmod(run, quickly) + _subj(run, he) + _advmod(quickly, less) + than(he, John) + _comparative(quickly, run) + degree(quickly, comparative) + +He runs more quickly than John does. + _advmod(run, quickly) + _advmod(quickly, more) + _subj(run, he) + _subj(do, John) + than(he, John) + _comparative(quickly, run) + degree(quickly, comparative) + +//This sentence is ungrammatical but commonly used by non-native English speakers +He runs less quickly than John does. + _advmod(run, quickly) + _subj(run, he) + _subj(do, John) + _advmod(quickly, less) + than(he, John) + _comparative(quickly, run) + degree(quickly, comparative) + +He runs slower than John does. + _advmod(run, slow) + _subj(run, he) + _subj(do, John) + than(he, John) + _comparative(slow, run) + degree(slow, comparative) + +He runs more than John. + _obj(run, more) + _subj(run, he) + than(he, John) + _comparative(more, run) + degree(more, comparative) + +He runs less than John. + _obj(run, less) + _subj(run, he) + than(he, John) + _comparative(less, run) + degree(less, comparative) + +He runs faster than John. + than(he, John) + _comparative(fast, run) + _subj(run, he) + _advmod(run, fast) + degree(fast, comparative) + +He runs more slowly than John. + than(he, John) + _subj(run, he) + _advmod(slowly, more) + _comparative(slowly, run) + _advmod(run, slowly) + degree(slowly, comparative) + +He runs less slowly than John. + than(he, John) + _subj(run, he) + _comparative(slowly, run) + _advmod(run, slowly) + _advmod(slowly, less) + degree(slowly, comparative) + +He runs more miles than John does. + than(he, John) + _subj(run, he) + _subj(do, John) + _obj(run, mile) + _comparative(mile, run) + _quantity(mile, more) + degree(more, comparative) + +He runs less miles than John does. + than(he, John) + _subj(run, he) + _subj(do, John) + _obj(run, mile) + _comparative(mile, run) + _quantity(mile, less) + degree(less, comparative) + +He runs many more miles than John does. + than(he, John) + _comparative(mile, run) + _obj(run, mile) + _subj(run, he) + _subj(do, John) + _quantity(mile, many) + degree(more, comparative) + +He runs fewer miles than John does. + than(he, John) + _comparative(mile, run) + _obj(run, mile) + _subj(run, he) + _subj(do, John) + _quantity(mile, fewer) + degree(fewer, comparative) + +He runs ten more miles than John. + _obj(run, mile) + _subj(run, he) + _quantity(mile, more) + than(he, John) + _comparative(mile, run) + _num_quantity(miles, ten) + degree(more, comparative) + +He runs almost ten more miles than John does. + _obj(run, mile) + _subj(run, he) + _comparative(mile, run) + _subj(do, John) + than(he, John) + _quantity_mod(ten, almost) + _num_quantity(miles, ten) + degree(more, comparative) + +He runs more often than John. + _subj(run, he) + _advmod(often, more) + _advmod(run, often) + _comparative(often, run) + than(he, John) + degree(often, comparative) + +He runs less often than John. + _subj(run, he) + _advmod(often, less) + _advmod(run, often) + _comparative(often, run) + than(he, John) + degree(often, comparative) + +He runs here more often than John. + _advmod(run, here) + _advmod(often, more) + _advmod(run, often) + _subj(run, he) + _comparative(often, run) + than(he, John) + degree(often, comparative) + +He runs here less often than John. + _advmod(run, here) + _advmod(often, less) + _advmod(run, often) + _subj(run, he) + _comparative(often, run) + than(he, John) + degree(often, comparative) + +He is faster than John. + than(he, John) + _predadj(he, fast) + _comparative(fast, be) + degree(fast, comparative) + +He is faster than John is. + than(he, John) + _predadj(he, fast) + _subj(be, John) + _comparative(fast, be) + degree(fast, comparative) + +His speed is faster than John's. + than(speed, be) + _predadj(speed, fast) + _poss(speed, him) + _comparative(fast, be) + degree(fast, comparative) + +I run more than Ben. + _subj(run, I) + _obj(run, more) + _comparative(more, run) + than(I, Ben) + degree(more, comparative) + +I run less than Ben. + _subj(run, I) + _obj(run, less) + _comparative(less, run) + than(I, Ben) + degree(less, comparative) + +I run more miles than Ben. + _subj(run, I) + _obj(run, mile) + _quantity(mile, more) + _comparative(mile, run) + than(I, Ben) + degree(more, comparative) + +I run fewer miles than Ben. + _subj(run, I) + _obj(run, mile) + _quantity(mile, fewer) + _comparative(mile, run) + than(I, Ben) + degree(fewer, comparative) + +I run 10 more miles than Ben. + _subj(run, I) + _obj(run, mile) + _num_quantity(mile, 10) + _quantity_mod(10, more) + _comparative(mile, run) + than(I, Ben) + degree(more, comparative) + +I run 10 fewer miles than Ben. + _subj(run, I) + _obj(run, mile) + _num_quantity(mile, 10) + _quantity_mod(10, fewer) + _comparative(mile, run) + than(I, Ben) + degree(fewer, comparative) + +I run more often than Ben. + _subj(run, I) + _advmod(run, often) + _comparative(often, run) + than(I, Ben) + degree(often, comparative) + _advmod(often, more) + +I run less often than Ben. + _subj(run, I) + _advmod(run, often) + _comparative(often, run) + than(I, Ben) + degree(often, comparative) + _advmod(often, less) + +I run more often than Ben does. + _subj(run, I) + _subj(do, Ben) + _advmod(run, often) + _comparative(often, run) + than(I, Ben) + degree(often, comparative) + _advmod(often, more) + +I run less often than Ben does. + _subj(run, I) + _subj(do, Ben) + _advmod(run, often) + _comparative(often, run) + than(I, Ben) + degree(often, comparative) + _advmod(often, less) + +I run more often than Ben climbs. + _subj(run, I) + _subj(climb, Ben) + _comparative(often, run) + than(I, Ben) + than1(run, climb) + degree(often, comparative) + _advmod(run, often) + _advmod(often, more) + +I run less often than Ben climbs. + _subj(run, I) + _subj(climb, Ben) + _comparative(often, run) + than(I, Ben) + than1(run, climb) + degree(often, comparative) + _advmod(run, often) + _advmod(often, less) + +I run more races than Ben wins contests. + _subj(run, I) + _obj(run, race) + _subj(win, Ben) + _obj(win, contest) + _quantity(race, more) + _comparative(race, run) + than(I, Ben) + than1(run, climb) + than2(race, contest) + degree(more, comparative) + +I run fewer races than Ben wins contests. + _subj(run, I) + _obj(run, race) + _subj(win, Ben) + _obj(win, contest) + _quantity(race, fewer) + _comparative(race, run) + than(I, Ben) + than1(run, climb) + than2(race, contest) + degree(fewer, comparative) + +I have more chairs than Ben. + _obj(have, chair) + _subj(have, I) + than(I, Ben) + _comparative(chair, have) + _quantity(chair, more) + degree(more, comparative) + +I have fewer chairs than Ben. + _obj(have, chair) + _subj(have, I) + than(I, Ben) + _comparative(chair, have) + _quantity(chair, fewer) + degree(fewer, comparative) + +He earns much more money than I do. + _obj(earn, money) + _subj(do, I) + _subj(earn, he) + than(he,I) + _comparative(money,earn) + _quantity(money, more) + _advmod(more, much) + degree(more,comparative) + +He earns much less money than I do. + _obj(earn, money) + _subj(do, I) + _subj(earn, he) + than(he, I) + _comparative(money, earn) + _quantity(money, less) + _advmod(less, much) + degree(less, comparative) + +She comes here more often than her husband. + _advmod(come, here) + _advmod(often, more) + _advmod(come, often) + _subj(come, she) + _poss(husband, her) + _comparative(often, come) + than(she, husband) + degree(often, comparative) + +She comes here less often than her husband. + _advmod(come, here) + _advmod(often, less) + _advmod(come, often) + _subj(come, she) + _poss(husband, her) + _comparative(often, come) + than(she, husband) + degree(often, comparative) + +//When link-grammar uses A, relex should use _amod it will use A instead of AN; will be updated in next linkgrammer version +Russian grammar is more difficult than English grammar. + _comparative(difficult, grammar) + than(grammar, grammar) + _amod(grammar, Russian) + _predadj(grammar, difficult) + _amod(grammar, English) + degree(difficult, comparative) + +Russian grammar is less difficult than English grammar. + _comparative(difficult, grammar) + than(grammar, grammar) + _amod(grammar, Russian) + _predadj(grammar, difficult) + _amod(grammar, English) + _advmod(difficult, less) + degree(difficult, comparative) + +My sister is much more intelligent than me. + _amod(much, intelligent) + _predadj(sister, intelligent) + _poss(sister, me) + than(sister, me) + _comparative(intelligent, sister) + degree(intelligent, comparative) + +My sister is much less intelligent than me. + _amod(much, intelligent) + _predadj(sister, intelligent) + _poss(sister, me) + than(sister, me) + _comparative(intelligent, sister) + _advmod(intelligent, less) + degree(intelligent, comparative) + +I find maths lessons more enjoyable than science lessons. + _iobj(find, maths) + _obj(find, lesson) + _subj(find, I) + _amod(lesson, enjoyable) + _nn(lesson, science) + than(maths, science) + _comparative(enjoyable, maths) + degree(enjoyable, comparative) + +I find maths lessons less enjoyable than science lessons. + _iobj(find, maths) + _obj(find, lesson) + _subj(find, I) + _amod(lesson, enjoyable) + _nn(lesson, science) + than(maths, science) + _comparative(enjoyable, maths) + _advmod(enjoyable, less) + degree(enjoyable, comparative) diff --git a/src/test/resources/relex-conjunction.tsv b/src/test/resources/relex-conjunction.tsv new file mode 100644 index 000000000..db1cca574 --- /dev/null +++ b/src/test/resources/relex-conjunction.tsv @@ -0,0 +1,101 @@ +Sentence Relations +//conjoined verbs +Scientists make observations and ask questions. + _obj(make, observation) + _obj(ask, question) + _subj(make, scientist) + _subj(ask, scientist) + conj_and(make, ask) +//conjoined nouns +She is a student and an employee. + _obj(be, student) + _obj(be, employee) + _subj(be, she) + conj_and(student, employee) +//conjoined adjectives +I hailed a black and white taxi. + _obj(hail, taxi) + _subj(hail, I) + _amod(taxi, black) + _amod(taxi, white) + conj_and(black, white) +//conjoined adverbs +She ran quickly and quietly. + _advmod(run, quickly) + _advmod(run, quietly) + _subj(run, she) + conj_and(quickly, quietly) +//adjectival modifiers on conjoined subject +The big truck and the little car collided. + _amod(car, little) + _amod(truck, big) + _subj(collide, truck) + _subj(collide, car) + conj_and(truck, car) +//verbs with modifiers +We ate dinner at home and went to the movies. + _obj(eat, dinner) + conj_and(eat, go) + at(eat, home) + _subj(eat, we) + to(go, movie) + _subj(go, we) +//verb with more modifiers +We ate a late dinner at home and went out to the movies afterwards. + _obj(eat, dinner) + conj_and(eat, go_out) + at(eat, home) + _subj(eat, we) + to(go_out, movie) + _advmod(go_out, afterwards) + _subj(go_out, we) + _amod(dinner, late) + +//conjoined ditransitive verbs +She baked him a cake and sang him a song. + _iobj(sing, him) + _obj(sing, song) + _subj(sing, she) + _iobj(bake, him) + _obj(bake, cake) + conj_and(bake, sing) + _subj(bake, she) +//conjoined adverbs with modifiers +she ran very quickly and extremely quietly. + _advmod(run, quickly) + _advmod(run, quietly) + _subj(run, she) + _advmod(quietly, extremely) + conj_and(quickly, quietly) + _advmod(quickly, very) +//conjoined adverbs with out modifiers +She handled it quickly and gracefully. + _obj(handle, quickly) + _obj(handle, gracefully) + _advmod(handle, quickly) + _advmod(handle, gracefully) + _subj(handle, she) + conj_and(quickly, gracefully) +//modifiers on conjoined adjectives +He had very long and very white hair. + _obj(have, hair) + _subj(have, he) + _amod(hair, long) + _amod(hair, white) + _advmod(white, very) + conj_and(long, white) + _advmod(long, very) +//adjectival modifiers on conjoined object +The collision was between the little car and the big truck. + _pobj(between, car) + _pobj(between, truck) + _psubj(between, collision) + _amod(truck, big) + _amod(car, little) + conj_and(car, truck) +//Names Modifiers and conjunction +Big Tom and Angry Sue went to the movies. + to(go, movie) + _subj(go, Big_Tom) + _subj(go, Angry_Sue) + conj_and(Big_Tom, Angry_Sue) diff --git a/src/test/resources/relex-extraposition.tsv b/src/test/resources/relex-extraposition.tsv new file mode 100644 index 000000000..bad7b8afb --- /dev/null +++ b/src/test/resources/relex-extraposition.tsv @@ -0,0 +1,107 @@ +Sentence Relations +The woman who lives next door is a registered nurse. + _obj(be, nurse) + _subj(be, woman) + _amod(nurse, registered) + _advmod(live, next_door) + _subj(live, woman) + who(woman, live) + +A player who is injured has to leave the field. + _to-do(have, leave) + _subj(have, player) + _obj(leave, field) + _predadj(player, injured) + who(player, injured) + +Pizza, which most people love, is not very healthy. + _advmod(very, not) + _advmod(healthy, very) + _obj(love, Pizza) + _quantity(people, most) + which(Pizza, love) + _subj(love, people) + _predadj(Pizza, healthy) + +The restaurant which belongs to my aunt is very famous. + _advmod(famous, very) + to(belong, aunt) + _subj(belong, restaurant) + _poss(aunt, me) + which(restaurant, belong) + _predadj(restaurant, famous) + +The books which I read in the library were written by Charles Dickens. + _obj(write, book) + by(write, Charles_Dickens) + _obj(read, book) + in(read, library) + _subj(read, I) + which(book, read) + +This is the book whose author I met in a library. + _obj(be, book) + _subj(be, this) + _obj(meet, author) + in(meet, library) + _subj(meet, I) + whose(book, author) + +The book that Jack lent me is very boring. + _advmod(boring, very) + _iobj(lend, book) + _obj(lend, me) + _subj(lend, Jack) + that(book, lend) + _predadj(book, boring) + +They ate a special curry which was recommended by the restaurant’s owner. + _obj(eat, curry) + _subj(eat, they) + _obj(recommend, curry) + by(recommend, owner) + _poss(owner, restaurant) + which(curry, recommend) + _amod(curry, special) + +The dog who Jack said chased me was black. + _obj(chase, me) + _subj(chase, dog) + _subj(say, Jack) + _predadj(dog, black) + who(dog, chase) + +Jack, who hosted the party, is my cousin. + _obj(be, cousin) + _subj(be, Jack) + _poss(cousin, me) + _obj(host, party) + _subj(host, Jack) + who(Jack, host) + +Jack, whose name is in that book, is the student near the window. + near(be, window) + _obj(be, student) + _subj(be, Jack) + _pobj(in, book) + _psubj(in, name) + _det(book, that) + whose(Jack, name) + +Jack stopped the police car that was driving fast. + _obj(stop, car) + _subj(stop, Jack) + _advmod(drive, fast) + _subj(drive, car) + that(car, drive) + _nn(car, police) + +Just before the crossroads, the car was stopped by a traffic sign that stood on the street. + _obj(stop, car) + by(stop, sign) + _advmod(stop, just) + on(stand, street) + _subj(stand, sign) + that(sign, stand) + _nn(sign, traffic) + before(just, crossroads) diff --git a/src/test/resources/stanford-tagged.tsv b/src/test/resources/stanford-tagged.tsv new file mode 100644 index 000000000..9e5dc47fa --- /dev/null +++ b/src/test/resources/stanford-tagged.tsv @@ -0,0 +1,91 @@ +// ========================================================= +// PENN PART_OF_SPEECH TAGGING +// ========================================================= +// +Truffles picked during the spring are tasty. + nsubj(tasty-7-JJ, truffles-1-NNS) + partmod(truffles-1-NNS, picked-2-VBN) + det(spring-5-NN, the-4-DT) + prep_during(picked-2-VBN, spring-5-NN) + cop(tasty-7-JJ, are-6-VBP) + +I ate twelve truffles. + nsubj(ate-2-VBD, I-1-PRP) + num(truffles-4-NNS, twelve-3-CD) + dobj(ate-2-VBD, truffles-4-NNS) + +I have eaten twelve truffles. + nsubj(eaten-3-VBN, I-1-PRP) + aux(eaten-3-VBN, have-2-VBP) + num(truffles-5-NNS, twelve-4-CD) + dobj(eaten-3-VBN, truffles-5-NNS) + +I had eaten twelve truffles. + nsubj(eaten-3-VBN, I-1-PRP) + aux(eaten-3-VBN, had-2-VBD) + num(truffles-5-NNS, twelve-4-CD) + dobj(eaten-3-VBN, truffles-5-NNS) + +The truffles were eaten. + det(truffles-2-NNS, the-1-DT) + nsubjpass(eaten-4-VBN, truffles-2-NNS) + auxpass(eaten-4-VBN, were-3-VBD) + + +// Full disclosure:Stanford currently generates +// dep(time-4-NN, young-8-JJ) which just means it doesn't know +// the right answer (which is advcl, right?). +// It also generates advmod(young-8-JJ, when-5-WRB) in addition +// to rel(young-8-JJ, when-5-WRB) which is not quite right +// either. +There was a time when we were young. + expl(was-2-VBD, there-1-EX) + det(time-4-NN, a-3-DT) + nsubj(was-2-VBD, time-4-NN) + rel(young-8-JJ, when-5-WRB) + nsubj(young-8-JJ, we-6-PRP) + cop(young-8-JJ, were-7-VBD) + advcl(time-4-NN, young-8-JJ) + +Is there a better way? + expl(is-1-VBZ, there-2-EX) + det(way-5-NN, a-3-DT) + amod(way-5-NN, better-4-JJR) + nsubj(is-1-VBZ, way-5-NN) + +Is this the largest you can find? + cop(largest-4-JJS, is-1-VBZ) + nsubj(largest-4-JJS, this-2-DT) + det(largest-4-JJS, the-3-DT) + nsubj(find-7-VB, you-5-PRP) + aux(find-7-VB, can-6-MD) + rcmod(largest-4-JJS, find-7-VB) + +But my efforts to win his heart have failed. + poss(efforts-3-NNS, my-2-PRP$) + nsubj(failed-9-VBN, efforts-3-NNS) + aux(win-5-VB, to-4-TO) + infmod(efforts-3-NNS, win-5-VB) + poss(heart-7-NN, his-6-PRP$) + dobj(win-5-VB, heart-7-NN) + aux(failed-9-VBN, have-8-VBP) + +The undergrads are occasionally late. + det(undergrads-2-NNS, the-1-DT) + nsubj(late-5-JJ, undergrads-2-NNS) + cop(late-5-JJ, are-3-VBP) + advmod(late-5-JJ, occasionally-4-RB) + +The height of Mount Everest is 8,848 metres. + det(height-2-NN, the-1-DT) + nsubj(metres-8-NNS, height-2-NN) + nn(Everest-5-NNP, Mount-4-NNP) + prep_of(height-2-NN, Everest-5-NNP) + cop(metres-8-NNS, is-6-VBZ) + num(metres-8-NNS, 8,848-7-CD) + +It happened on December 3rd, 1990. + nsubj(happened-2-VBD, it-1-PRP) + prep_on(happened-2-VBD, December-4-NNP) + num(December-4-NNP, 3rd-5-CD) + num(December-4-NNP, 1990-7-CD) diff --git a/src/test/resources/stanford-untagged.tsv b/src/test/resources/stanford-untagged.tsv new file mode 100644 index 000000000..b3fa3afb6 --- /dev/null +++ b/src/test/resources/stanford-untagged.tsv @@ -0,0 +1,402 @@ +// The parses below were compared to the Stanford parser, circa +// 2009.Since then, it is likely that Stanford has changed. +// The tests below should not be changed, unless a) they are +// changed to be compatible with current-day Stanford, and b) they +// pass. +Who invented sliced bread? + nsubj(invented-2, who-1) + amod(bread-4, sliced-3) + dobj(invented-2, bread-4) + +Jim runs quickly. + nsubj(runs-2, Jim-1) + advmod(runs-2, quickly-3) + +The bird, a robin, sang sweetly. + det(bird-2, the-1) + nsubj(sang-7, bird-2) + det(robin-5, a-4) + appos(bird-2, robin-5) + advmod(sang-7, sweetly-8) +// wtf ?? dep is not documented .. not sure what to do here ... +There is a place we can go. + expl(is-2, there-1) + det(place-4, a-3) + nsubj(is-2, place-4) + nsubj(go-7, we-5) + aux(go-7, can-6) +//dep(is-2, go-7) + + +The linebacker gave the quarterback a push. + det(linebacker-2, the-1) + nsubj(gave-3, linebacker-2) + det(quarterback-5, the-4) + iobj(gave-3, quarterback-5) + det(push-7, a-6) + dobj(gave-3, push-7) + +He stood at the goal line. + nsubj(stood-2, he-1) + det(line-6, the-4) + nn(line-6, goal-5) + prep_at(stood-2, line-6) + +// acomp example from Stanford docs +She looks very beautiful. + nsubj(looks-2, she-1) + advmod(beautiful-4, very-3) + acomp(looks-2, beautiful-4) + +// advcl example from Stanford docs +The accident happened as the night was falling. + det(accident-2, the-1) + nsubj(happened-3, accident-2) + mark(falling-8, as-4) + det(night-6, the-5) + nsubj(falling-8, night-6) + aux(falling-8, was-7) + advcl(happened-3, falling-8) + +// advcl example from Stanford docs +If you know who did it, you should tell the teacher. + mark(know-3, if-1) + nsubj(know-3, you-2) + advcl(tell-10, know-3) + nsubj(did-5, who-4) + ccomp(know-3, did-5) + dobj(did-5, it-6) + nsubj(tell-10, you-8) + aux(tell-10, should-9) + det(teacher-12, the-11) + dobj(tell-10, teacher-12) + +// agent example from Stanford docs +The man has been killed by the police. + det(man-2, the-1) + nsubjpass(killed-5, man-2) + aux(killed-5, has-3) + auxpass(killed-5, been-4) + det(police-8, the-7) + agent(killed-5, police-8) + +Effects caused by the protein are important. + nsubj(important-7, effects-1) + partmod(effects-1, caused-2) + det(protein-5, the-4) + agent(caused-2, protein-5) + cop(important-7, are-6) + +Sam, my brother, has arrived. + nsubj(arrived-7, Sam-1) + poss(brother-4, my-3) + appos(Sam-1, brother-4) + aux(arrived-7, has-6) + +What is that? + attr(is-2, what-1) + nsubj(is-2, that-3) + +Reagan has died. + nsubj(died-3, Reagan-1) + aux(died-3, has-2) + +He should leave. + nsubj(leave-3, he-1) + aux(leave-3, should-2) + +Kennedy has been killed. + nsubjpass(killed-4, Kennedy-1) + aux(killed-4, has-2) + auxpass(killed-4, been-3) + +Kennedy was killed. + nsubjpass(killed-3, Kennedy-1) + auxpass(killed-3, was-2) + +Kennedy got killed. + nsubjpass(killed-3, Kennedy-1) + auxpass(killed-3, got-2) + +Bill is big. + nsubj(big-3, Bill-1) + cop(big-3, is-2) + +Bill is an honest man. + nsubj(man-5, Bill-1) + cop(man-5, is-2) + det(man-5, an-3) + amod(man-5, honest-4) + +What she said makes sense. + dobj(said-3, what-1) + nsubj(said-3, she-2) + csubj(makes-4, said-3) + dobj(makes-4, sense-5) + +What she said is not true. + dobj(said-3, what-1) + nsubj(said-3, she-2) + csubj(true-6, said-3) + cop(true-6, is-4) + neg(true-6, not-5) + +Which book do you prefer? + det(book-2, which-1) + dobj(prefer-5, book-2) + aux(prefer-5, do-3) + nsubj(prefer-5, you-4) + +There is a ghost in the room. + expl(is-2, there-1) + det(ghost-4, a-3) + nsubj(is-2, ghost-4) + det(room-7, the-6) + prep_in(is-2, room-7) + +She gave me a raise. + nsubj(gave-2, she-1) + iobj(gave-2, me-3) + det(raise-5, a-4) + dobj(gave-2, raise-5) + +The director is 65 years old. + det(director-2, the-1) + nsubj(old-6, director-2) + cop(old-6, is-3) + num(years-5, 65-4) + measure(old-6, years-5) + +Sam eats 3 sheep. + nsubj(eats-2, Sam-1) + num(sheep-4, 3-3) + dobj(eats-2, sheep-4) + +//**************** +// * I don't get it. Stanford makes a num/number distinction I can't grok. +// I lost $ 3.2 billion. +//nsubj(lost-2, I-1) +//dobj(lost-2, $-3) +//number($-3, 3.2-4) +//number($-3, billion-5) +//***********/ + +Truffles picked during the spring are tasty. + nsubj(tasty-7, truffles-1) + partmod(truffles-1, picked-2) + det(spring-5, the-4) + prep_during(picked-2, spring-5) + cop(tasty-7, are-6) + +//**************** +// * Currently fails due to xcomp generation problems +// * +// We went to their offices to get Bill's clothes. +//nsubj(went-2, we-1) +//xsubj(get-7, we-1) +//poss(offices-5, their-4) +//prep_to(went-2, offices-5) +//aux(get-7, to-6) +//xcomp(went-2, get-7) +//poss(clothes-10, Bill-8) +//dobj(get-7, clothes-10) +//***********/ + +//**************** +// * See README-Stanford for details. +// All the boys are here. +//predet(boys-3, all-1) +//det(boys-3, the-2) +//nsubj(are-4, boys-3) +//advmod(are-4, here-5) +//***********/ + +//**************** +// * These are ambiguous parses. +// * Stanford picks the opposite choice from Relex. +// * See the README-Stanford for a discussion. +// I saw a cat in a hat. +//nsubj(saw-2, I-1) +//det(cat-4, a-3) +//dobj(saw-2, cat-4) +//det(hat-7, a-6) +//prep_in(cat-4, hat-7) +// +// I saw a cat with a telescope. +//nsubj(saw-2, I-1) +//det(cat-4, a-3) +//dobj(saw-2, cat-4) +//det(telescope-7, a-6) +//prep_with(cat-4, telescope-7) +//***********/ + +He is responsible for meals. + nsubj(responsible-3, he-1) + cop(responsible-3, is-2) + prep_for(responsible-3, meals-5) + +They shut down the station. + nsubj(shut-2, they-1) + prt(shut-2, down-3) + det(station-5, the-4) + dobj(shut-2, station-5) + +About 200 people came to the party. + quantmod(200-2, about-1) + num(people-3, 200-2) + nsubj(came-4, people-3) + det(party-7, the-6) + prep_to(came-4, party-7) + +I saw the man who you love. + nsubj(saw-2, I-1) + det(man-4, the-3) + dobj(saw-2, man-4) + dobj(love-7, man-4) + rel(love-7, who-5) + nsubj(love-7, you-6) + rcmod(man-4, love-7) + + +//**************** +// * +// * relex is failing to generate the following: +// * Almost got it w/the B** rules but not quite ... +// +//rel(love-8, wife-6) +//rcmod(man-4, love-8) +// +// I saw the man whose wife you love. +//nsubj(saw-2, I-1) +//det(man-4, the-3) +//dobj(saw-2, man-4) +//poss(wife-6, whose-5) +//dobj(love-8, wife-6) +//rel(love-8, wife-6) +//nsubj(love-8, you-7) +//rcmod(man-4, love-8) +//***********/ + +I am ready to leave. + nsubj(ready-3, I-1) + cop(ready-3, am-2) + aux(leave-5, to-4) + xcomp(ready-3, leave-5) + +Tom likes to eat fish. + nsubj(likes-2, Tom-1) + xsubj(eat-4, Tom-1) + aux(eat-4, to-3) + xcomp(likes-2, eat-4) + dobj(eat-4, fish-5) + +//**************** +// He says that you like to swim. +//nsubj(says-2, he-1) +//complm(like-5, that-3) +//nsubj(like-5, you-4) +//ccomp(says-2, like-5) +//nsubj(swim-7, to-6) // NFW that this can be right. +//ccomp(like-5, swim-7) +//***********/ + +//**************** +// The garage is next to the house. +//det(garage-2, the-1) +//nsubj(next-4, garage-2) +//cop(next-4, is-3) +//det(house-7, the-6) +//prep_to(next-4, house-7) +//***********/ + +// ========================================================= +// PENN PART_OF_SPEECH TAGGING +// ========================================================= +// +Truffles picked during the spring are tasty. + nsubj(tasty-7-JJ, truffles-1-NNS) + partmod(truffles-1-NNS, picked-2-VBN) + det(spring-5-NN, the-4-DT) + prep_during(picked-2-VBN, spring-5-NN) + cop(tasty-7-JJ, are-6-VBP) + +I ate twelve truffles. + nsubj(ate-2-VBD, I-1-PRP) + num(truffles-4-NNS, twelve-3-CD) + dobj(ate-2-VBD, truffles-4-NNS) + +I have eaten twelve truffles. + nsubj(eaten-3-VBN, I-1-PRP) + aux(eaten-3-VBN, have-2-VBP) + num(truffles-5-NNS, twelve-4-CD) + dobj(eaten-3-VBN, truffles-5-NNS) + +I had eaten twelve truffles. + nsubj(eaten-3-VBN, I-1-PRP) + aux(eaten-3-VBN, had-2-VBD) + num(truffles-5-NNS, twelve-4-CD) + dobj(eaten-3-VBN, truffles-5-NNS) + +The truffles were eaten. + det(truffles-2-NNS, the-1-DT) + nsubjpass(eaten-4-VBN, truffles-2-NNS) + auxpass(eaten-4-VBN, were-3-VBD) + + +// Full disclosure:Stanford currently generates +// dep(time-4-NN, young-8-JJ) which just means it doesn't know +// the right answer (which is advcl, right?). +// It also generates advmod(young-8-JJ, when-5-WRB) in addition +// to rel(young-8-JJ, when-5-WRB) which is not quite right +// either. +There was a time when we were young. + expl(was-2-VBD, there-1-EX) + det(time-4-NN, a-3-DT) + nsubj(was-2-VBD, time-4-NN) + rel(young-8-JJ, when-5-WRB) + nsubj(young-8-JJ, we-6-PRP) + cop(young-8-JJ, were-7-VBD) + advcl(time-4-NN, young-8-JJ) + +Is there a better way? + expl(is-1-VBZ, there-2-EX) + det(way-5-NN, a-3-DT) + amod(way-5-NN, better-4-JJR) + nsubj(is-1-VBZ, way-5-NN) + +Is this the largest you can find? + cop(largest-4-JJS, is-1-VBZ) + nsubj(largest-4-JJS, this-2-DT) + det(largest-4-JJS, the-3-DT) + nsubj(find-7-VB, you-5-PRP) + aux(find-7-VB, can-6-MD) + rcmod(largest-4-JJS, find-7-VB) + +But my efforts to win his heart have failed. + poss(efforts-3-NNS, my-2-PRP$) + nsubj(failed-9-VBN, efforts-3-NNS) + aux(win-5-VB, to-4-TO) + infmod(efforts-3-NNS, win-5-VB) + poss(heart-7-NN, his-6-PRP$) + dobj(win-5-VB, heart-7-NN) + aux(failed-9-VBN, have-8-VBP) + +The undergrads are occasionally late. + det(undergrads-2-NNS, the-1-DT) + nsubj(late-5-JJ, undergrads-2-NNS) + cop(late-5-JJ, are-3-VBP) + advmod(late-5-JJ, occasionally-4-RB) + +The height of Mount Everest is 8,848 metres. + det(height-2-NN, the-1-DT) + nsubj(metres-8-NNS, height-2-NN) + nn(Everest-5-NNP, Mount-4-NNP) + prep_of(height-2-NN, Everest-5-NNP) + cop(metres-8-NNS, is-6-VBZ) + num(metres-8-NNS, 8,848-7-CD) + +It happened on December 3rd, 1990. + nsubj(happened-2-VBD, it-1-PRP) + prep_on(happened-2-VBD, December-4-NNP) + num(December-4-NNP, 3rd-5-CD) + num(December-4-NNP, 1990-7-CD)