From 5209d0084e467a1edd4aebe42cdf08e0ea1ee2cf Mon Sep 17 00:00:00 2001 From: Hendy Irawan Date: Mon, 14 Jul 2014 16:35:17 -0430 Subject: [PATCH] Move test fixtures into TSV plain text files. Test data for RelEx and Stanford are moved into TSV files. The Test classes (`TestRelEx` and `TestStanford`), in addition to having most of their content factored out, also had unnormalized line endings, so to Git these classes look replaced entirely. The entirety of test data now resides in these `.tsv` files. I think the format should be self-explanatory just by looking at it. Ant build and tests still work. @linas I hope this is acceptable. Fixed #103. --- build.xml | 34 +- java7.settings.xml | 4 + pom.xml | 41 +- src/java_test/relex/test/RelExCases.java | 125 +++ src/java_test/relex/test/StanfordCases.java | 120 +++ src/java_test/relex/test/TestRelEx.java | 986 +++----------------- src/java_test/relex/test/TestStanford.java | 698 ++------------ src/test/resources/logback.xml | 16 + src/test/resources/relex-comparatives.tsv | 455 +++++++++ src/test/resources/relex-conjunction.tsv | 101 ++ src/test/resources/relex-extraposition.tsv | 107 +++ src/test/resources/stanford-tagged.tsv | 91 ++ src/test/resources/stanford-untagged.tsv | 402 ++++++++ 13 files changed, 1690 insertions(+), 1490 deletions(-) create mode 100644 java7.settings.xml create mode 100644 src/java_test/relex/test/RelExCases.java create mode 100644 src/java_test/relex/test/StanfordCases.java create mode 100644 src/test/resources/logback.xml create mode 100644 src/test/resources/relex-comparatives.tsv create mode 100644 src/test/resources/relex-conjunction.tsv create mode 100644 src/test/resources/relex-extraposition.tsv create mode 100644 src/test/resources/stanford-tagged.tsv create mode 100644 src/test/resources/stanford-untagged.tsv diff --git a/build.xml b/build.xml index cb982bce6..533d00e98 100644 --- a/build.xml +++ b/build.xml @@ -13,6 +13,7 @@ + @@ -77,7 +78,7 @@ - + @@ -180,22 +181,27 @@ - - - - - + + + + 
- - - - - - + + + + + + + + + - - + + + + + diff --git a/java7.settings.xml b/java7.settings.xml new file mode 100644 index 000000000..55057accb --- /dev/null +++ b/java7.settings.xml @@ -0,0 +1,4 @@ + + + diff --git a/pom.xml b/pom.xml index 76effdc04..48cd86c4b 100644 --- a/pom.xml +++ b/pom.xml @@ -163,6 +163,17 @@ + + net.sf.jwordnet + jwnl + 1.4_rc3 + + + commons-logging + commons-logging + + + net.sf.opencsv @@ -239,6 +250,12 @@ true + + com.google.guava + guava + 17.0 + + org.apache.odftoolkit odfdom-java @@ -317,17 +334,6 @@ 3.2 - - net.sf.jwordnet - jwnl - 1.4_rc3 - - - commons-logging - commons-logging - - - org.linkgrammar linkgrammar @@ -349,12 +355,6 @@ - - - com.google.guava - guava-jdk5 - 17.0 - @@ -371,6 +371,13 @@ + + + com.google.guava + guava-jdk5 + 17.0 + + diff --git a/src/java_test/relex/test/RelExCases.java b/src/java_test/relex/test/RelExCases.java new file mode 100644 index 000000000..a2ad6eff8 --- /dev/null +++ b/src/java_test/relex/test/RelExCases.java @@ -0,0 +1,125 @@ +package relex.test; + +import java.io.InputStreamReader; +import java.net.URL; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import au.com.bytecode.opencsv.CSVReader; +import au.com.bytecode.opencsv.CSVWriter; + +import com.google.common.base.Function; +import com.google.common.base.Optional; +import com.google.common.base.Preconditions; +import com.google.common.collect.FluentIterable; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +/** + * @author ceefour + * + */ +public class RelExCases { + + private static final Logger log = LoggerFactory + .getLogger(RelExCases.class); + + static class RelExCase { + String sentence; + Set relations; + Optional description; + + RelExCase(String sentence, Set relations, Optional description) { + super(); + this.description = description; + this.sentence = sentence; + this.relations = relations; + } + } + + static class RelExCaseToObjectArray 
implements Function { + public Object[] apply(RelExCase input) { + return new Object[] { input.sentence, input.relations, input.description }; + } + } + + protected static ImmutableList parseTsv(String casesPath) { + URL casesTsv = Preconditions.checkNotNull(RelExCases.class.getResource(casesPath), + "Cannot load '%s' from classpath", casesPath); + log.info("Loading '{}'...", casesTsv); + try { + CSVReader reader = new CSVReader(new InputStreamReader(casesTsv.openStream()), + '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER, CSVWriter.NO_ESCAPE_CHARACTER); + try { + Optional curDescription = Optional.absent(); + Optional curSentence = Optional.absent(); + ImmutableSet.Builder curRelations = ImmutableSet.builder(); + ImmutableList.Builder cases = ImmutableList.builder(); + reader.readNext(); // skip header line + while (true) { + String[] row = reader.readNext(); + if (row == null) { + break; + } + if (row.length == 0) { + continue; + } + if (row[0].startsWith("//")) { + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new RelExCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + curDescription = Optional.of(row[0].substring(2).trim()); + continue; + } + if (!row[0].trim().isEmpty()) { + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new RelExCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + // sentence row + curSentence = Optional.of(row[0].trim()); + } else if (row.length >= 2 && !row[1].trim().isEmpty()) { + // relation row + curRelations.add(row[1].trim()); + } + } + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new RelExCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + final ImmutableList caseList = cases.build(); + 
log.info("Got {} cases from '{}'", caseList.size(), casesPath); + return caseList; + } finally { + reader.close(); + } + } catch (Exception e) { + throw new RuntimeException("Cannot read " + casesTsv, e); + } + } + + public static Object[] provideComparatives() { + return FluentIterable.from(parseTsv("/relex-comparatives.tsv")) + .transform(new RelExCaseToObjectArray()).toArray(Object[].class); + } + + public static Object[] provideConjunction() { + return FluentIterable.from(parseTsv("/relex-conjunction.tsv")) + .transform(new RelExCaseToObjectArray()).toArray(Object[].class); + } + + public static Object[] provideExtraposition() { + return FluentIterable.from(parseTsv("/relex-extraposition.tsv")) + .transform(new RelExCaseToObjectArray()).toArray(Object[].class); + } + +} \ No newline at end of file diff --git a/src/java_test/relex/test/StanfordCases.java b/src/java_test/relex/test/StanfordCases.java new file mode 100644 index 000000000..40b6d2dbd --- /dev/null +++ b/src/java_test/relex/test/StanfordCases.java @@ -0,0 +1,120 @@ +package relex.test; + +import java.io.InputStreamReader; +import java.net.URL; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import au.com.bytecode.opencsv.CSVReader; +import au.com.bytecode.opencsv.CSVWriter; + +import com.google.common.base.Function; +import com.google.common.base.Optional; +import com.google.common.base.Preconditions; +import com.google.common.collect.FluentIterable; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +/** + * @author ceefour + * + */ +public class StanfordCases { + + private static final Logger log = LoggerFactory + .getLogger(StanfordCases.class); + + static class StanfordCase { + String sentence; + Set relations; + Optional description; + + StanfordCase(String sentence, Set relations, Optional description) { + super(); + this.description = description; + this.sentence = sentence; + this.relations = relations; + } 
+ } + + static class StanfordCaseToObjectArray implements Function { + public Object[] apply(StanfordCase input) { + return new Object[] { input.sentence, input.relations, input.description }; + } + } + + protected static ImmutableList parseTsv(String casesPath) { + URL casesTsv = Preconditions.checkNotNull(StanfordCases.class.getResource(casesPath), + "Cannot load '%s' from classpath", casesPath); + log.info("Loading '{}'...", casesTsv); + try { + CSVReader reader = new CSVReader(new InputStreamReader(casesTsv.openStream()), + '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER, CSVWriter.NO_ESCAPE_CHARACTER); + try { + Optional curDescription = Optional.absent(); + Optional curSentence = Optional.absent(); + ImmutableSet.Builder curRelations = ImmutableSet.builder(); + ImmutableList.Builder cases = ImmutableList.builder(); + reader.readNext(); // skip header line + while (true) { + String[] row = reader.readNext(); + if (row == null) { + break; + } + if (row.length == 0) { + continue; + } + if (row[0].startsWith("//")) { + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new StanfordCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + curDescription = Optional.of(row[0].substring(2).trim()); + continue; + } + if (!row[0].trim().isEmpty()) { + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new StanfordCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); + } + // sentence row + curSentence = Optional.of(row[0].trim()); + } else if (row.length >= 2 && !row[1].trim().isEmpty()) { + // relation row + curRelations.add(row[1].trim()); + } + } + // add previous sentence + if (curSentence.isPresent()) { + cases.add(new StanfordCase(curSentence.get(), curRelations.build(), curDescription)); + curSentence = Optional.absent(); + curRelations = ImmutableSet.builder(); 
+ } + final ImmutableList caseList = cases.build(); + log.info("Got {} cases from '{}'", caseList.size(), casesPath); + return caseList; + } finally { + reader.close(); + } + } catch (Exception e) { + throw new RuntimeException("Cannot read " + casesTsv, e); + } + } + + public static Object[] provideUntagged() { + return FluentIterable.from(parseTsv("/stanford-untagged.tsv")) + .transform(new StanfordCaseToObjectArray()).toArray(Object[].class); + } + + public static Object[] provideTagged() { + return FluentIterable.from(parseTsv("/stanford-tagged.tsv")) + .transform(new StanfordCaseToObjectArray()).toArray(Object[].class); + } + +} \ No newline at end of file diff --git a/src/java_test/relex/test/TestRelEx.java b/src/java_test/relex/test/TestRelEx.java index cce439784..cb5eb60f6 100644 --- a/src/java_test/relex/test/TestRelEx.java +++ b/src/java_test/relex/test/TestRelEx.java @@ -1,863 +1,123 @@ -/* - * Copyright 2009 Linas Vepstas - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package relex.test; - -import java.util.ArrayList; -import java.util.Collections; - -import org.junit.BeforeClass; -import org.junit.Test; - -import relex.ParsedSentence; -import relex.RelationExtractor; -import relex.Sentence; -import relex.output.SimpleView; - -public class TestRelEx -{ - private static RelationExtractor re; - private int pass; - private int fail; - private int subpass; - private int subfail; - private static ArrayList sentfail= new ArrayList(); - - @BeforeClass - public static void setUpClass() { - re = new RelationExtractor(); - } - - public TestRelEx() - { - pass = 0; - fail = 0; - subpass = 0; - subfail = 0; - } - - public ArrayList split(String a) - { - String[] sa = a.split("\n"); - ArrayList saa = new ArrayList(); - for (String s : sa) { - saa.add(s); - } - Collections.sort (saa); - return saa; - } - - /** - * First argument is the sentence. - * Second argument is a list of the relations that RelEx - * should be generating. - * Return true if RelEx generates the same dependencies - * as the second argument. - */ - public boolean test_sentence (String sent, String sf) - { - re.do_penn_tagging = false; - re.setMaxParses(1); - Sentence sntc = re.processSentence(sent); - ParsedSentence parse = sntc.getParses().get(0); - String rs = SimpleView.printBinaryRelations(parse); - String urs = SimpleView.printUnaryRelations(parse); - - ArrayList exp = split(sf); - ArrayList brgot = split(rs); - ArrayList urgot = split(urs); - - //add number of binary relations from parser-output, to total number of relationships got - int sizeOfGotRelations= brgot.size(); - //check expected binary and unary relations - //the below for-loop checks whether all expected binary relations are - //contained in the parser-binary-relation-output arrayList "brgot". 
- //if any unary relations are expected in the output it checks the - //parser-unary-relation-output arrayList "urgot" for unary relationships - for (int i=0; i< exp.size(); i++) - { - if(!brgot.contains(exp.get(i))) - { - if(!urgot.contains(exp.get(i))) - { - System.err.println("Error: content miscompare:\n" + - "\tExpected = " + exp + "\n" + - "\tGot Binary Relations = " + brgot + "\n" + - "\tGot Unary Relations = " + urgot + "\n" + - "\tSentence = " + sent); - subfail ++; - fail ++; - sentfail.add(sent); - return false; - } - //add the unary relation, count to totoal number of binary relations - sizeOfGotRelations++; - } - - } - //The size checking of the expected relationships vs output relationships - //is done here purposefully, to accommodate if there is any unary relationships present - //in the expected output(see above for-loop also). - //However it only checks whether parser-output resulted more relationships(binary+unary) than expected relations - //If the parser-output resulted less relationships(binary+unary) than expected it would - //catch that in the above for-loop - if (exp.size() < sizeOfGotRelations) - { - System.err.println("Error: size miscompare:\n" + - "\tExpected = " + exp + "\n" + - "\tGot Binary Relations = " + brgot + "\n" + - "\tGot Unary Relations = " + urgot + "\n" + - "\tSentence = " + sent); - subfail ++; - fail ++; - sentfail.add(sent); - return false; - } - - subpass ++; - pass ++; - return true; - } - - public void report(boolean rc, String subsys) - { - if (rc) { - System.err.println(subsys + ": Tested " + pass + " sentences, test passed OK"); - } else { - System.err.println(subsys + ": Test failed\n\t" + - fail + " sentences failed\n\t" + - pass + " sentences passed"); - } - subpass = 0; - subfail = 0; - } - - public boolean test_comparatives() - { - boolean rc = true; - rc &= test_sentence ("Some people like pigs less than dogs.", - "_advmod(like, less)\n" + - "_obj(like, pig)\n" + - "_quantity(people, some)\n" + - 
"_subj(like, people)\n" + - "than(pig, dog)\n"); - - rc &= test_sentence ("Some people like pigs more than dogs.", - "_advmod(like, more)\n" + - "_obj(like, pig)\n" + - "_quantity(people, some)\n" + - "_subj(like, people)\n" + - "than(pig, dog)\n"); - //Non-equal Gradable : Two entities one feature "more/less" - - rc &= test_sentence ("He is more intelligent than John.", - "than(he, John)\n" + - "_comparative(intelligent, he)\n" + - "degree(intelligent, comparative)\n"+ - "_predadj(he, intelligent)\n"); - - rc &= test_sentence ("He is less intelligent than John.", - "than(he, John)\n" + - "_comparative(intelligent, he)\n" + - "degree(intelligent, comparative)\n"+ - "_advmod(intelligent, less)\n"+ - "_predadj(he, intelligent)\n"); - - rc &= test_sentence ("He runs more quickly than John.", - "_advmod(run, quickly)\n"+ - "_advmod(quickly, more)\n"+ - "_subj(run, he)\n" + - "than(he, John)\n" + - "_comparative(quickly, run)\n" + - "degree(quickly, comparative)\n"); - - rc &= test_sentence ("He runs less quickly than John.", - "_advmod(run, quickly)\n" + - "_subj(run, he)\n" + - "_advmod(quickly, less)\n"+ - "than(he, John)\n" + - "_comparative(quickly, run)\n" + - "degree(quickly, comparative)\n"); - - rc &= test_sentence ("He runs more quickly than John does.", - "_advmod(run, quickly)\n" + - "_advmod(quickly, more)\n"+ - "_subj(run, he)\n" + - "_subj(do, John)\n"+ - "than(he, John)\n" + - "_comparative(quickly, run)\n" + - "degree(quickly, comparative)\n"); - - //This sentence is ungrammatical but commonly used by non-native English speakers - rc &= test_sentence ("He runs less quickly than John does.", - "_advmod(run, quickly)\n" + - "_subj(run, he)\n" + - "_subj(do, John)\n"+ - "_advmod(quickly, less)\n"+ - "than(he, John)\n" + - "_comparative(quickly, run)\n" + - "degree(quickly, comparative)\n"); - - rc &= test_sentence ("He runs slower than John does.", - "_advmod(run, slow)\n" + - "_subj(run, he)\n" + - "_subj(do, John)\n"+ - "than(he, John)\n" + - 
"_comparative(slow, run)\n" + - "degree(slow, comparative)\n"); - - rc &= test_sentence ("He runs more than John.", - "_obj(run, more)\n" + - "_subj(run, he)\n" + - "than(he, John)\n"+ - "_comparative(more, run)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("He runs less than John.", - "_obj(run, less)\n" + - "_subj(run, he)\n" + - "than(he, John)\n"+ - "_comparative(less, run)\n"+ - "degree(less, comparative)\n"); - - rc &= test_sentence ("He runs faster than John.", - "than(he, John)\n" + - "_comparative(fast, run)\n" + - "_subj(run, he)\n"+ - "_advmod(run, fast)\n"+ - "degree(fast, comparative)\n"); - - rc &= test_sentence ("He runs more slowly than John.", - "than(he, John)\n" + - "_subj(run, he)\n" + - "_advmod(slowly, more)\n"+ - "_comparative(slowly, run)\n"+ - "_advmod(run, slowly)\n"+ - "degree(slowly, comparative)\n"); - - rc &= test_sentence ("He runs less slowly than John.", - "than(he, John)\n" + - "_subj(run, he)\n" + - "_comparative(slowly, run)\n"+ - "_advmod(run, slowly)\n"+ - "_advmod(slowly, less)\n"+ - "degree(slowly, comparative)\n"); - - rc &= test_sentence ("He runs more miles than John does.", - "than(he, John)\n" + - "_subj(run, he)\n" + - "_subj(do, John)\n"+ - "_obj(run, mile)\n"+ - "_comparative(mile, run)\n"+ - "_quantity(mile, more)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("He runs less miles than John does.", - "than(he, John)\n" + - "_subj(run, he)\n" + - "_subj(do, John)\n"+ - "_obj(run, mile)\n"+ - "_comparative(mile, run)\n"+ - "_quantity(mile, less)\n"+ - "degree(less, comparative)\n"); - - rc &= test_sentence ("He runs many more miles than John does.", - "than(he, John)\n" + - "_comparative(mile, run)\n"+ - "_obj(run, mile)\n"+ - "_subj(run, he)\n" + - "_subj(do, John)\n" + - "_quantity(mile, many)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("He runs fewer miles than John does.", - "than(he, John)\n" + - "_comparative(mile, run)\n"+ - "_obj(run, mile)\n"+ - "_subj(run, 
he)\n" + - "_subj(do, John)\n" + - "_quantity(mile, fewer)\n"+ - "degree(fewer, comparative)\n"); - - rc &= test_sentence ("He runs ten more miles than John.", - "_obj(run, mile)\n"+ - "_subj(run, he)\n" + - "_quantity(mile, more)\n"+ - "than(he, John)\n" + - "_comparative(mile, run)\n"+ - "_num_quantity(miles, ten)\n" + - "degree(more, comparative)\n"); - - rc &= test_sentence ("He runs almost ten more miles than John does.", - "_obj(run, mile)\n"+ - "_subj(run, he)\n"+ - "_comparative(mile, run)\n"+ - "_subj(do, John)\n"+ - "than(he, John)\n"+ - "_quantity_mod(ten, almost)\n"+ - "_num_quantity(miles, ten)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("He runs more often than John.", - "_subj(run, he)\n"+ - "_advmod(often, more)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(he, John)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("He runs less often than John.", - "_subj(run, he)\n"+ - "_advmod(often, less)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(he, John)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("He runs here more often than John.", - "_advmod(run, here)\n"+ - "_advmod(often, more)\n"+ - "_advmod(run, often)\n"+ - "_subj(run, he)\n"+ - "_comparative(often, run)\n"+ - "than(he, John)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("He runs here less often than John.", - "_advmod(run, here)\n"+ - "_advmod(often, less)\n"+ - "_advmod(run, often)\n"+ - "_subj(run, he)\n"+ - "_comparative(often, run)\n"+ - "than(he, John)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("He is faster than John.", - "than(he, John)\n"+ - "_predadj(he, fast)\n"+ - "_comparative(fast, be)\n"+ - "degree(fast, comparative)\n"); - - rc &= test_sentence ("He is faster than John is.", - "than(he, John)\n"+ - "_predadj(he, fast)\n"+ - "_subj(be, John)\n"+ - "_comparative(fast, be)\n"+ - "degree(fast, comparative)\n"); - - rc &= test_sentence ("His 
speed is faster than John's.", - "than(speed, be)\n"+ - "_predadj(speed, fast)\n"+ - "_poss(speed, him)\n"+ - "_comparative(fast, be)\n"+ - "degree(fast, comparative)\n"); - - rc &= test_sentence ("I run more than Ben.", - "_subj(run, I)\n"+ - "_obj(run, more)\n"+ - "_comparative(more, run)\n"+ - "than(I, Ben)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("I run less than Ben.", - "_subj(run, I)\n"+ - "_obj(run, less)\n"+ - "_comparative(less, run)\n"+ - "than(I, Ben)\n"+ - "degree(less, comparative)\n"); - - rc &= test_sentence ("I run more miles than Ben.", - "_subj(run, I)\n"+ - "_obj(run, mile)\n"+ - "_quantity(mile, more)\n"+ - "_comparative(mile, run)\n"+ - "than(I, Ben)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("I run fewer miles than Ben.", - "_subj(run, I)\n"+ - "_obj(run, mile)\n"+ - "_quantity(mile, fewer)\n"+ - "_comparative(mile, run)\n"+ - "than(I, Ben)\n"+ - "degree(fewer, comparative)\n"); - - rc &= test_sentence ("I run 10 more miles than Ben.", - "_subj(run, I)\n"+ - "_obj(run, mile)\n"+ - "_num_quantity(mile, 10)\n"+ - "_quantity_mod(10, more)\n"+ - "_comparative(mile, run)\n"+ - "than(I, Ben)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("I run 10 fewer miles than Ben.", - "_subj(run, I)\n"+ - "_obj(run, mile)\n"+ - "_num_quantity(mile, 10)\n"+ - "_quantity_mod(10, fewer)\n"+ - "_comparative(mile, run)\n"+ - "than(I, Ben)\n"+ - "degree(fewer, comparative)\n"); - - rc &= test_sentence ("I run more often than Ben.", - "_subj(run, I)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "degree(often, comparative)\n"+ - "_advmod(often, more)\n"); - - rc &= test_sentence ("I run less often than Ben.", - "_subj(run, I)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "degree(often, comparative)\n"+ - "_advmod(often, less)\n"); - - rc &= test_sentence ("I run more often than Ben does.", - "_subj(run, I)\n"+ - "_subj(do, Ben)\n"+ - 
"_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "degree(often, comparative)\n"+ - "_advmod(often, more)\n"); - - rc &= test_sentence ("I run less often than Ben does.", - "_subj(run, I)\n"+ - "_subj(do, Ben)\n"+ - "_advmod(run, often)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "degree(often, comparative)\n"+ - "_advmod(often, less)\n"); - - rc &= test_sentence ("I run more often than Ben climbs.", - "_subj(run, I)\n"+ - "_subj(climb, Ben)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "than1(run, climb)\n"+ - "degree(often, comparative)\n"+ - "_advmod(run, often)\n"+ - "_advmod(often, more)\n"); - - rc &= test_sentence ("I run less often than Ben climbs.", - "_subj(run, I)\n"+ - "_subj(climb, Ben)\n"+ - "_comparative(often, run)\n"+ - "than(I, Ben)\n"+ - "than1(run, climb)\n"+ - "degree(often, comparative)\n"+ - "_advmod(run, often)\n"+ - "_advmod(often, less)\n"); - - rc &= test_sentence ("I run more races than Ben wins contests.", - "_subj(run, I)\n"+ - "_obj(run, race)\n"+ - "_subj(win, Ben)\n"+ - "_obj(win, contest)\n"+ - "_quantity(race, more)\n"+ - "_comparative(race, run)\n"+ - "than(I, Ben)\n"+ - "than1(run, climb)\n"+ - "than2(race, contest)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("I run fewer races than Ben wins contests.", - "_subj(run, I)\n"+ - "_obj(run, race)\n"+ - "_subj(win, Ben)\n"+ - "_obj(win, contest)\n"+ - "_quantity(race, fewer)\n"+ - "_comparative(race, run)\n"+ - "than(I, Ben)\n"+ - "than1(run, climb)\n"+ - "than2(race, contest)\n"+ - "degree(fewer, comparative)\n"); - - rc &= test_sentence ("I have more chairs than Ben.", - "_obj(have, chair)\n"+ - "_subj(have, I)\n"+ - "than(I, Ben)\n"+ - "_comparative(chair, have)\n"+ - "_quantity(chair, more)\n"+ - "degree(more, comparative)\n"); - - rc &= test_sentence ("I have fewer chairs than Ben.", - "_obj(have, chair)\n"+ - "_subj(have, I)\n"+ - "than(I, Ben)\n"+ - "_comparative(chair, have)\n"+ - "_quantity(chair, 
fewer)\n"+ - "degree(fewer, comparative)\n"); - - rc &= test_sentence ("He earns much more money than I do.", - "_obj(earn, money)\n"+ - "_subj(do, I)\n"+ - "_subj(earn, he)\n"+ - "than(he,I)\n"+ - "_comparative(money,earn)\n"+ - "_quantity(money, more)\n"+ - "_advmod(more, much)\n"+ - "degree(more,comparative)\n"); - - rc &= test_sentence ("He earns much less money than I do.", - "_obj(earn, money)\n"+ - "_subj(do, I)\n"+ - "_subj(earn, he)\n"+ - "than(he, I)\n"+ - "_comparative(money, earn)\n"+ - "_quantity(money, less)\n"+ - "_advmod(less, much)\n"+ - "degree(less, comparative)\n"); - - rc &= test_sentence ("She comes here more often than her husband.", - "_advmod(come, here)\n"+ - "_advmod(often, more)\n"+ - "_advmod(come, often)\n"+ - "_subj(come, she)\n"+ - "_poss(husband, her)\n"+ - "_comparative(often, come)\n"+ - "than(she, husband)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("She comes here less often than her husband.", - "_advmod(come, here)\n"+ - "_advmod(often, less)\n"+ - "_advmod(come, often)\n"+ - "_subj(come, she)\n"+ - "_poss(husband, her)\n"+ - "_comparative(often, come)\n"+ - "than(she, husband)\n"+ - "degree(often, comparative)\n"); - - rc &= test_sentence ("Russian grammar is more difficult than English grammar.", - "_comparative(difficult, grammar)\n"+ - "than(grammar, grammar)\n"+ - "_amod(grammar, Russian)\n"+ //When link-grammar uses A, relex should use _amod it will use A instead of AN; will be updated in next linkgrammer version - "_predadj(grammar, difficult)\n"+ - "_amod(grammar, English)\n"+ - "degree(difficult, comparative)\n"); - - rc &= test_sentence ("Russian grammar is less difficult than English grammar.", - "_comparative(difficult, grammar)\n"+ - "than(grammar, grammar)\n"+ - "_amod(grammar, Russian)\n"+ - "_predadj(grammar, difficult)\n"+ - "_amod(grammar, English)\n"+ - "_advmod(difficult, less)\n"+ - "degree(difficult, comparative)\n"); - - rc &= test_sentence ("My sister is much more intelligent than 
me.", - "_amod(much, intelligent)\n"+ - "_predadj(sister, intelligent)\n"+ - "_poss(sister, me)\n"+ - "than(sister, me)\n"+ - "_comparative(intelligent, sister)\n"+ - "degree(intelligent, comparative)\n"); - - rc &= test_sentence ("My sister is much less intelligent than me.", - "_amod(much, intelligent)\n"+ - "_predadj(sister, intelligent)\n"+ - "_poss(sister, me)\n"+ - "than(sister, me)\n"+ - "_comparative(intelligent, sister)\n"+ - "_advmod(intelligent, less)\n"+ - "degree(intelligent, comparative)\n"); - - rc &= test_sentence ("I find maths lessons more enjoyable than science lessons.", - "_iobj(find, maths)\n"+ - "_obj(find, lesson)\n"+ - "_subj(find, I)\n"+ - "_amod(lesson, enjoyable)\n"+ - "_nn(lesson, science)\n"+ - "than(maths, science)\n"+ - "_comparative(enjoyable, maths)\n"+ - "degree(enjoyable, comparative)\n"); - - rc &= test_sentence ("I find maths lessons less enjoyable than science lessons.", - "_iobj(find, maths)\n"+ - "_obj(find, lesson)\n"+ - "_subj(find, I)\n"+ - "_amod(lesson, enjoyable)\n"+ - "_nn(lesson, science)\n"+ - "than(maths, science)\n"+ - "_comparative(enjoyable, maths)\n"+ - "_advmod(enjoyable, less)\n"+ - "degree(enjoyable, comparative)\n"); - report(rc, "Comparatives"); - return rc; - } - public boolean test_Conjunction() - { - boolean rc = true; - //conjoined verbs - rc &= test_sentence ("Scientists make observations and ask questions.", - "_obj(make, observation)\n" + - "_obj(ask, question)\n" + - "_subj(make, scientist)\n" + - "_subj(ask, scientist)\n" + - "conj_and(make, ask)\n"); - //conjoined nouns - rc &= test_sentence ("She is a student and an employee.", - "_obj(be, student)\n" + - "_obj(be, employee)\n" + - "_subj(be, she)\n" + - "conj_and(student, employee)\n"); - //conjoined adjectives - rc &= test_sentence ("I hailed a black and white taxi.", - "_obj(hail, taxi)\n" + - "_subj(hail, I)\n" + - "_amod(taxi, black)\n" + - "_amod(taxi, white)\n" + - "conj_and(black, white)\n"); - //conjoined adverbs - rc &= test_sentence 
("She ran quickly and quietly.", - "_advmod(run, quickly)\n" + - "_advmod(run, quietly)\n" + - "_subj(run, she)\n" + - "conj_and(quickly, quietly)\n"); - //adjectival modifiers on conjoined subject - rc &= test_sentence ("The big truck and the little car collided.", - "_amod(car, little)\n" + - "_amod(truck, big)\n" + - "_subj(collide, truck)\n" + - "_subj(collide, car)\n" + - "conj_and(truck, car)\n"); - //verbs with modifiers - rc &= test_sentence ( "We ate dinner at home and went to the movies.", - "_obj(eat, dinner)\n" + - "conj_and(eat, go)\n" + - "at(eat, home)\n" + - "_subj(eat, we)\n" + - "to(go, movie)\n" + - "_subj(go, we)\n"); - //verb with more modifiers - rc &= test_sentence ("We ate a late dinner at home and went out to the movies afterwards.", - "_obj(eat, dinner)\n" + - "conj_and(eat, go_out)\n" + - "at(eat, home)\n" + - "_subj(eat, we)\n" + - "to(go_out, movie)\n" + - "_advmod(go_out, afterwards)\n" + - "_subj(go_out, we)\n" + - "_amod(dinner, late)\n"); - - //conjoined ditransitive verbs - rc &= test_sentence ("She baked him a cake and sang him a song.", - "_iobj(sing, him)\n" + - "_obj(sing, song)\n" + - "_subj(sing, she)\n" + - "_iobj(bake, him)\n" + - "_obj(bake, cake)\n" + - "conj_and(bake, sing)\n" + - "_subj(bake, she)\n"); - //conjoined adverbs with modifiers - rc &= test_sentence ("she ran very quickly and extremely quietly.", - "_advmod(run, quickly)\n" + - "_advmod(run, quietly)\n" + - "_subj(run, she)\n" + - "_advmod(quietly, extremely)\n" + - "conj_and(quickly, quietly)\n" + - "_advmod(quickly, very)\n"); - //conjoined adverbs with out modifiers - rc &= test_sentence ("She handled it quickly and gracefully.", - "_obj(handle, quickly)\n" + - "_obj(handle, gracefully)\n" + - "_advmod(handle, quickly)\n" + - "_advmod(handle, gracefully)\n" + - "_subj(handle, she)\n" + - "conj_and(quickly, gracefully)\n"); - //modifiers on conjoined adjectives - rc &= test_sentence ("He had very long and very white hair.", - "_obj(have, hair)\n" + - 
"_subj(have, he)\n" + - "_amod(hair, long)\n" + - "_amod(hair, white)\n" + - "_advmod(white, very)\n" + - "conj_and(long, white)\n" + - "_advmod(long, very)\n"); - //adjectival modifiers on conjoined object - rc &= test_sentence ("The collision was between the little car and the big truck.", - "_pobj(between, car)\n" + - "_pobj(between, truck)\n" + - "_psubj(between, collision)\n" + - "_amod(truck, big)\n" + - "_amod(car, little)\n" + - "conj_and(car, truck)\n"); - //Names Modifiers and conjunction - rc &= test_sentence ("Big Tom and Angry Sue went to the movies.", - "to(go, movie)\n" + - "_subj(go, Big_Tom)\n" + - "_subj(go, Angry_Sue)\n" + - "conj_and(Big_Tom, Angry_Sue)\n"); - - report(rc, "Conjunction"); - return rc; - } - public boolean test_extraposition() - { - boolean rc = true; - rc &= test_sentence ("The woman who lives next door is a registered nurse.", - "_obj(be, nurse)\n" + - "_subj(be, woman)\n" + - "_amod(nurse, registered)\n" + - "_advmod(live, next_door)\n" + - "_subj(live, woman)\n" + - "who(woman, live)\n"); - - rc &= test_sentence ("A player who is injured has to leave the field.", - "_to-do(have, leave)\n" + - "_subj(have, player)\n" + - "_obj(leave, field)\n" + - "_predadj(player, injured)\n" + - "who(player, injured)\n" ); - - rc &= test_sentence ("Pizza, which most people love, is not very healthy.", - "_advmod(very, not)\n" + - "_advmod(healthy, very)\n" + - "_obj(love, Pizza)\n" + - "_quantity(people, most)\n" + - "which(Pizza, love)\n" + - "_subj(love, people)\n" + - "_predadj(Pizza, healthy)\n" ); - - rc &= test_sentence ("The restaurant which belongs to my aunt is very famous.", - "_advmod(famous, very)\n" + - "to(belong, aunt)\n" + - "_subj(belong, restaurant)\n" + - "_poss(aunt, me)\n" + - "which(restaurant, belong)\n" + - "_predadj(restaurant, famous)\n"); - - rc &= test_sentence ("The books which I read in the library were written by Charles Dickens.", - "_obj(write, book)\n" + - "by(write, Charles_Dickens)\n" + - "_obj(read, 
book)\n" + - "in(read, library)\n" + - "_subj(read, I)\n" + - "which(book, read)\n"); - - rc &= test_sentence("This is the book whose author I met in a library.", - "_obj(be, book)\n" + - "_subj(be, this)\n" + - "_obj(meet, author)\n" + - "in(meet, library)\n" + - "_subj(meet, I)\n" + - "whose(book, author)\n"); - - rc &= test_sentence("The book that Jack lent me is very boring.", - "_advmod(boring, very)\n" + - "_iobj(lend, book)\n" + - "_obj(lend, me)\n" + - "_subj(lend, Jack)\n" + - "that(book, lend)\n" + - "_predadj(book, boring)\n"); - - rc &= test_sentence("They ate a special curry which was recommended by the restaurant’s owner.", - "_obj(eat, curry)\n" + - "_subj(eat, they)\n" + - "_obj(recommend, curry)\n" + - "by(recommend, owner)\n" + - "_poss(owner, restaurant)\n" + - "which(curry, recommend)\n" + - "_amod(curry, special)\n"); - - rc &= test_sentence("The dog who Jack said chased me was black.", - "_obj(chase, me)\n" + - "_subj(chase, dog)\n" + - "_subj(say, Jack)\n" + - "_predadj(dog, black)\n" + - "who(dog, chase)\n"); - - rc &= test_sentence("Jack, who hosted the party, is my cousin.", - "_obj(be, cousin)\n" + - "_subj(be, Jack)\n" + - "_poss(cousin, me)\n" + - "_obj(host, party)\n" + - "_subj(host, Jack)\n" + - "who(Jack, host)\n"); - - rc &= test_sentence("Jack, whose name is in that book, is the student near the window.", - "near(be, window)\n" + - "_obj(be, student)\n" + - "_subj(be, Jack)\n" + - "_pobj(in, book)\n" + - "_psubj(in, name)\n" + - "_det(book, that)\n" + - "whose(Jack, name)\n"); - - rc &= test_sentence("Jack stopped the police car that was driving fast.", - "_obj(stop, car)\n" + - "_subj(stop, Jack)\n" + - "_advmod(drive, fast)\n" + - "_subj(drive, car)\n" + - "that(car, drive)\n" + - "_nn(car, police)\n"); - - rc &= test_sentence("Just before the crossroads, the car was stopped by a traffic sign that stood on the street.", - "_obj(stop, car)\n" + - "by(stop, sign)\n" + - "_advmod(stop, just)\n" + - "on(stand, street)\n" + - 
"_subj(stand, sign)\n" + - "that(sign, stand)\n" + - "_nn(sign, traffic)\n" + - "before(just, crossroads)\n"); - - report(rc, "Extrapostion"); - return rc; - } - - - public static void main(String[] args) - { - setUpClass(); - TestRelEx ts = new TestRelEx(); - ts.runTests(); - } - - @Test - public void runTests() { - TestRelEx ts = this; - boolean rc = true; - - rc &= ts.test_comparatives(); - rc &= ts.test_extraposition(); - rc &= ts.test_Conjunction(); - - if (rc) { - System.err.println("Tested " + ts.pass + " sentences, test passed OK"); - } else { - System.err.println("Test failed\n\t" + - ts.fail + " sentences failed\n\t" + - ts.pass + " sentences passed"); - } - - System.err.println("******************************"); - System.err.println("Failed test sentences on Relex"); - System.err.println("******************************"); - if(sentfail.isEmpty()) - System.err.println("All test sentences passed"); - for(String temp : sentfail){ - System.err.println(temp); - } - System.err.println("******************************\n"); - } -} +/* + * Copyright 2009 Linas Vepstas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package relex.test; + +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.hasSize; +import static org.junit.Assert.assertThat; + +import java.util.List; +import java.util.Set; + +import junitparams.JUnitParamsRunner; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; + +import relex.ParsedSentence; +import relex.RelationExtractor; +import relex.Sentence; +import relex.output.SimpleView; + +import com.google.common.base.Optional; +import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableList; + +@RunWith(JUnitParamsRunner.class) +public class TestRelEx { + private static RelationExtractor re; + + @BeforeClass + public static void setUpClass() { + re = new RelationExtractor(); + } + + /** + * Succeeds if RelEx generates the same relations as {@code sf}. + * @param sentence Sentence + * @param sf set of the relations that RelEx should be generating. + * @param description Description + */ + protected void testSentence(String sentence, Set sf, Optional description) { + re.do_penn_tagging = false; + re.setMaxParses(1); + Sentence sntc = re.processSentence(sentence); + ParsedSentence parse = sntc.getParses().get(0); + String rs = SimpleView.printBinaryRelations(parse); + String urs = SimpleView.printUnaryRelations(parse); + + List exp = ImmutableList.copyOf(sf); + List brgot = Splitter.on("\n").omitEmptyStrings().splitToList(rs); + List urgot = Splitter.on("\n").omitEmptyStrings().splitToList(urs); + + //add number of binary relations from parser-output, to total number of relationships got + int sizeOfGotRelations= brgot.size(); + //check expected binary and unary relations + //the below for-loop checks whether all expected binary relations are + //contained in the parser-binary-relation-output arrayList "brgot". 
+ //if any unary relations are expected in the output it checks the + //parser-unary-relation-output arrayList "urgot" for unary relationships + for (int i=0; i< exp.size(); i++) + { + if(!brgot.contains(exp.get(i))) + { + assertThat("content miscompare:\n" + + "\tExpected = " + exp + "\n" + + "\tGot Binary Relations = " + brgot + "\n" + + "\tGot Unary Relations = " + urgot + "\n" + + "\tSentence = " + sentence, + urgot, hasItem(exp.get(i))); + //add the unary relation, count to total number of binary relations + sizeOfGotRelations++; + } + + } + //The size checking of the expected relationships vs output relationships + //is done here purposefully, to accommodate if there is any unary relationships present + //in the expected output(see above for-loop also). + //However it only checks whether parser-output resulted more relationships(binary+unary) than expected relations + //If the parser-output resulted less relationships(binary+unary) than expected it would + //catch that in the above for-loop + assertThat("size miscompare:\n" + + "\tExpected = " + exp + "\n" + + "\tGot Binary Relations = " + brgot + "\n" + + "\tGot Unary Relations = " + urgot + "\n" + + "\tSentence = " + sentence, + exp, hasSize(greaterThanOrEqualTo(sizeOfGotRelations))); + } + + @Test + @junitparams.Parameters(source=RelExCases.class, method="provideComparatives") + public void comparatives(String sentence, Set expected, Optional description) { + testSentence(sentence, expected, description); + } + + @Test + @junitparams.Parameters(source=RelExCases.class, method="provideConjunction") + public void conjunction(String sentence, Set expected, Optional description) { + testSentence(sentence, expected, description); + } + + @Test + @junitparams.Parameters(source=RelExCases.class, method="provideExtraposition") + public void extraposition(String sentence, Set expected, Optional description) { + testSentence(sentence, expected, description); + } + +} diff --git 
a/src/java_test/relex/test/TestStanford.java b/src/java_test/relex/test/TestStanford.java index 5db4b33a3..3822d08c9 100644 --- a/src/java_test/relex/test/TestStanford.java +++ b/src/java_test/relex/test/TestStanford.java @@ -1,596 +1,102 @@ -/* - * Copyright 2009 Linas Vepstas - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package relex.test; - -import java.util.ArrayList; -import java.util.Collections; - -import org.junit.BeforeClass; -import org.junit.Test; - -import relex.ParsedSentence; -import relex.RelationExtractor; -import relex.Sentence; -import relex.output.StanfordView; - -public class TestStanford -{ - private static RelationExtractor re; - private int pass; - private int fail; - private static ArrayList sentfail= new ArrayList(); - private static ArrayList sentfailpostag= new ArrayList(); - - @BeforeClass - public static void setUpClass() { - re = new RelationExtractor(); - } - - public TestStanford() - { - re.do_stanford = true; - pass = 0; - fail = 0; - } - - public ArrayList split(String a) - { - String[] sa = a.split("\n"); - ArrayList saa = new ArrayList(); - for (String s : sa) - { - saa.add(s); - } - Collections.sort (saa); - return saa; - } - - /** - * First argument is the sentence. - * Second argument is a list of the relations that the - * Stanford parser generates. - * Return true if relex generates that same dependencies - * as the second argument. 
- */ - public boolean test_sentence (String sent, String sf) - { - re.do_penn_tagging = false; - Sentence sntc = re.processSentence(sent); - ParsedSentence parse = sntc.getParses().get(0); - String rs = StanfordView.printRelations(parse, false); - - ArrayList sfa = split(sf); - ArrayList rsa = split(rs); - if (sfa.size() != rsa.size()) - { - System.err.println("Error: size miscompare:\n" + - "\tStanford = " + sfa + "\n" + - "\tRelEx = " + rsa + "\n" + - "\tSentence = " + sent); - fail ++; - sentfail.add(sent); - return false; - } - for (int i=0; i< sfa.size(); i++) - { - if (!sfa.get(i).equals (rsa.get(i))) - { - System.err.println("Error: content miscompare:\n" + - "\tStanford = " + sfa + "\n" + - "\tRelEx = " + rsa + "\n" + - "\tSentence = " + sent); - fail ++; - sentfail.add(sent); - return false; - } - } - - pass ++; - return true; - } - - public boolean test_tagged_sentence (String sent, String sf) - { - re.do_penn_tagging = true; - Sentence sntc = re.processSentence(sent); - ParsedSentence parse = sntc.getParses().get(0); - String rs = StanfordView.printRelations(parse, true); - - ArrayList sfa = split(sf); - ArrayList rsa = split(rs); - if (sfa.size() != rsa.size()) - { - System.err.println("Error: size miscompare:\n" + - "\tStanford = " + sfa + "\n" + - "\tRelEx = " + rsa + "\n" + - "\tSentence = " + sent); - fail ++; - sentfailpostag.add(sent); - return false; - } - for (int i=0; i< sfa.size(); i++) - { - if (!sfa.get(i).equals (rsa.get(i))) - { - System.err.println("Error: content miscompare:\n" + - "\tStanford = " + sfa + "\n" + - "\tRelEx = " + rsa + "\n" + - "\tSentence = " + sent); - fail ++; - sentfailpostag.add(sent); - return false; - } - } - - pass ++; - return true; - } - - public static void main(String[] args) - { - setUpClass(); - TestStanford ts = new TestStanford(); - ts.runTests(); - } - - @Test - public void runTests() { - TestStanford ts = this; - boolean rc = true; - - // The parses below were compared to the Stanford parser, circa - // 
2009. Since then, it is likely that Stanford has changed. - // The tests below should not be changed, unless a) they are - // changed to be comaptible with current-day Stanford, and b) they - // pass. - rc &= ts.test_sentence ("Who invented sliced bread?", - "nsubj(invented-2, who-1)\n" + - "amod(bread-4, sliced-3)\n" + - "dobj(invented-2, bread-4)"); - - rc &= ts.test_sentence ("Jim runs quickly.", - "nsubj(runs-2, Jim-1)\n" + - "advmod(runs-2, quickly-3)"); - - rc &= ts.test_sentence ("The bird, a robin, sang sweetly.", - "det(bird-2, the-1)\n" + - "nsubj(sang-7, bird-2)\n" + - "det(robin-5, a-4)\n" + - "appos(bird-2, robin-5)\n" + - "advmod(sang-7, sweetly-8)"); - - rc &= ts.test_sentence ("There is a place we can go.", - "expl(is-2, there-1)\n" + - "det(place-4, a-3)\n" + - "nsubj(is-2, place-4)\n" + - "nsubj(go-7, we-5)\n" + - "aux(go-7, can-6)"); - // wtf ?? dep is not documented .. not sure what to do here ... - // "dep(is-2, go-7)"); - - rc &= ts.test_sentence ("The linebacker gave the quarterback a push.", - "det(linebacker-2, the-1)\n" + - "nsubj(gave-3, linebacker-2)\n" + - "det(quarterback-5, the-4)\n" + - "iobj(gave-3, quarterback-5)\n" + - "det(push-7, a-6)\n" + - "dobj(gave-3, push-7)\n"); - - rc &= ts.test_sentence ("He stood at the goal line.", - "nsubj(stood-2, he-1)\n" + - "det(line-6, the-4)\n" + - "nn(line-6, goal-5)\n" + - "prep_at(stood-2, line-6)"); - - // acomp example from Stanford docs - rc &= ts.test_sentence ("She looks very beautiful.", - "nsubj(looks-2, she-1)\n" + - "advmod(beautiful-4, very-3)\n" + - "acomp(looks-2, beautiful-4)"); - - // advcl example from Stanford docs - rc &= ts.test_sentence ("The accident happened as the night was falling.", - "det(accident-2, the-1)\n" + - "nsubj(happened-3, accident-2)\n" + - "mark(falling-8, as-4)\n" + - "det(night-6, the-5)\n" + - "nsubj(falling-8, night-6)\n" + - "aux(falling-8, was-7)\n" + - "advcl(happened-3, falling-8)"); - - // advcl example from Stanford docs - rc &= ts.test_sentence 
("If you know who did it, you should tell the teacher.", - "mark(know-3, if-1)\n" + - "nsubj(know-3, you-2)\n" + - "advcl(tell-10, know-3)\n" + - "nsubj(did-5, who-4)\n" + - "ccomp(know-3, did-5)\n" + - "dobj(did-5, it-6)\n" + - "nsubj(tell-10, you-8)\n" + - "aux(tell-10, should-9)\n" + - "det(teacher-12, the-11)\n" + - "dobj(tell-10, teacher-12)"); - - // agent example from Stanford docs - rc &= ts.test_sentence ("The man has been killed by the police.", - "det(man-2, the-1)\n" + - "nsubjpass(killed-5, man-2)\n" + - "aux(killed-5, has-3)\n" + - "auxpass(killed-5, been-4)\n" + - "det(police-8, the-7)\n" + - "agent(killed-5, police-8)"); - - rc &= ts.test_sentence ("Effects caused by the protein are important.", - "nsubj(important-7, effects-1)\n" + - "partmod(effects-1, caused-2)\n" + - "det(protein-5, the-4)\n" + - "agent(caused-2, protein-5)\n" + - "cop(important-7, are-6)"); - - rc &= ts.test_sentence ("Sam, my brother, has arrived.", - "nsubj(arrived-7, Sam-1)\n" + - "poss(brother-4, my-3)\n" + - "appos(Sam-1, brother-4)\n" + - "aux(arrived-7, has-6)"); - - rc &= ts.test_sentence ("What is that?", - "attr(is-2, what-1)\n" + - "nsubj(is-2, that-3)"); - - rc &= ts.test_sentence ("Reagan has died.", - "nsubj(died-3, Reagan-1)\n" + - "aux(died-3, has-2)"); - - rc &= ts.test_sentence ("He should leave.", - "nsubj(leave-3, he-1)\n" + - "aux(leave-3, should-2)"); - - rc &= ts.test_sentence ("Kennedy has been killed.", - "nsubjpass(killed-4, Kennedy-1)\n" + - "aux(killed-4, has-2)\n" + - "auxpass(killed-4, been-3)"); - - rc &= ts.test_sentence ("Kennedy was killed.", - "nsubjpass(killed-3, Kennedy-1)\n" + - "auxpass(killed-3, was-2)"); - - rc &= ts.test_sentence ("Kennedy got killed.", - "nsubjpass(killed-3, Kennedy-1)\n" + - "auxpass(killed-3, got-2)"); - - rc &= ts.test_sentence ("Bill is big.", - "nsubj(big-3, Bill-1)\n" + - "cop(big-3, is-2)\n"); - - rc &= ts.test_sentence ("Bill is an honest man.", - "nsubj(man-5, Bill-1)\n" + - "cop(man-5, is-2)\n" + - 
"det(man-5, an-3)\n" + - "amod(man-5, honest-4)"); - - rc &= ts.test_sentence ("What she said makes sense.", - "dobj(said-3, what-1)\n" + - "nsubj(said-3, she-2)\n" + - "csubj(makes-4, said-3)\n" + - "dobj(makes-4, sense-5)"); - - rc &= ts.test_sentence ("What she said is not true.", - "dobj(said-3, what-1)\n" + - "nsubj(said-3, she-2)\n" + - "csubj(true-6, said-3)\n" + - "cop(true-6, is-4)\n" + - "neg(true-6, not-5)"); - - rc &= ts.test_sentence ("Which book do you prefer?", - "det(book-2, which-1)\n" + - "dobj(prefer-5, book-2)\n" + - "aux(prefer-5, do-3)\n" + - "nsubj(prefer-5, you-4)"); - - rc &= ts.test_sentence ("There is a ghost in the room.", - "expl(is-2, there-1)\n" + - "det(ghost-4, a-3)\n" + - "nsubj(is-2, ghost-4)\n" + - "det(room-7, the-6)\n" + - "prep_in(is-2, room-7)"); - - rc &= ts.test_sentence ("She gave me a raise.", - "nsubj(gave-2, she-1)\n" + - "iobj(gave-2, me-3)\n" + - "det(raise-5, a-4)\n" + - "dobj(gave-2, raise-5)"); - - rc &= ts.test_sentence ("The director is 65 years old.", - "det(director-2, the-1)\n" + - "nsubj(old-6, director-2)\n" + - "cop(old-6, is-3)\n" + - "num(years-5, 65-4)\n" + - "measure(old-6, years-5)"); - - rc &= ts.test_sentence ("Sam eats 3 sheep.", - "nsubj(eats-2, Sam-1)\n" + - "num(sheep-4, 3-3)\n" + - "dobj(eats-2, sheep-4)"); - -/**************** - * I don't get it. Stanford makes a num/number distinction I can't grok. 
- rc &= ts.test_sentence ("I lost $ 3.2 billion.", - "nsubj(lost-2, I-1)\n" + - "dobj(lost-2, $-3)\n" + - "number($-3, 3.2-4)\n" + - "number($-3, billion-5)"); -***********/ - - rc &= ts.test_sentence ("Truffles picked during the spring are tasty.", - "nsubj(tasty-7, truffles-1)\n" + - "partmod(truffles-1, picked-2)\n" + - "det(spring-5, the-4)\n" + - "prep_during(picked-2, spring-5)\n" + - "cop(tasty-7, are-6)"); - -/**************** - * Currently fails due to xcomp generation problems - * - rc &= ts.test_sentence ("We went to their offices to get Bill's clothes.", - "nsubj(went-2, we-1)\n" + - "xsubj(get-7, we-1)\n" + - "poss(offices-5, their-4)\n" + - "prep_to(went-2, offices-5)\n" + - "aux(get-7, to-6)\n" + - "xcomp(went-2, get-7)\n" + - "poss(clothes-10, Bill-8)\n" + - "dobj(get-7, clothes-10)"); -***********/ - -/**************** - * See README-Stanford for details. - rc &= ts.test_sentence ("All the boys are here.", - "predet(boys-3, all-1)\n" + - "det(boys-3, the-2)\n" + - "nsubj(are-4, boys-3)\n" + - "advmod(are-4, here-5)"); -***********/ - -/**************** - * These are ambiguous parses. - * Stanford picks the opposite choice from Relex. - * See the README-Stanford for a discussion. 
- rc &= ts.test_sentence ("I saw a cat in a hat.", - "nsubj(saw-2, I-1)\n" + - "det(cat-4, a-3)\n" + - "dobj(saw-2, cat-4)\n" + - "det(hat-7, a-6)\n" + - "prep_in(cat-4, hat-7)"); - - rc &= ts.test_sentence ("I saw a cat with a telescope.", - "nsubj(saw-2, I-1)\n" + - "det(cat-4, a-3)\n" + - "dobj(saw-2, cat-4)\n" + - "det(telescope-7, a-6)\n" + - "prep_with(cat-4, telescope-7)"); -***********/ - - rc &= ts.test_sentence ("He is responsible for meals.", - "nsubj(responsible-3, he-1)\n" + - "cop(responsible-3, is-2)\n" + - "prep_for(responsible-3, meals-5)\n"); - - rc &= ts.test_sentence ("They shut down the station.", - "nsubj(shut-2, they-1)\n" + - "prt(shut-2, down-3)\n" + - "det(station-5, the-4)\n" + - "dobj(shut-2, station-5)"); - - rc &= ts.test_sentence ("About 200 people came to the party.", - "quantmod(200-2, about-1)\n" + - "num(people-3, 200-2)\n" + - "nsubj(came-4, people-3)\n" + - "det(party-7, the-6)\n" + - "prep_to(came-4, party-7)"); - - rc &= ts.test_sentence ("I saw the man who you love.", - "nsubj(saw-2, I-1)\n" + - "det(man-4, the-3)\n" + - "dobj(saw-2, man-4)\n" + - "dobj(love-7, man-4)\n" + - "rel(love-7, who-5)\n" + - "nsubj(love-7, you-6)\n" + - "rcmod(man-4, love-7)"); - - -/**************** - * - * relex is failing to generate teh following: - * Almost got it w/the B** rules but not quite ... 
- -rel(love-8, wife-6) -rcmod(man-4, love-8) - - rc &= ts.test_sentence ("I saw the man whose wife you love.", - "nsubj(saw-2, I-1)\n" + - "det(man-4, the-3)\n" + - "dobj(saw-2, man-4)\n" + - "poss(wife-6, whose-5)\n" + - "dobj(love-8, wife-6)\n" + - "rel(love-8, wife-6)\n" + - "nsubj(love-8, you-7)\n" + - "rcmod(man-4, love-8)"); -***********/ - - rc &= ts.test_sentence ("I am ready to leave.", - "nsubj(ready-3, I-1)\n" + - "cop(ready-3, am-2)\n" + - "aux(leave-5, to-4)\n" + - "xcomp(ready-3, leave-5)"); - - rc &= ts.test_sentence ("Tom likes to eat fish.", - "nsubj(likes-2, Tom-1)\n" + - "xsubj(eat-4, Tom-1)\n" + - "aux(eat-4, to-3)\n" + - "xcomp(likes-2, eat-4)\n" + - "dobj(eat-4, fish-5)"); - - -/**************** - rc &= ts.test_sentence ("He says that you like to swim.", - "nsubj(says-2, he-1)\n" + - "complm(like-5, that-3)\n" + - "nsubj(like-5, you-4)\n" + - "ccomp(says-2, like-5)\n" + - "nsubj(swim-7, to-6)\n" + // NFW that this can be right. - "ccomp(like-5, swim-7)"); -***********/ - - -/**************** - rc &= ts.test_sentence ("The garage is next to the house.", - "det(garage-2, the-1)\n" + - "nsubj(next-4, garage-2)\n" + - "cop(next-4, is-3)\n" + - "det(house-7, the-6)\n" + - "prep_to(next-4, house-7)"); -***********/ - - // ========================================================= - // PENN PART_OF_SPEECH TAGGING - // ========================================================= - // - rc &= ts.test_tagged_sentence ("Truffles picked during the spring are tasty.", - "nsubj(tasty-7-JJ, truffles-1-NNS)\n" + - "partmod(truffles-1-NNS, picked-2-VBN)\n" + - "det(spring-5-NN, the-4-DT)\n" + - "prep_during(picked-2-VBN, spring-5-NN)\n" + - "cop(tasty-7-JJ, are-6-VBP)"); - - rc &= ts.test_tagged_sentence ("I ate twelve truffles.", - "nsubj(ate-2-VBD, I-1-PRP)\n" + - "num(truffles-4-NNS, twelve-3-CD)\n" + - "dobj(ate-2-VBD, truffles-4-NNS)"); - - rc &= ts.test_tagged_sentence ("I have eaten twelve truffles.", - "nsubj(eaten-3-VBN, I-1-PRP)\n" + - "aux(eaten-3-VBN, 
have-2-VBP)\n" + - "num(truffles-5-NNS, twelve-4-CD)\n" + - "dobj(eaten-3-VBN, truffles-5-NNS)"); - - rc &= ts.test_tagged_sentence ("I had eaten twelve truffles.", - "nsubj(eaten-3-VBN, I-1-PRP)\n" + - "aux(eaten-3-VBN, had-2-VBD)\n" + - "num(truffles-5-NNS, twelve-4-CD)\n" + - "dobj(eaten-3-VBN, truffles-5-NNS)"); - - rc &= ts.test_tagged_sentence ("The truffles were eaten.", - "det(truffles-2-NNS, the-1-DT)\n" + - "nsubjpass(eaten-4-VBN, truffles-2-NNS)\n" + - "auxpass(eaten-4-VBN, were-3-VBD)"); - - - // Full disclosure: Stanford currently generates - // dep(time-4-NN, young-8-JJ) which just means it doesn't know - // the right answer (which is advcl, right?). - // It also generates advmod(young-8-JJ, when-5-WRB) in addition - // to rel(young-8-JJ, when-5-WRB) which is not quite right - // either. - rc &= ts.test_tagged_sentence ("There was a time when we were young.", - "expl(was-2-VBD, there-1-EX)\n" + - "det(time-4-NN, a-3-DT)\n" + - "nsubj(was-2-VBD, time-4-NN)\n" + - "rel(young-8-JJ, when-5-WRB)\n" + - "nsubj(young-8-JJ, we-6-PRP)\n" + - "cop(young-8-JJ, were-7-VBD)\n" + - "advcl(time-4-NN, young-8-JJ)"); - - rc &= ts.test_tagged_sentence ("Is there a better way?", - "expl(is-1-VBZ, there-2-EX)\n" + - "det(way-5-NN, a-3-DT)\n" + - "amod(way-5-NN, better-4-JJR)\n" + - "nsubj(is-1-VBZ, way-5-NN)"); - - rc &= ts.test_tagged_sentence ("Is this the largest you can find?", - "cop(largest-4-JJS, is-1-VBZ)\n" + - "nsubj(largest-4-JJS, this-2-DT)\n" + - "det(largest-4-JJS, the-3-DT)\n" + - "nsubj(find-7-VB, you-5-PRP)\n" + - "aux(find-7-VB, can-6-MD)\n" + - "rcmod(largest-4-JJS, find-7-VB)"); - - rc &= ts.test_tagged_sentence ("But my efforts to win his heart have failed.", - "poss(efforts-3-NNS, my-2-PRP$)\n" + - "nsubj(failed-9-VBN, efforts-3-NNS)\n" + - "aux(win-5-VB, to-4-TO)\n" + - "infmod(efforts-3-NNS, win-5-VB)\n" + - "poss(heart-7-NN, his-6-PRP$)\n" + - "dobj(win-5-VB, heart-7-NN)\n" + - "aux(failed-9-VBN, have-8-VBP)"); - - rc &= ts.test_tagged_sentence 
("The undergrads are occasionally late.", - "det(undergrads-2-NNS, the-1-DT)\n" + - "nsubj(late-5-JJ, undergrads-2-NNS)\n" + - "cop(late-5-JJ, are-3-VBP)\n" + - "advmod(late-5-JJ, occasionally-4-RB)"); - - rc &= ts.test_tagged_sentence ("The height of Mount Everest is 8,848 metres.", - "det(height-2-NN, the-1-DT)\n" + - "nsubj(metres-8-NNS, height-2-NN)\n" + - "nn(Everest-5-NNP, Mount-4-NNP)\n" + - "prep_of(height-2-NN, Everest-5-NNP)\n" + - "cop(metres-8-NNS, is-6-VBZ)\n" + - "num(metres-8-NNS, 8,848-7-CD)"); - - rc &= ts.test_tagged_sentence ("It happened on December 3rd, 1990.", - "nsubj(happened-2-VBD, it-1-PRP)\n" + - "prep_on(happened-2-VBD, December-4-NNP)\n" + - "num(December-4-NNP, 3rd-5-CD)\n" + - "num(December-4-NNP, 1990-7-CD)"); - - - if (rc) - { - System.err.println("Tested " + ts.pass + " sentences, test passed OK"); - } - else - { - System.err.println("Test failed\n\t" + - ts.fail + " sentences failed\n\t" + - ts.pass + " sentences passed"); - } - - System.err.println("********************************************************"); - System.err.println("Failed test sentences on Stanford with POS tagging FALSE"); - System.err.println("********************************************************"); - - if(sentfail.isEmpty()) - System.err.println("All test sentences passed"); - - for(String temp : sentfail){ - System.err.println(temp); - } - System.err.println("********************************************************\n"); - - System.err.println("********************************************************"); - System.err.println("Failed test sentences on Stanford with POS tagging TRUE"); - System.err.println("********************************************************"); - - if(sentfailpostag.isEmpty()) - System.err.println("All test sentences passed"); - - for(String temp : sentfailpostag){ - System.err.println(temp); - } - System.err.println("********************************************************\n"); - } -} +/* + * Copyright 2009 Linas Vepstas + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package relex.test; + +import static org.hamcrest.Matchers.hasSize; +import static org.junit.Assert.assertThat; + +import java.util.List; +import java.util.Set; + +import junitparams.JUnitParamsRunner; + +import org.hamcrest.Matchers; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; + +import relex.ParsedSentence; +import relex.RelationExtractor; +import relex.Sentence; +import relex.output.StanfordView; + +import com.google.common.base.Optional; +import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableList; + +@RunWith(JUnitParamsRunner.class) +public class TestStanford { + private static RelationExtractor re; + + @BeforeClass + public static void setUpClass() { + re = new RelationExtractor(); + re.do_stanford = true; + } + + /** + * First argument is the sentence. + * Second argument is a list of the relations that the + * Stanford parser generates. + * Return true if relex generates that same dependencies + * as the second argument. 
+ */ + @Test + @junitparams.Parameters(source=StanfordCases.class, method="provideUntagged") + public void untaggedSentence(String sent, Set sf, Optional description) { + re.do_penn_tagging = false; + Sentence sntc = re.processSentence(sent); + ParsedSentence parse = sntc.getParses().get(0); + String rs = StanfordView.printRelations(parse, false); + + List sfa = ImmutableList.copyOf(sf); + List rsa = Splitter.on("\n").omitEmptyStrings().splitToList(rs); + assertThat("Error: size miscompare:\n" + + "\tStanford = " + sfa + "\n" + + "\tRelEx = " + rsa + "\n" + + "\tSentence = " + sent, + sfa, hasSize(rsa.size())); + assertThat("Error: content miscompare:\n" + + "\tStanford = " + sfa + "\n" + + "\tRelEx = " + rsa + "\n" + + "\tSentence = " + sent, + sfa, Matchers.containsInAnyOrder(rsa.toArray(new String[] {}))); + } + + @Test + @junitparams.Parameters(source=StanfordCases.class, method="provideTagged") + public void taggedSentence(String sent, Set sf, Optional description) { + re.do_penn_tagging = true; + Sentence sntc = re.processSentence(sent); + ParsedSentence parse = sntc.getParses().get(0); + String rs = StanfordView.printRelations(parse, true); + + List sfa = ImmutableList.copyOf(sf); + List rsa = Splitter.on("\n").omitEmptyStrings().splitToList(rs); + assertThat("Error: size miscompare:\n" + + "\tStanford = " + sfa + "\n" + + "\tRelEx = " + rsa + "\n" + + "\tSentence = " + sent, + sfa, hasSize(rsa.size())); + assertThat("Error: content miscompare:\n" + + "\tStanford = " + sfa + "\n" + + "\tRelEx = " + rsa + "\n" + + "\tSentence = " + sent, + sfa, Matchers.containsInAnyOrder(rsa.toArray(new String[] {}))); + } + +} diff --git a/src/test/resources/logback.xml b/src/test/resources/logback.xml new file mode 100644 index 000000000..caa2d35f2 --- /dev/null +++ b/src/test/resources/logback.xml @@ -0,0 +1,16 @@ + + + + + %d{HH:mm:ss.SSS} %highlight(%-5level) | %magenta(%-16.16thread) %cyan(%-32.32logger{1}) | %msg%n + + + + + + + + + + + diff --git 
a/src/test/resources/relex-comparatives.tsv b/src/test/resources/relex-comparatives.tsv new file mode 100644 index 000000000..e20614a0a --- /dev/null +++ b/src/test/resources/relex-comparatives.tsv @@ -0,0 +1,455 @@ +Sentence Relations +Some people like pigs less than dogs. + _advmod(like, less) + _obj(like, pig) + _quantity(people, some) + _subj(like, people) + than(pig, dog) + +Some people like pigs more than dogs. + _advmod(like, more) + _obj(like, pig) + _quantity(people, some) + _subj(like, people) + than(pig, dog) + +//Non-equal Gradable : Two entities one feature more/less +He is more intelligent than John. + than(he, John) + _comparative(intelligent, he) + degree(intelligent, comparative) + _predadj(he, intelligent) + +He is less intelligent than John. + than(he, John) + _comparative(intelligent, he) + degree(intelligent, comparative) + _advmod(intelligent, less) + _predadj(he, intelligent) + +He runs more quickly than John. + _advmod(run, quickly) + _advmod(quickly, more) + _subj(run, he) + than(he, John) + _comparative(quickly, run) + degree(quickly, comparative) + +He runs less quickly than John. + _advmod(run, quickly) + _subj(run, he) + _advmod(quickly, less) + than(he, John) + _comparative(quickly, run) + degree(quickly, comparative) + +He runs more quickly than John does. + _advmod(run, quickly) + _advmod(quickly, more) + _subj(run, he) + _subj(do, John) + than(he, John) + _comparative(quickly, run) + degree(quickly, comparative) + +//This sentence is ungrammatical but commonly used by non-native English speakers +He runs less quickly than John does. + _advmod(run, quickly) + _subj(run, he) + _subj(do, John) + _advmod(quickly, less) + than(he, John) + _comparative(quickly, run) + degree(quickly, comparative) + +He runs slower than John does. + _advmod(run, slow) + _subj(run, he) + _subj(do, John) + than(he, John) + _comparative(slow, run) + degree(slow, comparative) + +He runs more than John. 
+ _obj(run, more) + _subj(run, he) + than(he, John) + _comparative(more, run) + degree(more, comparative) + +He runs less than John. + _obj(run, less) + _subj(run, he) + than(he, John) + _comparative(less, run) + degree(less, comparative) + +He runs faster than John. + than(he, John) + _comparative(fast, run) + _subj(run, he) + _advmod(run, fast) + degree(fast, comparative) + +He runs more slowly than John. + than(he, John) + _subj(run, he) + _advmod(slowly, more) + _comparative(slowly, run) + _advmod(run, slowly) + degree(slowly, comparative) + +He runs less slowly than John. + than(he, John) + _subj(run, he) + _comparative(slowly, run) + _advmod(run, slowly) + _advmod(slowly, less) + degree(slowly, comparative) + +He runs more miles than John does. + than(he, John) + _subj(run, he) + _subj(do, John) + _obj(run, mile) + _comparative(mile, run) + _quantity(mile, more) + degree(more, comparative) + +He runs less miles than John does. + than(he, John) + _subj(run, he) + _subj(do, John) + _obj(run, mile) + _comparative(mile, run) + _quantity(mile, less) + degree(less, comparative) + +He runs many more miles than John does. + than(he, John) + _comparative(mile, run) + _obj(run, mile) + _subj(run, he) + _subj(do, John) + _quantity(mile, many) + degree(more, comparative) + +He runs fewer miles than John does. + than(he, John) + _comparative(mile, run) + _obj(run, mile) + _subj(run, he) + _subj(do, John) + _quantity(mile, fewer) + degree(fewer, comparative) + +He runs ten more miles than John. + _obj(run, mile) + _subj(run, he) + _quantity(mile, more) + than(he, John) + _comparative(mile, run) + _num_quantity(miles, ten) + degree(more, comparative) + +He runs almost ten more miles than John does. + _obj(run, mile) + _subj(run, he) + _comparative(mile, run) + _subj(do, John) + than(he, John) + _quantity_mod(ten, almost) + _num_quantity(miles, ten) + degree(more, comparative) + +He runs more often than John. 
+ _subj(run, he) + _advmod(often, more) + _advmod(run, often) + _comparative(often, run) + than(he, John) + degree(often, comparative) + +He runs less often than John. + _subj(run, he) + _advmod(often, less) + _advmod(run, often) + _comparative(often, run) + than(he, John) + degree(often, comparative) + +He runs here more often than John. + _advmod(run, here) + _advmod(often, more) + _advmod(run, often) + _subj(run, he) + _comparative(often, run) + than(he, John) + degree(often, comparative) + +He runs here less often than John. + _advmod(run, here) + _advmod(often, less) + _advmod(run, often) + _subj(run, he) + _comparative(often, run) + than(he, John) + degree(often, comparative) + +He is faster than John. + than(he, John) + _predadj(he, fast) + _comparative(fast, be) + degree(fast, comparative) + +He is faster than John is. + than(he, John) + _predadj(he, fast) + _subj(be, John) + _comparative(fast, be) + degree(fast, comparative) + +His speed is faster than John's. + than(speed, be) + _predadj(speed, fast) + _poss(speed, him) + _comparative(fast, be) + degree(fast, comparative) + +I run more than Ben. + _subj(run, I) + _obj(run, more) + _comparative(more, run) + than(I, Ben) + degree(more, comparative) + +I run less than Ben. + _subj(run, I) + _obj(run, less) + _comparative(less, run) + than(I, Ben) + degree(less, comparative) + +I run more miles than Ben. + _subj(run, I) + _obj(run, mile) + _quantity(mile, more) + _comparative(mile, run) + than(I, Ben) + degree(more, comparative) + +I run fewer miles than Ben. + _subj(run, I) + _obj(run, mile) + _quantity(mile, fewer) + _comparative(mile, run) + than(I, Ben) + degree(fewer, comparative) + +I run 10 more miles than Ben. + _subj(run, I) + _obj(run, mile) + _num_quantity(mile, 10) + _quantity_mod(10, more) + _comparative(mile, run) + than(I, Ben) + degree(more, comparative) + +I run 10 fewer miles than Ben. 
+ _subj(run, I) + _obj(run, mile) + _num_quantity(mile, 10) + _quantity_mod(10, fewer) + _comparative(mile, run) + than(I, Ben) + degree(fewer, comparative) + +I run more often than Ben. + _subj(run, I) + _advmod(run, often) + _comparative(often, run) + than(I, Ben) + degree(often, comparative) + _advmod(often, more) + +I run less often than Ben. + _subj(run, I) + _advmod(run, often) + _comparative(often, run) + than(I, Ben) + degree(often, comparative) + _advmod(often, less) + +I run more often than Ben does. + _subj(run, I) + _subj(do, Ben) + _advmod(run, often) + _comparative(often, run) + than(I, Ben) + degree(often, comparative) + _advmod(often, more) + +I run less often than Ben does. + _subj(run, I) + _subj(do, Ben) + _advmod(run, often) + _comparative(often, run) + than(I, Ben) + degree(often, comparative) + _advmod(often, less) + +I run more often than Ben climbs. + _subj(run, I) + _subj(climb, Ben) + _comparative(often, run) + than(I, Ben) + than1(run, climb) + degree(often, comparative) + _advmod(run, often) + _advmod(often, more) + +I run less often than Ben climbs. + _subj(run, I) + _subj(climb, Ben) + _comparative(often, run) + than(I, Ben) + than1(run, climb) + degree(often, comparative) + _advmod(run, often) + _advmod(often, less) + +I run more races than Ben wins contests. + _subj(run, I) + _obj(run, race) + _subj(win, Ben) + _obj(win, contest) + _quantity(race, more) + _comparative(race, run) + than(I, Ben) + than1(run, climb) + than2(race, contest) + degree(more, comparative) + +I run fewer races than Ben wins contests. + _subj(run, I) + _obj(run, race) + _subj(win, Ben) + _obj(win, contest) + _quantity(race, fewer) + _comparative(race, run) + than(I, Ben) + than1(run, climb) + than2(race, contest) + degree(fewer, comparative) + +I have more chairs than Ben. + _obj(have, chair) + _subj(have, I) + than(I, Ben) + _comparative(chair, have) + _quantity(chair, more) + degree(more, comparative) + +I have fewer chairs than Ben. 
+ _obj(have, chair) + _subj(have, I) + than(I, Ben) + _comparative(chair, have) + _quantity(chair, fewer) + degree(fewer, comparative) + +He earns much more money than I do. + _obj(earn, money) + _subj(do, I) + _subj(earn, he) + than(he,I) + _comparative(money,earn) + _quantity(money, more) + _advmod(more, much) + degree(more,comparative) + +He earns much less money than I do. + _obj(earn, money) + _subj(do, I) + _subj(earn, he) + than(he, I) + _comparative(money, earn) + _quantity(money, less) + _advmod(less, much) + degree(less, comparative) + +She comes here more often than her husband. + _advmod(come, here) + _advmod(often, more) + _advmod(come, often) + _subj(come, she) + _poss(husband, her) + _comparative(often, come) + than(she, husband) + degree(often, comparative) + +She comes here less often than her husband. + _advmod(come, here) + _advmod(often, less) + _advmod(come, often) + _subj(come, she) + _poss(husband, her) + _comparative(often, come) + than(she, husband) + degree(often, comparative) + +//When link-grammar uses A, relex should use _amod; it will use A instead of AN; will be updated in the next link-grammar version +Russian grammar is more difficult than English grammar. + _comparative(difficult, grammar) + than(grammar, grammar) + _amod(grammar, Russian) + _predadj(grammar, difficult) + _amod(grammar, English) + degree(difficult, comparative) + +Russian grammar is less difficult than English grammar. + _comparative(difficult, grammar) + than(grammar, grammar) + _amod(grammar, Russian) + _predadj(grammar, difficult) + _amod(grammar, English) + _advmod(difficult, less) + degree(difficult, comparative) + +My sister is much more intelligent than me. + _amod(much, intelligent) + _predadj(sister, intelligent) + _poss(sister, me) + than(sister, me) + _comparative(intelligent, sister) + degree(intelligent, comparative) + +My sister is much less intelligent than me. 
+ _amod(much, intelligent) + _predadj(sister, intelligent) + _poss(sister, me) + than(sister, me) + _comparative(intelligent, sister) + _advmod(intelligent, less) + degree(intelligent, comparative) + +I find maths lessons more enjoyable than science lessons. + _iobj(find, maths) + _obj(find, lesson) + _subj(find, I) + _amod(lesson, enjoyable) + _nn(lesson, science) + than(maths, science) + _comparative(enjoyable, maths) + degree(enjoyable, comparative) + +I find maths lessons less enjoyable than science lessons. + _iobj(find, maths) + _obj(find, lesson) + _subj(find, I) + _amod(lesson, enjoyable) + _nn(lesson, science) + than(maths, science) + _comparative(enjoyable, maths) + _advmod(enjoyable, less) + degree(enjoyable, comparative) diff --git a/src/test/resources/relex-conjunction.tsv b/src/test/resources/relex-conjunction.tsv new file mode 100644 index 000000000..db1cca574 --- /dev/null +++ b/src/test/resources/relex-conjunction.tsv @@ -0,0 +1,101 @@ +Sentence Relations +//conjoined verbs +Scientists make observations and ask questions. + _obj(make, observation) + _obj(ask, question) + _subj(make, scientist) + _subj(ask, scientist) + conj_and(make, ask) +//conjoined nouns +She is a student and an employee. + _obj(be, student) + _obj(be, employee) + _subj(be, she) + conj_and(student, employee) +//conjoined adjectives +I hailed a black and white taxi. + _obj(hail, taxi) + _subj(hail, I) + _amod(taxi, black) + _amod(taxi, white) + conj_and(black, white) +//conjoined adverbs +She ran quickly and quietly. + _advmod(run, quickly) + _advmod(run, quietly) + _subj(run, she) + conj_and(quickly, quietly) +//adjectival modifiers on conjoined subject +The big truck and the little car collided. + _amod(car, little) + _amod(truck, big) + _subj(collide, truck) + _subj(collide, car) + conj_and(truck, car) +//verbs with modifiers +We ate dinner at home and went to the movies. 
+ _obj(eat, dinner) + conj_and(eat, go) + at(eat, home) + _subj(eat, we) + to(go, movie) + _subj(go, we) +//verb with more modifiers +We ate a late dinner at home and went out to the movies afterwards. + _obj(eat, dinner) + conj_and(eat, go_out) + at(eat, home) + _subj(eat, we) + to(go_out, movie) + _advmod(go_out, afterwards) + _subj(go_out, we) + _amod(dinner, late) + +//conjoined ditransitive verbs +She baked him a cake and sang him a song. + _iobj(sing, him) + _obj(sing, song) + _subj(sing, she) + _iobj(bake, him) + _obj(bake, cake) + conj_and(bake, sing) + _subj(bake, she) +//conjoined adverbs with modifiers +she ran very quickly and extremely quietly. + _advmod(run, quickly) + _advmod(run, quietly) + _subj(run, she) + _advmod(quietly, extremely) + conj_and(quickly, quietly) + _advmod(quickly, very) +//conjoined adverbs without modifiers +She handled it quickly and gracefully. + _obj(handle, quickly) + _obj(handle, gracefully) + _advmod(handle, quickly) + _advmod(handle, gracefully) + _subj(handle, she) + conj_and(quickly, gracefully) +//modifiers on conjoined adjectives +He had very long and very white hair. + _obj(have, hair) + _subj(have, he) + _amod(hair, long) + _amod(hair, white) + _advmod(white, very) + conj_and(long, white) + _advmod(long, very) +//adjectival modifiers on conjoined object +The collision was between the little car and the big truck. + _pobj(between, car) + _pobj(between, truck) + _psubj(between, collision) + _amod(truck, big) + _amod(car, little) + conj_and(car, truck) +//Names Modifiers and conjunction +Big Tom and Angry Sue went to the movies. + to(go, movie) + _subj(go, Big_Tom) + _subj(go, Angry_Sue) + conj_and(Big_Tom, Angry_Sue) diff --git a/src/test/resources/relex-extraposition.tsv b/src/test/resources/relex-extraposition.tsv new file mode 100644 index 000000000..bad7b8afb --- /dev/null +++ b/src/test/resources/relex-extraposition.tsv @@ -0,0 +1,107 @@ +Sentence Relations +The woman who lives next door is a registered nurse. 
+ _obj(be, nurse) + _subj(be, woman) + _amod(nurse, registered) + _advmod(live, next_door) + _subj(live, woman) + who(woman, live) + +A player who is injured has to leave the field. + _to-do(have, leave) + _subj(have, player) + _obj(leave, field) + _predadj(player, injured) + who(player, injured) + +Pizza, which most people love, is not very healthy. + _advmod(very, not) + _advmod(healthy, very) + _obj(love, Pizza) + _quantity(people, most) + which(Pizza, love) + _subj(love, people) + _predadj(Pizza, healthy) + +The restaurant which belongs to my aunt is very famous. + _advmod(famous, very) + to(belong, aunt) + _subj(belong, restaurant) + _poss(aunt, me) + which(restaurant, belong) + _predadj(restaurant, famous) + +The books which I read in the library were written by Charles Dickens. + _obj(write, book) + by(write, Charles_Dickens) + _obj(read, book) + in(read, library) + _subj(read, I) + which(book, read) + +This is the book whose author I met in a library. + _obj(be, book) + _subj(be, this) + _obj(meet, author) + in(meet, library) + _subj(meet, I) + whose(book, author) + +The book that Jack lent me is very boring. + _advmod(boring, very) + _iobj(lend, book) + _obj(lend, me) + _subj(lend, Jack) + that(book, lend) + _predadj(book, boring) + +They ate a special curry which was recommended by the restaurant’s owner. + _obj(eat, curry) + _subj(eat, they) + _obj(recommend, curry) + by(recommend, owner) + _poss(owner, restaurant) + which(curry, recommend) + _amod(curry, special) + +The dog who Jack said chased me was black. + _obj(chase, me) + _subj(chase, dog) + _subj(say, Jack) + _predadj(dog, black) + who(dog, chase) + +Jack, who hosted the party, is my cousin. + _obj(be, cousin) + _subj(be, Jack) + _poss(cousin, me) + _obj(host, party) + _subj(host, Jack) + who(Jack, host) + +Jack, whose name is in that book, is the student near the window. 
+ near(be, window) + _obj(be, student) + _subj(be, Jack) + _pobj(in, book) + _psubj(in, name) + _det(book, that) + whose(Jack, name) + +Jack stopped the police car that was driving fast. + _obj(stop, car) + _subj(stop, Jack) + _advmod(drive, fast) + _subj(drive, car) + that(car, drive) + _nn(car, police) + +Just before the crossroads, the car was stopped by a traffic sign that stood on the street. + _obj(stop, car) + by(stop, sign) + _advmod(stop, just) + on(stand, street) + _subj(stand, sign) + that(sign, stand) + _nn(sign, traffic) + before(just, crossroads) diff --git a/src/test/resources/stanford-tagged.tsv b/src/test/resources/stanford-tagged.tsv new file mode 100644 index 000000000..9e5dc47fa --- /dev/null +++ b/src/test/resources/stanford-tagged.tsv @@ -0,0 +1,91 @@ +// ========================================================= +// PENN PART_OF_SPEECH TAGGING +// ========================================================= +// +Truffles picked during the spring are tasty. + nsubj(tasty-7-JJ, truffles-1-NNS) + partmod(truffles-1-NNS, picked-2-VBN) + det(spring-5-NN, the-4-DT) + prep_during(picked-2-VBN, spring-5-NN) + cop(tasty-7-JJ, are-6-VBP) + +I ate twelve truffles. + nsubj(ate-2-VBD, I-1-PRP) + num(truffles-4-NNS, twelve-3-CD) + dobj(ate-2-VBD, truffles-4-NNS) + +I have eaten twelve truffles. + nsubj(eaten-3-VBN, I-1-PRP) + aux(eaten-3-VBN, have-2-VBP) + num(truffles-5-NNS, twelve-4-CD) + dobj(eaten-3-VBN, truffles-5-NNS) + +I had eaten twelve truffles. + nsubj(eaten-3-VBN, I-1-PRP) + aux(eaten-3-VBN, had-2-VBD) + num(truffles-5-NNS, twelve-4-CD) + dobj(eaten-3-VBN, truffles-5-NNS) + +The truffles were eaten. + det(truffles-2-NNS, the-1-DT) + nsubjpass(eaten-4-VBN, truffles-2-NNS) + auxpass(eaten-4-VBN, were-3-VBD) + + +// Full disclosure: Stanford currently generates +// dep(time-4-NN, young-8-JJ) which just means it doesn't know +// the right answer (which is advcl, right?). 
+// It also generates advmod(young-8-JJ, when-5-WRB) in addition +// to rel(young-8-JJ, when-5-WRB) which is not quite right +// either. +There was a time when we were young. + expl(was-2-VBD, there-1-EX) + det(time-4-NN, a-3-DT) + nsubj(was-2-VBD, time-4-NN) + rel(young-8-JJ, when-5-WRB) + nsubj(young-8-JJ, we-6-PRP) + cop(young-8-JJ, were-7-VBD) + advcl(time-4-NN, young-8-JJ) + +Is there a better way? + expl(is-1-VBZ, there-2-EX) + det(way-5-NN, a-3-DT) + amod(way-5-NN, better-4-JJR) + nsubj(is-1-VBZ, way-5-NN) + +Is this the largest you can find? + cop(largest-4-JJS, is-1-VBZ) + nsubj(largest-4-JJS, this-2-DT) + det(largest-4-JJS, the-3-DT) + nsubj(find-7-VB, you-5-PRP) + aux(find-7-VB, can-6-MD) + rcmod(largest-4-JJS, find-7-VB) + +But my efforts to win his heart have failed. + poss(efforts-3-NNS, my-2-PRP$) + nsubj(failed-9-VBN, efforts-3-NNS) + aux(win-5-VB, to-4-TO) + infmod(efforts-3-NNS, win-5-VB) + poss(heart-7-NN, his-6-PRP$) + dobj(win-5-VB, heart-7-NN) + aux(failed-9-VBN, have-8-VBP) + +The undergrads are occasionally late. + det(undergrads-2-NNS, the-1-DT) + nsubj(late-5-JJ, undergrads-2-NNS) + cop(late-5-JJ, are-3-VBP) + advmod(late-5-JJ, occasionally-4-RB) + +The height of Mount Everest is 8,848 metres. + det(height-2-NN, the-1-DT) + nsubj(metres-8-NNS, height-2-NN) + nn(Everest-5-NNP, Mount-4-NNP) + prep_of(height-2-NN, Everest-5-NNP) + cop(metres-8-NNS, is-6-VBZ) + num(metres-8-NNS, 8,848-7-CD) + +It happened on December 3rd, 1990. + nsubj(happened-2-VBD, it-1-PRP) + prep_on(happened-2-VBD, December-4-NNP) + num(December-4-NNP, 3rd-5-CD) + num(December-4-NNP, 1990-7-CD) diff --git a/src/test/resources/stanford-untagged.tsv b/src/test/resources/stanford-untagged.tsv new file mode 100644 index 000000000..b3fa3afb6 --- /dev/null +++ b/src/test/resources/stanford-untagged.tsv @@ -0,0 +1,402 @@ +// The parses below were compared to the Stanford parser, circa +// 2009. Since then, it is likely that Stanford has changed. 
+// The tests below should not be changed, unless a) they are +// changed to be compatible with current-day Stanford, and b) they +// pass. +Who invented sliced bread? + nsubj(invented-2, who-1) + amod(bread-4, sliced-3) + dobj(invented-2, bread-4) + +Jim runs quickly. + nsubj(runs-2, Jim-1) + advmod(runs-2, quickly-3) + +The bird, a robin, sang sweetly. + det(bird-2, the-1) + nsubj(sang-7, bird-2) + det(robin-5, a-4) + appos(bird-2, robin-5) + advmod(sang-7, sweetly-8) +// wtf ?? dep is not documented .. not sure what to do here ... +There is a place we can go. + expl(is-2, there-1) + det(place-4, a-3) + nsubj(is-2, place-4) + nsubj(go-7, we-5) + aux(go-7, can-6) +//dep(is-2, go-7) + + +The linebacker gave the quarterback a push. + det(linebacker-2, the-1) + nsubj(gave-3, linebacker-2) + det(quarterback-5, the-4) + iobj(gave-3, quarterback-5) + det(push-7, a-6) + dobj(gave-3, push-7) + +He stood at the goal line. + nsubj(stood-2, he-1) + det(line-6, the-4) + nn(line-6, goal-5) + prep_at(stood-2, line-6) + +// acomp example from Stanford docs +She looks very beautiful. + nsubj(looks-2, she-1) + advmod(beautiful-4, very-3) + acomp(looks-2, beautiful-4) + +// advcl example from Stanford docs +The accident happened as the night was falling. + det(accident-2, the-1) + nsubj(happened-3, accident-2) + mark(falling-8, as-4) + det(night-6, the-5) + nsubj(falling-8, night-6) + aux(falling-8, was-7) + advcl(happened-3, falling-8) + +// advcl example from Stanford docs +If you know who did it, you should tell the teacher. + mark(know-3, if-1) + nsubj(know-3, you-2) + advcl(tell-10, know-3) + nsubj(did-5, who-4) + ccomp(know-3, did-5) + dobj(did-5, it-6) + nsubj(tell-10, you-8) + aux(tell-10, should-9) + det(teacher-12, the-11) + dobj(tell-10, teacher-12) + +// agent example from Stanford docs +The man has been killed by the police. 
+ det(man-2, the-1) + nsubjpass(killed-5, man-2) + aux(killed-5, has-3) + auxpass(killed-5, been-4) + det(police-8, the-7) + agent(killed-5, police-8) + +Effects caused by the protein are important. + nsubj(important-7, effects-1) + partmod(effects-1, caused-2) + det(protein-5, the-4) + agent(caused-2, protein-5) + cop(important-7, are-6) + +Sam, my brother, has arrived. + nsubj(arrived-7, Sam-1) + poss(brother-4, my-3) + appos(Sam-1, brother-4) + aux(arrived-7, has-6) + +What is that? + attr(is-2, what-1) + nsubj(is-2, that-3) + +Reagan has died. + nsubj(died-3, Reagan-1) + aux(died-3, has-2) + +He should leave. + nsubj(leave-3, he-1) + aux(leave-3, should-2) + +Kennedy has been killed. + nsubjpass(killed-4, Kennedy-1) + aux(killed-4, has-2) + auxpass(killed-4, been-3) + +Kennedy was killed. + nsubjpass(killed-3, Kennedy-1) + auxpass(killed-3, was-2) + +Kennedy got killed. + nsubjpass(killed-3, Kennedy-1) + auxpass(killed-3, got-2) + +Bill is big. + nsubj(big-3, Bill-1) + cop(big-3, is-2) + +Bill is an honest man. + nsubj(man-5, Bill-1) + cop(man-5, is-2) + det(man-5, an-3) + amod(man-5, honest-4) + +What she said makes sense. + dobj(said-3, what-1) + nsubj(said-3, she-2) + csubj(makes-4, said-3) + dobj(makes-4, sense-5) + +What she said is not true. + dobj(said-3, what-1) + nsubj(said-3, she-2) + csubj(true-6, said-3) + cop(true-6, is-4) + neg(true-6, not-5) + +Which book do you prefer? + det(book-2, which-1) + dobj(prefer-5, book-2) + aux(prefer-5, do-3) + nsubj(prefer-5, you-4) + +There is a ghost in the room. + expl(is-2, there-1) + det(ghost-4, a-3) + nsubj(is-2, ghost-4) + det(room-7, the-6) + prep_in(is-2, room-7) + +She gave me a raise. + nsubj(gave-2, she-1) + iobj(gave-2, me-3) + det(raise-5, a-4) + dobj(gave-2, raise-5) + +The director is 65 years old. + det(director-2, the-1) + nsubj(old-6, director-2) + cop(old-6, is-3) + num(years-5, 65-4) + measure(old-6, years-5) + +Sam eats 3 sheep. 
+ nsubj(eats-2, Sam-1) + num(sheep-4, 3-3) + dobj(eats-2, sheep-4) + +//**************** +// * I don't get it. Stanford makes a num/number distinction I can't grok. +// I lost $ 3.2 billion. +//nsubj(lost-2, I-1) +//dobj(lost-2, $-3) +//number($-3, 3.2-4) +//number($-3, billion-5) +//***********/ + +Truffles picked during the spring are tasty. + nsubj(tasty-7, truffles-1) + partmod(truffles-1, picked-2) + det(spring-5, the-4) + prep_during(picked-2, spring-5) + cop(tasty-7, are-6) + +//**************** +// * Currently fails due to xcomp generation problems +// * +// We went to their offices to get Bill's clothes. +//nsubj(went-2, we-1) +//xsubj(get-7, we-1) +//poss(offices-5, their-4) +//prep_to(went-2, offices-5) +//aux(get-7, to-6) +//xcomp(went-2, get-7) +//poss(clothes-10, Bill-8) +//dobj(get-7, clothes-10) +//***********/ + +//**************** +// * See README-Stanford for details. +// All the boys are here. +//predet(boys-3, all-1) +//det(boys-3, the-2) +//nsubj(are-4, boys-3) +//advmod(are-4, here-5) +//***********/ + +//**************** +// * These are ambiguous parses. +// * Stanford picks the opposite choice from Relex. +// * See the README-Stanford for a discussion. +// I saw a cat in a hat. +//nsubj(saw-2, I-1) +//det(cat-4, a-3) +//dobj(saw-2, cat-4) +//det(hat-7, a-6) +//prep_in(cat-4, hat-7) +// +// I saw a cat with a telescope. +//nsubj(saw-2, I-1) +//det(cat-4, a-3) +//dobj(saw-2, cat-4) +//det(telescope-7, a-6) +//prep_with(cat-4, telescope-7) +//***********/ + +He is responsible for meals. + nsubj(responsible-3, he-1) + cop(responsible-3, is-2) + prep_for(responsible-3, meals-5) + +They shut down the station. + nsubj(shut-2, they-1) + prt(shut-2, down-3) + det(station-5, the-4) + dobj(shut-2, station-5) + +About 200 people came to the party. + quantmod(200-2, about-1) + num(people-3, 200-2) + nsubj(came-4, people-3) + det(party-7, the-6) + prep_to(came-4, party-7) + +I saw the man who you love. 
+ nsubj(saw-2, I-1) + det(man-4, the-3) + dobj(saw-2, man-4) + dobj(love-7, man-4) + rel(love-7, who-5) + nsubj(love-7, you-6) + rcmod(man-4, love-7) + + +//**************** +// * +// * relex is failing to generate the following: +// * Almost got it w/the B** rules but not quite ... +// +//rel(love-8, wife-6) +//rcmod(man-4, love-8) +// +// I saw the man whose wife you love. +//nsubj(saw-2, I-1) +//det(man-4, the-3) +//dobj(saw-2, man-4) +//poss(wife-6, whose-5) +//dobj(love-8, wife-6) +//rel(love-8, wife-6) +//nsubj(love-8, you-7) +//rcmod(man-4, love-8) +//***********/ + +I am ready to leave. + nsubj(ready-3, I-1) + cop(ready-3, am-2) + aux(leave-5, to-4) + xcomp(ready-3, leave-5) + +Tom likes to eat fish. + nsubj(likes-2, Tom-1) + xsubj(eat-4, Tom-1) + aux(eat-4, to-3) + xcomp(likes-2, eat-4) + dobj(eat-4, fish-5) + +//**************** +// He says that you like to swim. +//nsubj(says-2, he-1) +//complm(like-5, that-3) +//nsubj(like-5, you-4) +//ccomp(says-2, like-5) +//nsubj(swim-7, to-6) // NFW that this can be right. +//ccomp(like-5, swim-7) +//***********/ + +//**************** +// The garage is next to the house. +//det(garage-2, the-1) +//nsubj(next-4, garage-2) +//cop(next-4, is-3) +//det(house-7, the-6) +//prep_to(next-4, house-7) +//***********/ + +// ========================================================= +// PENN PART_OF_SPEECH TAGGING +// ========================================================= +// +Truffles picked during the spring are tasty. + nsubj(tasty-7-JJ, truffles-1-NNS) + partmod(truffles-1-NNS, picked-2-VBN) + det(spring-5-NN, the-4-DT) + prep_during(picked-2-VBN, spring-5-NN) + cop(tasty-7-JJ, are-6-VBP) + +I ate twelve truffles. + nsubj(ate-2-VBD, I-1-PRP) + num(truffles-4-NNS, twelve-3-CD) + dobj(ate-2-VBD, truffles-4-NNS) + +I have eaten twelve truffles. + nsubj(eaten-3-VBN, I-1-PRP) + aux(eaten-3-VBN, have-2-VBP) + num(truffles-5-NNS, twelve-4-CD) + dobj(eaten-3-VBN, truffles-5-NNS) + +I had eaten twelve truffles. 
+ nsubj(eaten-3-VBN, I-1-PRP) + aux(eaten-3-VBN, had-2-VBD) + num(truffles-5-NNS, twelve-4-CD) + dobj(eaten-3-VBN, truffles-5-NNS) + +The truffles were eaten. + det(truffles-2-NNS, the-1-DT) + nsubjpass(eaten-4-VBN, truffles-2-NNS) + auxpass(eaten-4-VBN, were-3-VBD) + + +// Full disclosure: Stanford currently generates +// dep(time-4-NN, young-8-JJ) which just means it doesn't know +// the right answer (which is advcl, right?). +// It also generates advmod(young-8-JJ, when-5-WRB) in addition +// to rel(young-8-JJ, when-5-WRB) which is not quite right +// either. +There was a time when we were young. + expl(was-2-VBD, there-1-EX) + det(time-4-NN, a-3-DT) + nsubj(was-2-VBD, time-4-NN) + rel(young-8-JJ, when-5-WRB) + nsubj(young-8-JJ, we-6-PRP) + cop(young-8-JJ, were-7-VBD) + advcl(time-4-NN, young-8-JJ) + +Is there a better way? + expl(is-1-VBZ, there-2-EX) + det(way-5-NN, a-3-DT) + amod(way-5-NN, better-4-JJR) + nsubj(is-1-VBZ, way-5-NN) + +Is this the largest you can find? + cop(largest-4-JJS, is-1-VBZ) + nsubj(largest-4-JJS, this-2-DT) + det(largest-4-JJS, the-3-DT) + nsubj(find-7-VB, you-5-PRP) + aux(find-7-VB, can-6-MD) + rcmod(largest-4-JJS, find-7-VB) + +But my efforts to win his heart have failed. + poss(efforts-3-NNS, my-2-PRP$) + nsubj(failed-9-VBN, efforts-3-NNS) + aux(win-5-VB, to-4-TO) + infmod(efforts-3-NNS, win-5-VB) + poss(heart-7-NN, his-6-PRP$) + dobj(win-5-VB, heart-7-NN) + aux(failed-9-VBN, have-8-VBP) + +The undergrads are occasionally late. + det(undergrads-2-NNS, the-1-DT) + nsubj(late-5-JJ, undergrads-2-NNS) + cop(late-5-JJ, are-3-VBP) + advmod(late-5-JJ, occasionally-4-RB) + +The height of Mount Everest is 8,848 metres. + det(height-2-NN, the-1-DT) + nsubj(metres-8-NNS, height-2-NN) + nn(Everest-5-NNP, Mount-4-NNP) + prep_of(height-2-NN, Everest-5-NNP) + cop(metres-8-NNS, is-6-VBZ) + num(metres-8-NNS, 8,848-7-CD) + +It happened on December 3rd, 1990. 
+ nsubj(happened-2-VBD, it-1-PRP) + prep_on(happened-2-VBD, December-4-NNP) + num(December-4-NNP, 3rd-5-CD) + num(December-4-NNP, 1990-7-CD)