Skip to content

Commit

Permalink
Move test fixtures into TSV plain text files.
Browse files Browse the repository at this point in the history
Test data for RelEx and Stanford are moved into TSV files.

The test classes (`TestRelEx` and `TestStanford`), in addition to having most of their content factored out, also had unnormalized line endings, so to Git these classes look entirely replaced.

All of the test data now resides in these `.tsv` files. I think the format should be self-explanatory just by looking at it.

Ant build and tests still work.

@linas I hope this is acceptable.

Fixed opencog#103.
  • Loading branch information
ceefour committed Jul 14, 2014
1 parent a5939b9 commit 5209d00
Show file tree
Hide file tree
Showing 13 changed files with 1,690 additions and 1,490 deletions.
34 changes: 20 additions & 14 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<property name="debuglevel" value="source,lines,vars"/>
<property name="target" value="1.5"/> <!-- this 1.5 is java, not relex -->
<property name="source" value="1.5"/> <!-- this 1.5 is java, not relex -->
<property name="maven.profile" value="java5"/>

<path id="relex.classpath">
<pathelement location="bin"/>
Expand Down Expand Up @@ -77,7 +78,7 @@
</not>
</condition>
</fail>
<artifact:settings file="java5.settings.xml"/>
<artifact:settings file="${maven.profile}.settings.xml"/>
<artifact:pom file="pom.xml"/>
<artifact:resolve>
<path refid="cp.compile" classpath="compile"/>
Expand Down Expand Up @@ -180,22 +181,27 @@
<target name="realclean" depends="clean"/>

<!-- Run tests -->
<target name="test" depends="build-tests">
<java classname="relex.test.TestStanford" failonerror="true" fork="yes">
<jvmarg line="-Xmx1024m"/>
<jvmarg line="-Djava.library.path=/usr/lib/jni:/usr/lib:/usr/local/lib/jni:/usr/local/lib"/>
<classpath refid="cp.test"/>
<target name="test-stanford" depends="build-tests">
<junit fork="true" printsummary="true" showoutput="true" maxmemory="1024m">
<jvmarg value="-Djava.library.path=/usr/lib/jni:/usr/lib:/usr/local/lib/jni:/usr/local/lib"/>
<classpath refid="cp.test"/>
<classpath refid="relex.classpath"/>
<arg line=""/>
</java>
<java classname="relex.test.TestRelEx" failonerror="true" fork="yes">
<jvmarg line="-Xmx1024m"/>
<jvmarg line="-Djava.library.path=/usr/lib/jni:/usr/lib:/usr/local/lib/jni:/usr/local/lib"/>
<classpath refid="cp.test"/>
<classpath path="src/test/resources"/>
<formatter type="brief" usefile="false"/>
<test name="relex.test.TestStanford"/>
</junit>
</target>
<target name="test-relex" depends="build-tests">
<junit fork="true" printsummary="true" showoutput="true" maxmemory="1024m">
<jvmarg value="-Djava.library.path=/usr/lib/jni:/usr/lib:/usr/local/lib/jni:/usr/local/lib"/>
<classpath refid="cp.test"/>
<classpath refid="relex.classpath"/>
<arg line=""/>
</java>
<classpath path="src/test/resources"/>
<formatter type="brief" usefile="false"/>
<test name="relex.test.TestRelEx"/>
</junit>
</target>
<target name="test" depends="test-stanford, test-relex"/>

<!-- Build a jar file, for public consumption -->
<target name="jar" depends="build-project">
Expand Down
4 changes: 4 additions & 0 deletions java7.settings.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Empty Maven settings file: it declares no settings of its own.
  build.xml loads "${maven.profile}.settings.xml" through artifact:settings,
  so this is the file that gets picked up when maven.profile is "java7".
  NOTE(review): the default namespace below is the POM 4.0.0 URI while the
  schema location points at settings-1.0.0.xsd; confirm this pairing is intended.
-->
<settings xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
</settings>
41 changes: 24 additions & 17 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,17 @@
</resources>
</build>
<dependencies>
<dependency>
<groupId>net.sf.jwordnet</groupId>
<artifactId>jwnl</artifactId>
<version>1.4_rc3</version>
<exclusions>
<exclusion>
<artifactId>commons-logging</artifactId>
<groupId>commons-logging</groupId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>net.sf.opencsv</groupId>
Expand Down Expand Up @@ -239,6 +250,12 @@
<activeByDefault>true</activeByDefault>
</activation>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>17.0</version>
</dependency>

<dependency>
<groupId>org.apache.odftoolkit</groupId>
<artifactId>odfdom-java</artifactId>
Expand Down Expand Up @@ -317,17 +334,6 @@
<version>3.2</version>
</dependency>

<dependency>
<groupId>net.sf.jwordnet</groupId>
<artifactId>jwnl</artifactId>
<version>1.4_rc3</version>
<exclusions>
<exclusion>
<artifactId>commons-logging</artifactId>
<groupId>commons-logging</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.linkgrammar</groupId>
<artifactId>linkgrammar</artifactId>
Expand All @@ -349,12 +355,6 @@
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava-jdk5</artifactId>
<version>17.0</version>
</dependency>
</dependencies>
</profile>
<profile>
Expand All @@ -371,6 +371,13 @@
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava-jdk5</artifactId>
<version>17.0</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>
125 changes: 125 additions & 0 deletions src/java_test/relex/test/RelExCases.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package relex.test;

import java.io.InputStreamReader;
import java.net.URL;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;

import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;

/**
 * Loads RelEx test fixtures from tab-separated (TSV) files on the classpath.
 *
 * <p>File layout, as read by {@link #parseTsv(String)}:
 * <ul>
 * <li>the first line is a header and is skipped;</li>
 * <li>a row whose first column starts with {@code //} sets the description
 * that is attached to the cases following it;</li>
 * <li>a row with a non-blank first column starts a new case, that column
 * being the sentence;</li>
 * <li>a row with a blank first column and a non-blank second column adds the
 * second column to the set of relations being collected.</li>
 * </ul>
 *
 * @author ceefour
 */
public class RelExCases {

    private static final Logger log = LoggerFactory
            .getLogger(RelExCases.class);

    /** One fixture: a sentence, its expected relations and an optional description. */
    static class RelExCase {
        String sentence;
        Set<String> relations;
        Optional<String> description;

        RelExCase(String sentence, Set<String> relations, Optional<String> description) {
            this.description = description;
            this.sentence = sentence;
            this.relations = relations;
        }
    }

    /** Flattens a {@link RelExCase} into {@code { sentence, relations, description }}. */
    static class RelExCaseToObjectArray implements Function<RelExCase, Object[]> {
        public Object[] apply(RelExCase input) {
            return new Object[] { input.sentence, input.relations, input.description };
        }
    }

    /**
     * Parses a TSV fixture file from the classpath into a list of cases.
     *
     * @param casesPath classpath resource path, resolved relative to this class
     *        (for example {@code "/relex-comparatives.tsv"})
     * @return the cases in file order
     * @throws NullPointerException if the resource cannot be found
     * @throws RuntimeException if the resource cannot be read or parsed; the
     *         underlying exception is kept as the cause
     */
    protected static ImmutableList<RelExCase> parseTsv(String casesPath) {
        URL casesTsv = Preconditions.checkNotNull(RelExCases.class.getResource(casesPath),
                "Cannot load '%s' from classpath", casesPath);
        log.info("Loading '{}'...", casesTsv);
        try {
            // Decode explicitly as UTF-8 instead of the platform default charset,
            // so the fixtures parse the same way on every machine.
            CSVReader reader = new CSVReader(new InputStreamReader(casesTsv.openStream(), "UTF-8"),
                    '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER, CSVWriter.NO_ESCAPE_CHARACTER);
            try {
                Optional<String> curDescription = Optional.absent();
                Optional<String> curSentence = Optional.absent();
                ImmutableSet.Builder<String> curRelations = ImmutableSet.builder();
                ImmutableList.Builder<RelExCase> cases = ImmutableList.builder();
                reader.readNext(); // skip header line
                String[] row;
                while ((row = reader.readNext()) != null) {
                    if (row.length == 0) {
                        continue;
                    }
                    final boolean isComment = row[0].startsWith("//");
                    final boolean isSentence = !isComment && !row[0].trim().isEmpty();
                    // Both a comment row and a sentence row close the case collected so far.
                    // The flush happens before the description is updated, so the pending
                    // case keeps the description that was in effect when it was read.
                    if ((isComment || isSentence) && curSentence.isPresent()) {
                        cases.add(new RelExCase(curSentence.get(), curRelations.build(), curDescription));
                        curSentence = Optional.absent();
                        curRelations = ImmutableSet.builder();
                    }
                    if (isComment) {
                        curDescription = Optional.of(row[0].substring(2).trim());
                    } else if (isSentence) {
                        curSentence = Optional.of(row[0].trim());
                    } else if (row.length >= 2 && !row[1].trim().isEmpty()) {
                        // relation row
                        curRelations.add(row[1].trim());
                    }
                }
                // The last case has no following row to trigger its flush.
                if (curSentence.isPresent()) {
                    cases.add(new RelExCase(curSentence.get(), curRelations.build(), curDescription));
                }
                final ImmutableList<RelExCase> caseList = cases.build();
                log.info("Got {} cases from '{}'", caseList.size(), casesPath);
                return caseList;
            } finally {
                reader.close();
            }
        } catch (Exception e) {
            throw new RuntimeException("Cannot read " + casesTsv, e);
        }
    }

    /**
     * @return the cases of {@code /relex-comparatives.tsv}, each element being an
     *         {@code Object[]} of {@code { sentence, relations, description }}
     */
    public static Object[] provideComparatives() {
        return FluentIterable.from(parseTsv("/relex-comparatives.tsv"))
                .transform(new RelExCaseToObjectArray()).toArray(Object[].class);
    }

    /**
     * @return the cases of {@code /relex-conjunction.tsv}, each element being an
     *         {@code Object[]} of {@code { sentence, relations, description }}
     */
    public static Object[] provideConjunction() {
        return FluentIterable.from(parseTsv("/relex-conjunction.tsv"))
                .transform(new RelExCaseToObjectArray()).toArray(Object[].class);
    }

    /**
     * @return the cases of {@code /relex-extraposition.tsv}, each element being an
     *         {@code Object[]} of {@code { sentence, relations, description }}
     */
    public static Object[] provideExtraposition() {
        return FluentIterable.from(parseTsv("/relex-extraposition.tsv"))
                .transform(new RelExCaseToObjectArray()).toArray(Object[].class);
    }

}
120 changes: 120 additions & 0 deletions src/java_test/relex/test/StanfordCases.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package relex.test;

import java.io.InputStreamReader;
import java.net.URL;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;

import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;

/**
 * Loads Stanford test fixtures from tab-separated (TSV) files on the classpath.
 *
 * <p>File layout, as read by {@link #parseTsv(String)}:
 * <ul>
 * <li>the first line is a header and is skipped;</li>
 * <li>a row whose first column starts with {@code //} sets the description
 * that is attached to the cases following it;</li>
 * <li>a row with a non-blank first column starts a new case, that column
 * being the sentence;</li>
 * <li>a row with a blank first column and a non-blank second column adds the
 * second column to the set of relations being collected.</li>
 * </ul>
 *
 * @author ceefour
 */
public class StanfordCases {

    private static final Logger log = LoggerFactory
            .getLogger(StanfordCases.class);

    /** One fixture: a sentence, its expected relations and an optional description. */
    static class StanfordCase {
        String sentence;
        Set<String> relations;
        Optional<String> description;

        StanfordCase(String sentence, Set<String> relations, Optional<String> description) {
            this.description = description;
            this.sentence = sentence;
            this.relations = relations;
        }
    }

    /** Flattens a {@link StanfordCase} into {@code { sentence, relations, description }}. */
    static class StanfordCaseToObjectArray implements Function<StanfordCase, Object[]> {
        public Object[] apply(StanfordCase input) {
            return new Object[] { input.sentence, input.relations, input.description };
        }
    }

    /**
     * Parses a TSV fixture file from the classpath into a list of cases.
     *
     * @param casesPath classpath resource path, resolved relative to this class
     *        (for example {@code "/stanford-untagged.tsv"})
     * @return the cases in file order
     * @throws NullPointerException if the resource cannot be found
     * @throws RuntimeException if the resource cannot be read or parsed; the
     *         underlying exception is kept as the cause
     */
    protected static ImmutableList<StanfordCase> parseTsv(String casesPath) {
        URL casesTsv = Preconditions.checkNotNull(StanfordCases.class.getResource(casesPath),
                "Cannot load '%s' from classpath", casesPath);
        log.info("Loading '{}'...", casesTsv);
        try {
            // Decode explicitly as UTF-8 instead of the platform default charset,
            // so the fixtures parse the same way on every machine.
            CSVReader reader = new CSVReader(new InputStreamReader(casesTsv.openStream(), "UTF-8"),
                    '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER, CSVWriter.NO_ESCAPE_CHARACTER);
            try {
                Optional<String> curDescription = Optional.absent();
                Optional<String> curSentence = Optional.absent();
                ImmutableSet.Builder<String> curRelations = ImmutableSet.builder();
                ImmutableList.Builder<StanfordCase> cases = ImmutableList.builder();
                reader.readNext(); // skip header line
                String[] row;
                while ((row = reader.readNext()) != null) {
                    if (row.length == 0) {
                        continue;
                    }
                    final boolean isComment = row[0].startsWith("//");
                    final boolean isSentence = !isComment && !row[0].trim().isEmpty();
                    // Both a comment row and a sentence row close the case collected so far.
                    // The flush happens before the description is updated, so the pending
                    // case keeps the description that was in effect when it was read.
                    if ((isComment || isSentence) && curSentence.isPresent()) {
                        cases.add(new StanfordCase(curSentence.get(), curRelations.build(), curDescription));
                        curSentence = Optional.absent();
                        curRelations = ImmutableSet.builder();
                    }
                    if (isComment) {
                        curDescription = Optional.of(row[0].substring(2).trim());
                    } else if (isSentence) {
                        curSentence = Optional.of(row[0].trim());
                    } else if (row.length >= 2 && !row[1].trim().isEmpty()) {
                        // relation row
                        curRelations.add(row[1].trim());
                    }
                }
                // The last case has no following row to trigger its flush.
                if (curSentence.isPresent()) {
                    cases.add(new StanfordCase(curSentence.get(), curRelations.build(), curDescription));
                }
                final ImmutableList<StanfordCase> caseList = cases.build();
                log.info("Got {} cases from '{}'", caseList.size(), casesPath);
                return caseList;
            } finally {
                reader.close();
            }
        } catch (Exception e) {
            throw new RuntimeException("Cannot read " + casesTsv, e);
        }
    }

    /**
     * @return the cases of {@code /stanford-untagged.tsv}, each element being an
     *         {@code Object[]} of {@code { sentence, relations, description }}
     */
    public static Object[] provideUntagged() {
        return FluentIterable.from(parseTsv("/stanford-untagged.tsv"))
                .transform(new StanfordCaseToObjectArray()).toArray(Object[].class);
    }

    /**
     * @return the cases of {@code /stanford-tagged.tsv}, each element being an
     *         {@code Object[]} of {@code { sentence, relations, description }}
     */
    public static Object[] provideTagged() {
        return FluentIterable.from(parseTsv("/stanford-tagged.tsv"))
                .transform(new StanfordCaseToObjectArray()).toArray(Object[].class);
    }

}
Loading

0 comments on commit 5209d00

Please sign in to comment.