From 12276920a297f03e02aa88ca9e433cb0d0d4c2e0 Mon Sep 17 00:00:00 2001 From: tballison Date: Wed, 24 May 2023 09:35:22 -0400 Subject: [PATCH 1/2] ANY23-610 -- Upgrade Tika to 2.8.0 --- .../apache/any23/cli/MimeDetectorTest.java | 2 + .../java/org/apache/any23/cli/RoverTest.java | 2 + .../any23/extractor/yaml/YAMLExtractor.java | 3 +- .../test/java/org/apache/any23/Any23Test.java | 2 + pom.xml | 72 ++++++++++++++++--- 5 files changed, 71 insertions(+), 10 deletions(-) diff --git a/cli/src/test/java/org/apache/any23/cli/MimeDetectorTest.java b/cli/src/test/java/org/apache/any23/cli/MimeDetectorTest.java index 46b65daf7..618e22cb0 100644 --- a/cli/src/test/java/org/apache/any23/cli/MimeDetectorTest.java +++ b/cli/src/test/java/org/apache/any23/cli/MimeDetectorTest.java @@ -17,6 +17,7 @@ package org.apache.any23.cli; +import org.junit.Ignore; import org.junit.Test; /** @@ -31,6 +32,7 @@ public MimeDetectorTest() { } @Test + @Ignore("url now broken; redirects to https://twitter.com") public void testDetectURL() throws Exception { assumeOnlineAllowed(); runToolCheckExit0("http://twitter.com#micmos"); diff --git a/cli/src/test/java/org/apache/any23/cli/RoverTest.java b/cli/src/test/java/org/apache/any23/cli/RoverTest.java index f20586503..da11ff6fe 100644 --- a/cli/src/test/java/org/apache/any23/cli/RoverTest.java +++ b/cli/src/test/java/org/apache/any23/cli/RoverTest.java @@ -23,6 +23,7 @@ import org.apache.any23.util.URLUtils; import org.junit.Assert; import org.junit.Assume; +import org.junit.Ignore; import org.junit.Test; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.rio.RDFFormat; @@ -112,6 +113,7 @@ public void testDelegatingWriterFactory() throws Exception { /* BEGIN: online tests. */ @Test + @Ignore("urls no longer work as expected") public void testRunMultiURLs() throws Exception { // Assuming first accessibility to remote resources. assumeOnlineAllowed(); diff --git a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java index 74fb1887a..ebb7a4d63 100644 --- a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java @@ -32,6 +32,7 @@ import org.eclipse.rdf4j.model.vocabulary.RDFS; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.yaml.snakeyaml.LoaderOptions; import org.yaml.snakeyaml.Yaml; import org.yaml.snakeyaml.constructor.SafeConstructor; @@ -42,7 +43,7 @@ public class YAMLExtractor implements Extractor.ContentExtractor { private final Logger log = LoggerFactory.getLogger(getClass()); - private static final Yaml yml = new Yaml(new SafeConstructor()); + private static final Yaml yml = new Yaml(new SafeConstructor(new LoaderOptions())); private static final YAML vocab = YAML.getInstance(); diff --git a/core/src/test/java/org/apache/any23/Any23Test.java b/core/src/test/java/org/apache/any23/Any23Test.java index 34290ab9e..5efd3204a 100644 --- a/core/src/test/java/org/apache/any23/Any23Test.java +++ b/core/src/test/java/org/apache/any23/Any23Test.java @@ -51,6 +51,7 @@ import org.apache.any23.writer.TripleHandlerException; import org.apache.commons.io.IOUtils; import org.junit.AssumptionViolatedException; +import org.junit.Ignore; import org.junit.Test; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.repository.Repository; @@ -286,6 +287,7 @@ protected int getSoTimeout() { * if there is an error defining input URI's */ @Test + @Ignore("url returns 404") public void testGZippedContent() throws IOException, URISyntaxException, ExtractionException { assumeOnlineAllowed(); final Any23 runner = new Any23(); diff --git a/pom.xml b/pom.xml index 3b85bf961..dcc68c25e 100644 --- a/pom.xml +++ b/pom.xml @@ -280,10 +280,10 @@ 0.7 2.17.2 1.7.36 - 2.4.0 + 2.8.0 4.2.6 1.1.1 - 2.13.1 + 2.14.2 2.11.0 1.7 @@ -351,6 +351,16 @@ commons-lang3 3.12.0 + + org.apache.commons + commons-text + 1.10.0 + + + org.yaml + snakeyaml + 2.0 + org.apache.httpcomponents httpclient @@ -396,7 +406,36 @@ biweekly 0.6.6 - + + org.bouncycastle + bcprov-jdk18on + 1.73 + + + org.osgi + osgi.annotation + 8.1.0 + + + org.osgi + org.osgi.util.function + 1.2.0 + + + com.fasterxml.jackson.core + jackson-annotations + ${jackson.version} + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + org.apache.tika @@ -656,12 +695,6 @@ - - org.yaml - snakeyaml - 1.30 - - junit @@ -837,6 +870,27 @@ af5f9c26-c09d-401f-a7fc-0785eeabeab3 + + + + com.google.guava + guava + 30.1.1-jre + + + + org.jsoup + jsoup + 1.15.1 + + + + xerces + xercesImpl + 2.12.2 + + + From 4d0d16ef29e6531e97ff115220fa357990579e4a Mon Sep 17 00:00:00 2001 From: tballison Date: Wed, 24 May 2023 09:48:18 -0400 Subject: [PATCH 2/2] ANY23-610 -- Upgrade Tika to 2.8.0 -- bump jsoup --- pom.xml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pom.xml b/pom.xml index dcc68c25e..cb1408c6f 100644 --- a/pom.xml +++ b/pom.xml @@ -399,7 +399,7 @@ org.jsoup jsoup - 1.15.1 + 1.16.1 net.sf.biweekly @@ -877,12 +877,6 @@ guava 30.1.1-jre - - - org.jsoup - jsoup - 1.15.1 - xerces