diff --git a/dev/resources/grafter/tabular/test-bom.csv b/dev/resources/grafter/tabular/test-bom.csv
new file mode 100644
index 00000000..3a39807f
--- /dev/null
+++ b/dev/resources/grafter/tabular/test-bom.csv
@@ -0,0 +1,3 @@
+﻿foo
+a
+b
diff --git a/dev/resources/grafter/tabular/test-nobom.csv b/dev/resources/grafter/tabular/test-nobom.csv
new file mode 100644
index 00000000..8c175220
--- /dev/null
+++ b/dev/resources/grafter/tabular/test-nobom.csv
@@ -0,0 +1,3 @@
+foo
+a
+b
diff --git a/project.clj b/project.clj
index 04d3ee48..b92bd232 100644
--- a/project.clj
+++ b/project.clj
@@ -13,6 +13,7 @@
                  [grafter/vocabularies "0.2.0"]
                  [commons-logging "1.2"] ;; Shouldn't need this, but somehow excluded and required by SPARQLRepository
 
+                 [commons-io/commons-io "2.4"]
                  [org.clojure/data.csv "0.1.3"]
                  [grafter/clj-excel "0.0.9" :exclusions [commons-codec]]
                  [me.raynes/fs "1.4.6"]
diff --git a/src/tabular/grafter/tabular/csv.clj b/src/tabular/grafter/tabular/csv.clj
index 12e8983a..95edbb50 100644
--- a/src/tabular/grafter/tabular/csv.clj
+++ b/src/tabular/grafter/tabular/csv.clj
@@ -4,11 +4,23 @@
             [clojure.java.io :as io]
             [grafter.tabular.common :as tab]
             [grafter.rdf.protocols :refer [raw-value]])
-  (:import [java.io IOException]))
+  (:import [java.io IOException Reader]
+           [org.apache.commons.io.input BOMInputStream]))
 
+;; Reads `source` as CSV and builds a dataset from the parsed rows.  A leading
+;; byte-order mark is stripped so it cannot leak into the first cell.
 (defmethod tab/read-dataset* :csv
   [source opts]
-  (let [csv-seq (tab/mapply csv/read-csv (tab/mapply io/reader source opts) opts)]
+  (let [reader (if (instance? Reader source)
+                 ;; A Reader is already decoded to characters and
+                 ;; io/input-stream cannot open one, so use it as before.
+                 (tab/mapply io/reader source opts)
+                 ;; Otherwise strip the BOM at byte level before decoding.
+                 (-> source
+                     io/input-stream
+                     BOMInputStream.
+                     (#(tab/mapply io/reader % opts))))
+        csv-seq (tab/mapply csv/read-csv reader opts)]
     (if (nil? csv-seq)
       (throw (IOException. (str "There was an error loading the CSV file: " source)))
       (tab/make-dataset csv-seq))))
diff --git a/test/grafter/tabular_test.clj b/test/grafter/tabular_test.clj
index 507ca7e7..fb8bc1f9 100644
--- a/test/grafter/tabular_test.clj
+++ b/test/grafter/tabular_test.clj
@@ -150,6 +150,16 @@
           (is-a-dataset? dataset)
           (has-metadata? dataset))))
 
+    (testing "Open CSV file and strip Byte-Order-Mark"
+      (testing "Strips BOM from first column name"
+        (let [dataset (read-dataset (io/resource "grafter/tabular/test-bom.csv"))
+              col-names (-> dataset (make-dataset move-first-row-to-header) (column-names))]
+          (is (= "foo" (first col-names)))))
+      (testing "Doesnt affect csv files without BOM"
+        (let [dataset (read-dataset (io/resource "grafter/tabular/test-nobom.csv"))
+              col-names (-> dataset (make-dataset move-first-row-to-header) (column-names))]
+          (is (= "foo" (first col-names))))))
+
     (testing "Open text file"
       (let [dataset (read-dataset (io/resource "grafter/tabular/test.txt") :format :csv)]
         (testing "returns a dataset"