Skip to content

Commit

Permalink
Merge pull request #100 from Swirrl/strip-bom-from-csv-files
Browse files Browse the repository at this point in the history
Strip Byte-Order-Mark when reading CSVs
  • Loading branch information
RickMoynihan authored Jun 7, 2017
2 parents e4ff2fe + 2322150 commit 2af8209
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 2 deletions.
3 changes: 3 additions & 0 deletions dev/resources/grafter/tabular/test-bom.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
foo
a
b
3 changes: 3 additions & 0 deletions dev/resources/grafter/tabular/test-nobom.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
foo
a
b
1 change: 1 addition & 0 deletions project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
[grafter/vocabularies "0.2.0"]

[commons-logging "1.2"] ;; Shouldn't need this, but somehow excluded and required by SPARQLRepository
[commons-io/commons-io "2.4"]
[org.clojure/data.csv "0.1.3"]
[grafter/clj-excel "0.0.9" :exclusions [commons-codec]]
[me.raynes/fs "1.4.6"]
Expand Down
9 changes: 7 additions & 2 deletions src/tabular/grafter/tabular/csv.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@
[clojure.java.io :as io]
[grafter.tabular.common :as tab]
[grafter.rdf.protocols :refer [raw-value]])
(:import [java.io IOException]))
(:import [java.io IOException]
[org.apache.commons.io.input BOMInputStream]))

;; CSV implementation of tab/read-dataset*: builds a reader over `source`,
;; parses it with csv/read-csv and passes the parsed rows to tab/make-dataset.
;; `opts` is handed to tab/mapply together with io/reader and with csv/read-csv.
;; Throws IOException when csv/read-csv yields nil.
(defmethod tab/read-dataset* :csv
[source opts]
;; NOTE(review): this listing is a diff view rendered without +/- markers. The
;; next line appears to be the pre-change binding (the file header reports
;; 2 deletions), superseded by the `reader`/`csv-seq` bindings below — confirm
;; against the commit.
(let [csv-seq (tab/mapply csv/read-csv (tab/mapply io/reader source opts) opts)]
;; Post-change binding: open a byte stream on `source`, wrap it in a
;; BOMInputStream, then build the character reader on top of the wrapped
;; stream. Per the commit title the wrapper is there to strip a leading
;; byte-order mark before the CSV parser sees the first cell.
(let [reader (-> source
io/input-stream
BOMInputStream.
;; Anonymous fn so the threaded stream is placed in io/reader's argument
;; position instead of becoming the first argument of tab/mapply.
(#(tab/mapply io/reader % opts)))
csv-seq (tab/mapply csv/read-csv reader opts)]
(if (nil? csv-seq)
(throw (IOException. (str "There was an error loading the CSV file: " source)))
(tab/make-dataset csv-seq))))
Expand Down
10 changes: 10 additions & 0 deletions test/grafter/tabular_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,16 @@
(is-a-dataset? dataset)
(has-metadata? dataset))))

;; Regression tests for byte-order-mark handling when reading CSV resources.
;; Both cases load a fixture, promote its first row to the header with
;; move-first-row-to-header, and expect the first column name to be exactly "foo".
(testing "Open CSV file and strip Byte-Order-Mark"
(testing "Strips BOM from first column name"
;; presumably test-bom.csv begins with a BOM (not visible in the listing) — verify the fixture bytes
(let [dataset (read-dataset (io/resource "grafter/tabular/test-bom.csv"))
col-names (-> dataset (make-dataset move-first-row-to-header) (column-names))]
(is (= "foo" (first col-names)))))
(testing "Doesnt affect csv files without BOM"
;; Control case: same rows without a BOM must give the same header name.
(let [dataset (read-dataset (io/resource "grafter/tabular/test-nobom.csv"))
col-names (-> dataset (make-dataset move-first-row-to-header) (column-names))]
(is (= "foo" (first col-names))))))

(testing "Open text file"
(let [dataset (read-dataset (io/resource "grafter/tabular/test.txt") :format :csv)]
(testing "returns a dataset"
Expand Down

0 comments on commit 2af8209

Please sign in to comment.