Add extra context to the validation error logging
Also:
* ensure each validation error is only printed once
* add a couple of extra test cases
stevorobs3 committed Nov 30, 2023
1 parent 5dc1bf3 commit 393e1eb
Showing 8 changed files with 116 additions and 64 deletions.
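
For orientation, the reworked --validate-data path now reports each failing cell exactly once, as a logged warning naming the row, column, column title and source file. A rough REPL sketch of that flow, using the fixture files added in this commit (the session itself is illustrative, not part of the change):

(require '[csv2rdf.main :as main])

;; Validate a tabular file against its CSVW metadata without producing RDF.
(main/inner-main ["-t" "./test/examples/validation/fail-1.csv"
                  "-u" "./test/examples/validation/named-numbers.json"
                  "--validate-data"])
;; => {:data-validation-errors? true}
;; One warning is logged for the single bad cell:
;; Row #3 col #2 (column 'number') in file: fail-1.csv has error: Cannot parse 'two' as type 'int': For input string: "two"
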
31 changes: 13 additions & 18 deletions src/csv2rdf/csvw.clj
@@ -23,7 +23,7 @@
(defn annotate-tables [tabular-source metadata-source]
(processing/get-metadata tabular-source metadata-source))

(defn- validate-rows
(defn- rows-are-valid?
"Validates the CSVW schema for the given tabular file, metadata and options.
`tabular-source` and `metadata-source` can be any of the following
@@ -36,20 +36,22 @@
If metadata-source is non-nil then processing will start from the
associated metadata document, otherwise it will start from
tabular-source."
tabular-source.
Returns true if no validation errors were detected."
[tabular-source metadata-source]
(let [{:keys [tables] :as metadata} (processing/get-metadata tabular-source metadata-source)
table-group-dialect (:dialect metadata)
output-tables (remove properties/suppress-output? tables)
;;ctx (table-group-context mode metadata) ;; TODO this might be useful later when iterating over tables
]

(util/liberal-mapcat (fn [{:keys [url dialect] :as table}]
;;(validated-rows ctx table table-group-dialect)
(let [dialect (or dialect table-group-dialect)]
(csv/annotated-rows url table dialect)))
results (util/liberal-mapcat (fn [{:keys [url dialect] :as table}]
(let [dialect (or dialect table-group-dialect)]
(csv/annotated-rows url table dialect)))

output-tables)))
output-tables)]
(->> results
(mapcat :cells)
(mapcat :errors)
empty?)))

(defn only-validate-schema
"Only validate the data against the schemas in the metadata file, and
@@ -58,14 +60,7 @@
Returns a map with the key `:data-validation-errors?` set to a
boolean indicating whether any schema errors occurred."
[{:keys [tabular-source metadata-source]}]
(let [errors? (atom false)]
(doseq [{:keys [cells] row-number :source-number :as _row} (validate-rows tabular-source metadata-source)
{:keys [errors column-number column] :as _cell} cells
:when (seq errors)]
(reset! errors? true)
(doseq [error errors]
(println (format "Row #%d col #%d (column '%s') has error: " row-number column-number (:name column)) error)))
{:data-validation-errors? @errors?}))
{:data-validation-errors? (not (rows-are-valid? tabular-source metadata-source))})

(defn csv->rdf
"Runs the CSVW process for the given tabular or metadata data sources
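
The same check is exposed programmatically via csv2rdf.csvw/only-validate-schema, which now just delegates to rows-are-valid? above. A minimal sketch of calling it directly; passing java.io.File sources here is an assumption based on the (collapsed) list of accepted source types in the docstring:

(require '[csv2rdf.csvw :as csvw]
         '[clojure.java.io :as io])

;; Walks every non-suppressed table, parses each row's cells, and reports
;; whether any cell-level errors were found.
(csvw/only-validate-schema
 {:tabular-source  (io/file "./test/examples/validation/success.csv")
  :metadata-source (io/file "./test/examples/validation/named-numbers.json")})
;; => {:data-validation-errors? false}
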
7 changes: 5 additions & 2 deletions src/csv2rdf/logging.clj
@@ -29,8 +29,11 @@
(warn [_this _msg])
(error [_this _msg]))

(defn memory-logger []
(->MemoryLogger (atom []) (atom [])))
(defn memory-logger
([]
(memory-logger (atom []) (atom [])))
([warnings errors]
(->MemoryLogger warnings errors)))

(def ^:dynamic *logger* (->ForwardingLogger))

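
The extra arity of memory-logger lets a caller supply its own atoms and inspect everything that was logged afterwards, which is exactly how the new test helper below uses it. A small sketch, assuming (as the tests do) that MemoryLogger appends each warning or error message to the corresponding atom:

(require '[csv2rdf.logging :as logging])

(let [warnings (atom [])
      errors   (atom [])]
  ;; Route all logging to a MemoryLogger backed by our own atoms for the
  ;; dynamic extent of the body, then inspect what was captured.
  (logging/with-logger (logging/memory-logger warnings errors)
    (logging/log-warning "something looked suspicious"))
  {:warnings @warnings :errors @errors})
;; => {:warnings ["something looked suspicious"] :errors []}
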
37 changes: 25 additions & 12 deletions src/csv2rdf/tabular/csv.clj
@@ -1,5 +1,6 @@
(ns csv2rdf.tabular.csv
(:require [clojure.string :as string]
(:require [clojure.string :as str]
[clojure.string :as string]
[csv2rdf.metadata.dialect :as dialect]
[csv2rdf.tabular.csv.reader :as reader]
[csv2rdf.metadata.table :as table]
@@ -29,7 +30,7 @@
(column/from-titles idx titles default-lang))
titles)]
{:comments (mapv :comment comment-rows)
:columns columns}))
:columns columns}))

(defn ^{:tabular-spec "8.10.3"} validate-data-rows
"Validates the data rows in the tabular file and extracts any embedded comments. The row number of any rows
@@ -102,9 +103,13 @@
cell))
parsed-cells)))

(defn parse-row-cells [{:keys [cells] :as row} table {:keys [skipColumns] :as options}]
(defn parse-row-cells
[{:keys [cells source-row-number] :as row}
{:keys [url] :as table}
{:keys [skipColumns]}]
(let [columns (table/columns table)
cell-values (concat (drop skipColumns cells) (repeat "")) ;;extend cells to cover any virtual columns
;;extend cells to cover any virtual columns
cell-values (concat (drop skipColumns cells) (repeat ""))
cell-column-pairs (map vector cell-values columns)
parsed-cells (map-indexed (fn [col-idx [cell column]]
(let [result (cell/parse-cell cell column)
@@ -115,8 +120,16 @@
:column column)))
cell-column-pairs)]
;;log cell errors
(doseq [err (mapcat cell/errors parsed-cells)]
(logging/log-warning err))
(doseq [{:keys [errors column-number column]} parsed-cells
:when (seq errors)]
(doseq [error errors]
(logging/log-warning
(format "Row #%s col #%s (column '%s') in file: %s has error: %s"
source-row-number
column-number
(:name column)
(last (str/split (.toString url) #"/"))
error))))

(assoc row :parsed-cells parsed-cells)))

@@ -130,14 +143,14 @@
(assoc column-value-bindings :_row number :_sourceRow source-row-number)))

(defn get-cell-template-bindings [{:keys [column-number source-column-number column] :as cell}]
{:_name (util/percent-decode (properties/column-name column))
:_column column-number
{:_name (util/percent-decode (properties/column-name column))
:_column column-number
:_sourceColumn source-column-number})

(defn get-cell-urls [bindings table {:keys [column] :as cell}]
(let [property-urls {:aboutUrl (some-> (properties/about-url column) (template-property/resolve-uri-template-property bindings table))
(let [property-urls {:aboutUrl (some-> (properties/about-url column) (template-property/resolve-uri-template-property bindings table))
:propertyUrl (some-> (properties/property-url column) (template-property/resolve-uri-template-property bindings table))
:valueUrl (some-> (properties/value-url column) (template-property/resolve-value-uri-template-property cell column bindings table))}]
:valueUrl (some-> (properties/value-url column) (template-property/resolve-value-uri-template-property cell column bindings table))}]
(util/filter-values some? property-urls)))

(defn annotate-row [{:keys [number source-row-number parsed-cells] :as data-row} table title-column-indexes]
@@ -149,10 +162,10 @@
property-urls (get-cell-urls bindings table cell)]
(merge cell property-urls)))
parsed-cells)]
{:number number
{:number number
:source-number source-row-number
:cells (vec cells)
:titles (get-row-titles title-column-indexes parsed-cells)}))
:titles (get-row-titles title-column-indexes parsed-cells)}))

(defn skip-to-data-rows [rows {:keys [skipRows num-header-rows] :as options}]
(let [row-offset (+ skipRows num-header-rows)]
79 changes: 48 additions & 31 deletions test/csv2rdf/main_test.clj
@@ -1,41 +1,58 @@
(ns csv2rdf.main-test
(:require [csv2rdf.main :as sut]
(:require [csv2rdf.logging :as logging]
[csv2rdf.main :as sut]
[clojure.test :as t]))

;; See issue 47
;; Resolving template property URIs with values containing spaces should work

(defmacro capture
"Capture return value of body and stdout, and return a hashmap
of :return-value and :stdout."
[body]
`(let [s# (new java.io.StringWriter)]
(binding [*out* s#]
(let [ret# ~body]
{:return-value ret#
:stdout (str s#)}))))

(defn test-validate-data [tabular-file
metadata-file
failures]
(let [errors-expected? (boolean (seq failures))
warnings (atom [])
errors (atom [])]
(logging/with-logger
(logging/memory-logger warnings errors)
(t/is (= (sut/inner-main ["-t" tabular-file
"-u" metadata-file
"--validate-data"])
{:data-validation-errors? errors-expected?}))
(t/is (= @warnings failures))
(t/is (= @errors [])))))

(t/deftest inner-main-test-validate-data
(t/testing "--validate-data")
(let [{:keys [return-value stdout]}
(capture (sut/inner-main ["-t" "./test/examples/validation/success.csv"
"-u" "./test/examples/validation/named-numbers.json"
"--validate-data"]))]
(t/is (= {:data-validation-errors? false} return-value))
(t/is (= "" stdout)))
(t/testing "--validate-data"

(let [{:keys [return-value stdout]}
(capture (sut/inner-main ["-t" "./test/examples/validation/fail-1.csv"
"-u" "./test/examples/validation/named-numbers.json"
"--validate-data"]))]
(t/is (= {:data-validation-errors? true} return-value))
(t/is (= "Row #3 col #2 (column 'number') has error: Cannot parse 'two' as type 'int': For input string: \"two\"\n"
stdout)))
(test-validate-data
"./test/examples/validation/success.csv"
"./test/examples/validation/named-numbers.json"
[])
(test-validate-data
"./test/examples/validation/fail-1.csv"
"./test/examples/validation/named-numbers.json"
["Row #3 col #2 (column 'number') in file: fail-1.csv has error: Cannot parse 'two' as type 'int': For input string: \"two\""])
(test-validate-data
"./test/examples/validation/fail-2.csv"
"./test/examples/validation/named-numbers.json"
["Row #3 col #2 (column 'number') in file: fail-2.csv has error: Cannot parse 'three' as type 'int': For input string: \"three\""])
(test-validate-data
"./test/examples/validation/fail-3.csv"
"./test/examples/validation/named-numbers.json"
["Row #3 col #2 (column 'number') in file: fail-3.csv has error: Cannot parse 'three' as type 'int': For input string: \"three\""
"Row #4 col #2 (column 'number') in file: fail-3.csv has error: Cannot parse 'four' as type 'int': For input string: \"four\""
"Row #5 col #2 (column 'number') in file: fail-3.csv has error: Cannot parse 'five' as type 'int': For input string: \"five\""])

(let [{:keys [return-value stdout]}
(capture (sut/inner-main ["-t" "./test/examples/validation/fail-2.csv"
"-u" "./test/examples/validation/named-numbers.json"
"--validate-data"]))]
(t/is (= {:data-validation-errors? true} return-value))
(t/is (= "Row #3 col #2 (column 'number') has error: Cannot parse 'three' as type 'int': For input string: \"three\"\n"
stdout))))
(test-validate-data
"./test/examples/validation/fail-4.csv"
"./test/examples/validation/named-numbers.json"
["Row #3 col #2 (column 'number') in file: fail-4.csv has error: Column value required"])
(test-validate-data
"./test/examples/validation/success.csv"
"./test/examples/validation/named-numbers-incorrect-schema.json"
["Row #2 col #1 (column 'name') in file: success.csv has error: Cannot parse 'one' as type 'int': For input string: \"one\""
"Row #3 col #1 (column 'name') in file: success.csv has error: Cannot parse 'two' as type 'int': For input string: \"two\""
"Row #4 col #1 (column 'name') in file: success.csv has error: Cannot parse 'three' as type 'int': For input string: \"three\""
"Row #5 col #1 (column 'name') in file: success.csv has error: Cannot parse 'four' as type 'int': For input string: \"four\""
"Row #6 col #1 (column 'name') in file: success.csv has error: Cannot parse 'five' as type 'int': For input string: \"five\""])))
5 changes: 5 additions & 0 deletions test/examples/validation/fail-3.csv
@@ -0,0 +1,5 @@
name,number
one,1
3,three
four,four
five,five
3 changes: 3 additions & 0 deletions test/examples/validation/fail-4.csv
@@ -0,0 +1,3 @@
name,number
one,1
2,
17 changes: 17 additions & 0 deletions test/examples/validation/named-numbers-incorrect-schema.json
@@ -0,0 +1,17 @@
{
"@context": "http://www.w3.org/ns/csvw",
"tableSchema": {
"columns": [
{
"name": "name",
"datatype": "int",
"required": true
},
{
"name": "number",
"required": true,
"datatype": "int"
}
]
}
}
1 change: 0 additions & 1 deletion test/examples/validation/named-numbers.json
@@ -1,6 +1,5 @@
{
"@context": "http://www.w3.org/ns/csvw",
"url": "fail-2.csv",
"tableSchema": {
"columns": [
{
