From 88185194402a7d5bda2402964aa7cbc9e5409f25 Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Wed, 12 Jul 2017 13:53:55 +0100 Subject: [PATCH 01/10] File reorg & tidy up. Retains namespace names so non-breaking change --- src/{rdf-repository => rdf-common}/grafter/rdf/repository.clj | 0 .../grafter/rdf/repository/registry.clj | 0 src/{rdf-repository => rdf-common}/grafter/rdf/sparql.clj | 0 src/{templater => rdf-common}/grafter/rdf/templater.clj | 0 src/{templater => tabular}/grafter/rdf/preview.clj | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename src/{rdf-repository => rdf-common}/grafter/rdf/repository.clj (100%) rename src/{rdf-repository => rdf-common}/grafter/rdf/repository/registry.clj (100%) rename src/{rdf-repository => rdf-common}/grafter/rdf/sparql.clj (100%) rename src/{templater => rdf-common}/grafter/rdf/templater.clj (100%) rename src/{templater => tabular}/grafter/rdf/preview.clj (100%) diff --git a/src/rdf-repository/grafter/rdf/repository.clj b/src/rdf-common/grafter/rdf/repository.clj similarity index 100% rename from src/rdf-repository/grafter/rdf/repository.clj rename to src/rdf-common/grafter/rdf/repository.clj diff --git a/src/rdf-repository/grafter/rdf/repository/registry.clj b/src/rdf-common/grafter/rdf/repository/registry.clj similarity index 100% rename from src/rdf-repository/grafter/rdf/repository/registry.clj rename to src/rdf-common/grafter/rdf/repository/registry.clj diff --git a/src/rdf-repository/grafter/rdf/sparql.clj b/src/rdf-common/grafter/rdf/sparql.clj similarity index 100% rename from src/rdf-repository/grafter/rdf/sparql.clj rename to src/rdf-common/grafter/rdf/sparql.clj diff --git a/src/templater/grafter/rdf/templater.clj b/src/rdf-common/grafter/rdf/templater.clj similarity index 100% rename from src/templater/grafter/rdf/templater.clj rename to src/rdf-common/grafter/rdf/templater.clj diff --git a/src/templater/grafter/rdf/preview.clj b/src/tabular/grafter/rdf/preview.clj similarity index 100% rename from src/templater/grafter/rdf/preview.clj rename to src/tabular/grafter/rdf/preview.clj From a184d8c5929728b8bbb9912ab887b744d133f26a Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Wed, 12 Jul 2017 13:55:49 +0100 Subject: [PATCH 02/10] Remove legacy repack stuff in prep for repo split --- project.clj | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/project.clj b/project.clj index ea6f7602..af16ba79 100644 --- a/project.clj +++ b/project.clj @@ -25,8 +25,7 @@ :codox {:defaults {:doc "FIXME: write docs" :doc/format :markdown} :output-dir "api-docs" - :sources ["src/common" "src/rdf-repository" "src/tabular" - "src/templater" "src/rdf-common" "src/pipeline" + :sources ["src/tabular" "src/rdf-common" "src/pipeline" ;; Include docs from grafter-url project too "../grafter-url/src"] @@ -43,27 +42,10 @@ :pedantic? 
true

-  :repack [{:subpackage "rdf.common"
-            :dependents #{"templater"}
-            :path "src/rdf-common"}
-           {:subpackage "templater"
-            :path "src/templater"}
-           {:type :clojure
-            :path "src/pipeline"
-            :levels 2}
-           {:type :clojure
-            :path "src/rdf-repository"
-            :levels 2}
-           {:type :clojure
-            :path "src/tabular"
-            :levels 2}]
-
  :profiles {:clj-19 { :dependencies [[org.clojure/clojure "1.9.0-alpha14"]] }

             :dev {:plugins [[com.aphyr/prism "0.1.1"] ;; autotest support simply run: lein prism
-                            [codox "0.8.10"]
-                            [lein-repack "0.2.10" :exclusions [org.clojure/clojure
-                                                               org.codehaus.plexus/plexus-utils]]]
+                            [codox "0.8.10"]]

                   :dependencies [[com.aphyr/prism "0.1.3"]
                                  [org.slf4j/slf4j-simple "1.7.21"]

From 08063a9fd19e311b9feba8a2232eb25264fe6dd6 Mon Sep 17 00:00:00 2001
From: Rick Moynihan
Date: Wed, 12 Jul 2017 14:47:33 +0100
Subject: [PATCH 03/10] Remove grafter.pipeline.types dependency from rdf code.

The implication of this is that declare-pipeline and the `lein-grafter` plugin will be unsupported in the OSS grafter from 0.9.x.

This code has moved into Swirrl's internal grafter-server project.
---
 src/pipeline/grafter/pipeline/types.clj | 379 ----------------------
 src/rdf-common/grafter/rdf/repository.clj | 5 -
 2 files changed, 384 deletions(-)
 delete mode 100644 src/pipeline/grafter/pipeline/types.clj

diff --git a/src/pipeline/grafter/pipeline/types.clj b/src/pipeline/grafter/pipeline/types.clj
deleted file mode 100644
index 328ec675..00000000
--- a/src/pipeline/grafter/pipeline/types.clj
+++ /dev/null
@@ -1,379 +0,0 @@
-(ns grafter.pipeline.types
-  "This namespace contains code for parsing and interpreting
-  grafter.pipeline/declare-pipeline type signatures. In particular it
-  defines a macro deftype-reader that can be used to coerce/read
-  strings into their appropriate clojure types.
-
-  We use the declared pipeline's signature to guide the interpretation
-  of a string into the target type."
-  (:require [clojure.data :refer [diff]]
-            [clojure.edn :as edn]
-            [clojure.instant :as inst]
-            [clojure.instant :refer [read-instant-date]]
-            [clojure.set :as set]
-            [clojure.string :as str]
-            [clojure.java.io :as io])
-  (:import [java.net URI URL]
-           [java.util UUID Date Map]))
-
-(def parameter-types
-  "Atom containing the parameter type hierarchy of supported pipeline
-  types, as declared through declare-pipeline.
-
-  Types in this hierarchy should ultimately be coerceable through the
-  parse-parameter multi-method.
-
-  In addition to using the multi-method to introduce new types, you
-  can gain more code reuse by extending the hierarchy with:
-
-  (swap! parameter-types derive ::foo ::bar)"
-  (atom (make-hierarchy)))
-
-(defmulti parse-parameter (fn [target-type value options]
-                            (let [input-type (type value)]
-                              [input-type target-type]))
-  :hierarchy parameter-types)
-
-(defmethod parse-parameter :default [target-type input-val opts]
-  (throw (ex-info (str "No grafter.pipeline.types/parse-parameter defined to coerce values to type " target-type)
-                  {:error :type-reader-error
-                   :target-type target-type
-                   :value input-val
-                   :options opts})))
-
-(defmethod parse-parameter [String ::root-type] [_ val opts]
-  (throw (ex-info "root-type is abstract and does not correspond to any concrete object coercion."
{:error ::abstract-type-error})))
-
-(defmethod parse-parameter [String ::primitive] [_ val opts]
-  (edn/read-string val))
-
-(defmethod parse-parameter [String Boolean] [_ val opts]
-  (boolean (edn/read-string val)))
-
-(defmethod parse-parameter [String Integer] [_ val opts]
-  (Integer/parseInt val))
-
-(defmethod parse-parameter [String clojure.lang.BigInt] [_ val opts]
-  (bigint (edn/read-string val)))
-
-(defmethod parse-parameter [String Double] [_ val opts]
-  (Double/parseDouble val))
-
-(defmethod parse-parameter [String String] [_ val opts]
-  val)
-
-(defmethod parse-parameter [String Float] [_ val opts]
-  (Float/parseFloat val))
-
-(defmethod parse-parameter [String ::uri] [_ val opts]
-  (java.net.URI. val))
-
-(defmethod parse-parameter [String java.util.Date] [_ val opts]
-  (inst/read-instant-date val))
-
-(defmethod parse-parameter [String clojure.lang.Keyword] [_ val opts]
-  (keyword val))
-
-(defmethod parse-parameter [String java.util.UUID] [_ val opts]
-  (java.util.UUID/fromString val))
-
-(defmethod parse-parameter [String java.io.Reader] [_ val opts]
-  (io/reader val))
-
-(defmethod parse-parameter [String ::map] [_ val opts]
-  (let [m (edn/read-string val)]
-    (if-not (map? m)
-      (throw (IllegalArgumentException. (str "Expected to receive a map but got a " (type m))))
-      m)))
-
-(defmethod parse-parameter [String ::vector] [_ val opts]
-  (let [m (edn/read-string val)]
-    (if-not (vector? m)
-      (throw (IllegalArgumentException. (str "Expected to receive a vector but got a " (type m))))
-      m)))
-
-(defmethod parse-parameter [Map ::map] [_ val opts]
-  ;; TODO move to grafter-server?
-  (edn/read-string (slurp (:value val))))
-
-(defmethod parse-parameter [String ::map] [_ val opts]
-  (edn/read-string val))
-
-(defmethod parse-parameter [String ::file] [_ val opts]
-  (io/file val))
-
-(defmethod parse-parameter [String ::text-file] [_ val opts]
-  ;; assume it's a file path to a text file and return a reader on it
-  (io/reader val))
-
-(defmethod parse-parameter [String ::binary-file] [_ val opts]
-  (io/input-stream val))
-
-(defmethod parse-parameter [String java.net.URL] [_ val opts]
-  (URL. val))
-
-(swap! parameter-types derive ::value ::root-type)
-
-(swap! parameter-types derive ::primitive ::value)
-
-(swap! parameter-types derive Boolean ::primitive)
-
-(swap! parameter-types derive Boolean ::primitive)
-
-(swap! parameter-types derive String ::primitive)
-
-(swap! parameter-types derive Float ::primitive)
-
-(swap! parameter-types derive Long ::primitive)
-
-(swap! parameter-types derive Integer ::primitive)
-
-(swap! parameter-types derive Double ::primitive)
-
-(swap! parameter-types derive clojure.lang.Keyword ::primitive)
-
-(swap! parameter-types derive java.util.Date ::primitive)
-
-(swap! parameter-types derive java.util.UUID ::primitive)
-
-(swap! parameter-types derive ::uri ::value)
-
-(swap! parameter-types derive ::url ::uri)
-
-(swap! parameter-types derive java.net.URI ::uri)
-
-(swap! parameter-types derive java.net.URL ::url)
-
-(swap! parameter-types derive ::map ::value)
-
-(swap! parameter-types derive ::sparql-query-endpoint ::map)
-
-(swap! parameter-types derive ::vector ::value)
-
-(swap! parameter-types derive ::file ::root-type)
-
-(swap! parameter-types derive ::binary-file ::file)
-
-(swap! parameter-types derive ::text-file ::file)
-
-(swap! parameter-types derive ::rdf-file ::file)
-
-(swap! parameter-types derive ::another-file ::file)
-
-(swap! parameter-types derive java.io.Reader ::text-file)
-
-(swap!
parameter-types derive ::tabular-dataset ::file)
-
-(prefer-method parse-parameter [String ::tabular-dataset] [String Map])
-
-(prefer-method parse-parameter [String Map] [String ::root-type])
-
-(prefer-method parse-parameter [Map ::tabular-dataset] [Map Map])
-
-(swap! parameter-types derive java.io.InputStream ::binary-file)
-
-(defn supported-parameter?
-  "Predicate function that returns true if the parameter type is
-  a supported parameter type.
-
-  Supported parameters are parameter types (either classes or
-  keywords) which are supported either by an explicit dispatch for the
-  parse-parameter multimethod or are reachable through the
-  @parameter-types type hierarchy."
-  [p]
-  (some (partial isa? @parameter-types p)
-        (map second (remove keyword?
-                            (keys (methods parse-parameter))))))
-
-(defn preferred-type
-  "When there are multiple defmethod implementations for
-  parse-parameter this returns the preferred one, as declared via
-  clojure's prefer-method."
-
-  [param-hier parent-types]
-  (let [n-parents (count parent-types)
-        pref? (fn [t]
-                (when ((prefers parse-parameter) [java.util.Map t])
-                  t))]
-    (cond
-      (= 1 n-parents) (first parent-types)
-      (>= n-parents 2) (some pref? parent-types))))
-
-(defn- parameter-type-chain*
-  [t]
-  (let [param-hier @parameter-types]
-    (when (supported-parameter? t)
-      (let [ps (parents param-hier t)
-            supported-parents (filter supported-parameter? (parents param-hier t))
-
-            chosen-parent (if (coll? ps)
-                            (preferred-type param-hier supported-parents)
-                            ps)]
-        (cons t
-              (mapcat parameter-type-chain* [chosen-parent]))))))
-
-(defn ^:no-doc parameter-type-chain
-  "Interrogates the parse-parameter multi-method and returns an ordered
-  sequence representing the hierarchy chain. The order of items in
-  the chain respects both the hierarchy of parameter-types and
-  prefer-method."
-  [t]
-  (distinct (parameter-type-chain* t)))
-
-;;[Symbol] -> {:arg-types [Symbol], :return-type Symbol}
-(defn ^:no-doc parse-type-list
-  "Parses a given list of symbols expected to represent a pipeline
-  type definition. Validates the list has the expected format - n >= 0
-  parameter types followed by a '-> followed by the return
-  type. Returns a record containing the ordered list of parameter
-  symbols and the return type symbol."
-  [l]
-  (let [c (count l)]
-    (if (< c 2)
-      (throw (IllegalArgumentException. "Invalid type declaration - requires at least -> and return type"))
-      (let [arg-count (- c 2)
-            separator (l arg-count)
-            return-type (last l)
-            arg-types (vec (take arg-count l))]
-
-        (if (= '-> separator)
-          {:arg-types arg-types :return-type return-type}
-          (throw (IllegalArgumentException. "Invalid type declaration: Expected [args -> return-type]")))))))
-
-;;[a] -> [b] -> ()
-(defn- validate-argument-count
-  "Throws an exception if the number of parameter types in a pipeline
-  type declaration does not match the number of elements in the pipeline
-  var's argument list."
-  [declared-arg-list type-list]
-  (let [comp (compare (count declared-arg-list) (count type-list))]
-    (if (not= 0 comp)
-      (let [det (if (pos? comp) "few" "many")
-            msg (str "Too " det " argument types provided for pipeline argument list " declared-arg-list " (got " type-list ")")]
-        (throw (IllegalArgumentException. msg))))))
-
-(defn ^:no-doc resolve-pipeline-fn [ns sym]
-  (if (symbol? sym)
-    (ns-resolve ns sym)
-    (throw (IllegalArgumentException. (str "Unexpected syntax for pipeline declaration. Was expecting a symbol but got a " (type sym))))))
-
-;;Symbol -> Class
-(defn ^:no-doc resolve-parameter-type
-  "Attempts to resolve a symbol representing a pipeline parameter type
-  to a class/keyword instance. This is necessary to resolve symbols
-  in the declare-pipeline macro to their corresponding classes or
-  keyword types.
-
-  We also support resolving vars that dereference to a class or
-  keyword, which allows the declare-pipeline arguments definition to
-  also reference vars.
-
-  An IllegalArgumentException is raised if the parameter either
-  doesn't resolve or ultimately resolves to anything other than a
-  keyword or class."
-  ([sym] (resolve-parameter-type *ns* sym))
-  ([ns sym]
-   (cond
-     (symbol? sym) (if-let [cls-or-var (ns-resolve ns sym)]
-                     (resolve-parameter-type ns cls-or-var)
-                     (throw (IllegalArgumentException. (str "Failed to resolve " sym " to a parameter type in namespace: " ns))))
-     (keyword? sym) sym
-     (class? sym) sym
-     (var? sym) (resolve-parameter-type ns @sym)
-     :else (throw (IllegalArgumentException. (str "Unexpected type of parameter " (type sym)))))))
-
-(defn- get-arg-descriptor [name-sym type-sym doc doc-meta]
-  (let [klass-or-kw (resolve-parameter-type type-sym)]
-    (if (supported-parameter? klass-or-kw)
-      (let [common {:name name-sym :class klass-or-kw :doc doc}]
-        (if doc-meta
-          (assoc common :meta doc-meta)
-          common))
-      (throw (IllegalArgumentException. (str "Unsupported pipeline parameter type: " type-sym))))))
-
-;;Symbol -> PipelineType
-(defn- pipeline-type-from-return-type-sym
-  "Infers the 'type' (graft or pipe) of a pipeline function from its
-  return type. Throws an exception if the return type symbol is
-  invalid."
-  [ret-sym]
-  (condp = ret-sym
-    '(Seq Statement) :graft ;; deprecated
-    '(Seq Quad) :graft
-    '[Quad] :graft
-    'Quads :graft
-    :grafter.pipeline.types/rdf-file :graft
-    'Dataset :pipe ;; deprecated
-    :grafter.pipeline.types/tabular-dataset :pipe
-    (let [msg (str "Invalid return type " ret-sym " for pipeline function: required Dataset or [Quad]")]
-      (throw (IllegalArgumentException. msg)))))
-
-;;[a] -> {a b} -> [[a b]]
-(defn- correlate-pairs
-  "Orders the pairs in a map so the keys are in the same order as the
-  elements in the given 'reference' vector.
-  (correlate-pairs [:b :a] {:a 1 :b 2}) => [[:b 2] [:a 1]]"
-  [ordered-keys m]
-  {:pre [(= (count ordered-keys) (count m))
-         (= (set ordered-keys) (set (keys m)))]}
-  (let [indexes (into {} (map-indexed #(vector %2 %1) ordered-keys))]
-    (vec (sort-by (comp indexes first) m))))
-
-
-;;[Symbol] -> [Symbol] -> {Symbol String} -> [ArgDescriptor]
-(defn- resolve-pipeline-arg-descriptors [arg-names arg-type-syms doc-map]
-  (validate-argument-count arg-names arg-type-syms)
-  (let [[missing-doc unknown-doc _] (diff (set arg-names) (set (keys doc-map)))]
-    (cond
-      (not (empty? missing-doc))
-      (throw (IllegalArgumentException. (str "No documentation found for variable(s): " missing-doc)))
-
-      (not (empty? unknown-doc))
-      (throw (IllegalArgumentException. (str "Found documentation for unknown variable(s): " unknown-doc)))
-
-      :else
-      (let [correlated-docs (correlate-pairs arg-names doc-map)]
-        (mapv (fn [n ts [doc-name doc]] (get-arg-descriptor doc-name ts doc (-> (meta doc-name)
-                                                                                ;; remove line number metadata inserted by clojure as it's superfluous here
                                                                                (dissoc :file :line :column))))
-              arg-names
-              arg-type-syms
-              correlated-docs)))))
-
-(defn- validate-supported-pipeline-operations!
[supported-operations] - (let [ops (set supported-operations) - valid-operations #{:append :delete} - invalid-operations (set/difference ops valid-operations)] - (when-not (empty? invalid-operations) - (throw (IllegalArgumentException. (str "Invalid supported operations for pipeline: " - (str/join ", " invalid-operations) - ". Valid operations are: " (str/join ", " valid-operations))))))) - -;;Var -> [Symbol] -> Metadata -> PipelineDef -(defn ^:no-doc create-pipeline-declaration - "Inspects a var containing a pipeline function along with an - associated type definition and metadata map. The type definition - should declare the type of each parameter and the return type of the - pipeline. The metadata map must contain a key-value pair for each - named parameter in the pipeline function argument list. The value - corresponding to each key in the metadata map is expected to be a - String describing the parameter. The opts map can contain - an optional :supported-operations key associated to a collection - containing :append and/or :delete. These operations indicate whether - the data returned from the pipeline can be appended to or deleted - from the destination." - [sym ns type-list metadata opts] - (let [def-var (resolve-pipeline-fn *ns* sym) - def-meta (meta def-var) - arg-list (first (:arglists def-meta)) - {:keys [arg-types return-type]} (parse-type-list type-list) - pipeline-type (pipeline-type-from-return-type-sym return-type) - args (resolve-pipeline-arg-descriptors arg-list arg-types metadata) - supported-operations (:supported-operations opts #{:append})] - (validate-supported-pipeline-operations! supported-operations) - {:var def-var - :doc (or (:doc def-meta) "") - :args args - :type pipeline-type - :declared-args arg-list - :supported-operations supported-operations})) diff --git a/src/rdf-common/grafter/rdf/repository.clj b/src/rdf-common/grafter/rdf/repository.clj index a2dd46f3..a1853470 100644 --- a/src/rdf-common/grafter/rdf/repository.clj +++ b/src/rdf-common/grafter/rdf/repository.clj @@ -4,7 +4,6 @@ [grafter.rdf] [me.raynes.fs :as fs] [grafter.rdf.protocols :as pr] - [grafter.pipeline.types :as types] [grafter.rdf.io :refer :all] [clojure.tools.logging :as log] [grafter.rdf :as rdf] @@ -638,10 +637,6 @@ res (batched-query qstr conn limit (+ limit offset))))))) -(defmethod types/parse-parameter [String ::sparql-query-endpoint] [_ val opts] - (let [endpoint (:endpoint val)] - (sparql-repo (types/parse-parameter String ::types/map val)))) - (extend-type RepositoryConnection pr/ITripleReadable From 56ca592d7b901ba196e64c78d5142586c2c66094 Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Wed, 12 Jul 2017 14:50:55 +0100 Subject: [PATCH 04/10] Remove grafter.tabular dependency from grafter/grafter. From 0.9.0 onwards grafter/grafter will include only RDF processing code. 
Code that depends on grafter.tabular can include a new legacy dependency which can be found at: https://github.com/Swirrl/grafter.tabular Or includable with the clojars coordinates: [grafter/grafter.tabular "0.9.0"] ;; (check version) --- project.clj | 14 +- src/pipeline/grafter/pipeline.clj | 148 ---- src/pipeline/grafter/pipeline/plugin.clj | 39 -- src/tabular/grafter/rdf/preview.clj | 144 ---- src/tabular/grafter/sequences.clj | 58 -- src/tabular/grafter/tabular.clj | 667 ------------------ src/tabular/grafter/tabular/common.clj | 353 ---------- src/tabular/grafter/tabular/csv.clj | 36 - src/tabular/grafter/tabular/edn.clj | 31 - src/tabular/grafter/tabular/excel.clj | 115 ---- src/tabular/grafter/tabular/melt.clj | 141 ---- test/grafter/pipeline/types_test.clj | 92 --- test/grafter/pipeline_test.clj | 259 ------- test/grafter/rdf/preview_test.clj | 179 ++--- test/grafter/rdf/templater_test.clj | 2 +- test/grafter/sequences_test.clj | 33 +- test/grafter/tabular/melt_test.clj | 59 -- test/grafter/tabular_test.clj | 820 ----------------------- 18 files changed, 111 insertions(+), 3079 deletions(-) delete mode 100644 src/pipeline/grafter/pipeline.clj delete mode 100644 src/pipeline/grafter/pipeline/plugin.clj delete mode 100644 src/tabular/grafter/rdf/preview.clj delete mode 100644 src/tabular/grafter/sequences.clj delete mode 100644 src/tabular/grafter/tabular.clj delete mode 100644 src/tabular/grafter/tabular/common.clj delete mode 100644 src/tabular/grafter/tabular/csv.clj delete mode 100644 src/tabular/grafter/tabular/edn.clj delete mode 100644 src/tabular/grafter/tabular/excel.clj delete mode 100644 src/tabular/grafter/tabular/melt.clj delete mode 100644 test/grafter/pipeline/types_test.clj delete mode 100644 test/grafter/pipeline_test.clj delete mode 100644 test/grafter/tabular/melt_test.clj delete mode 100644 test/grafter/tabular_test.clj diff --git a/project.clj b/project.clj index af16ba79..d6b2c968 100644 --- a/project.clj +++ b/project.clj @@ -1,5 +1,5 @@ -(defproject grafter/grafter "0.8.12-SNAPSHOT" - :description "Tools for the hard graft of data processing" +(defproject grafter/grafter "0.9.0-SNAPSHOT" + :description "Tools for the hard graft of linked data processing" :url "http://grafter.org/" :license {:name "Eclipse Public License - v1.0" :url "https://www.eclipse.org/legal/epl-v10.html"} @@ -13,12 +13,8 @@ [grafter/vocabularies "0.2.0"] [commons-logging "1.2"] ;; Shouldn't need this, but somehow excluded and required by SPARQLRepository - [commons-io/commons-io "2.4"] - [org.clojure/data.csv "0.1.3"] - [grafter/clj-excel "0.0.9" :exclusions [commons-codec]] [me.raynes/fs "1.4.6"] [potemkin "0.4.3"] - [incanter/incanter-core "1.5.7" :exclusions [net.sf.opencsv/opencsv commons-codec]] [com.novemberain/pantomime "2.8.0"]] ;; mimetypes @@ -44,11 +40,9 @@ :profiles {:clj-19 { :dependencies [[org.clojure/clojure "1.9.0-alpha14"]] } - :dev {:plugins [[com.aphyr/prism "0.1.1"] ;; autotest support simply run: lein prism - [codox "0.8.10"]] + :dev {:plugins [[codox "0.8.10"]] - :dependencies [[com.aphyr/prism "0.1.3"] - [org.slf4j/slf4j-simple "1.7.21"] + :dependencies [[org.slf4j/slf4j-simple "1.7.21"] [prismatic/schema "1.1.3"] [criterium "0.4.4"]] diff --git a/src/pipeline/grafter/pipeline.clj b/src/pipeline/grafter/pipeline.clj deleted file mode 100644 index 00be88fa..00000000 --- a/src/pipeline/grafter/pipeline.clj +++ /dev/null @@ -1,148 +0,0 @@ -(ns grafter.pipeline - "Functions to declare the presence of Grafter pipeline functions to - external processes and programs such 
as lein-grafter and Grafter
-  server."
-  (:require
-   [grafter.pipeline.types :refer [resolve-parameter-type create-pipeline-declaration
-                                   parse-parameter]]))
-
-(defonce ^{:doc "Map of pipelines that have been declared and exported to the pipeline runners"} exported-pipelines (atom {}))
-
-(defn ^:no-doc register-pipeline!
-  "Registers the pipeline object map with the exported pipelines."
-  ([sym pipeline-obj] (register-pipeline! sym *ns* nil pipeline-obj))
-  ([sym ns pipeline-obj] (register-pipeline! sym ns nil pipeline-obj))
-  ([sym ns display-name pipeline-obj]
-   (let [pipeline (-> pipeline-obj
-                      (assoc :name sym
-                             :namespace (symbol (str ns)))
-                      (cond-> display-name (assoc :display-name display-name)))]
-     (swap! exported-pipelines #(assoc % sym pipeline)))))
-
-(defn ^:no-doc qualify-symbol
-  "Returns a fully qualified name for the supplied symbol or string, or nil if
-  it's not found."
-  [sym]
-  (let [resolved-symbol (resolve (symbol sym))]
-    (when resolved-symbol
-      (let [ns (->> resolved-symbol meta :ns)]
-        (if (re-find #"\/" (str sym))
-          (symbol sym)
-          (symbol (str ns "/" sym)))))))
-
-(defn- parse-pipeline-declaration [arg-list]
-  (let [[sym display-name-or-type-form] arg-list
-        has-display? (string? display-name-or-type-form)
-        display-name (if has-display? display-name-or-type-form nil)
-        rest-args (drop (if has-display? 2 1) arg-list)
-        [type-form metadata] rest-args
-        opts (into {} (map vec (partition 2 2 (drop 2 rest-args))))]
-    {:sym sym
-     :display-name display-name
-     :type-form type-form
-     :metadata metadata
-     :opts opts}))
-
-(defmacro declare-pipeline
-  "Declare a pipeline function and expose it to other services such as
-  the grafter leiningen plugin and grafter-server.
-
-  declare-pipeline takes a symbol identifying the function to expose,
-  an optional human friendly title string, a type-form describing the
-  pipeline's arguments and return type, a map of metadata describing
-  each argument and an optional sequence of key-value pairs containing
-  additional options. The only recognised option is :supported-operations
-  which indicates whether the pipeline output supports being appended to
-  or deleted from the pipeline destination.
-
-  (defn my-pipeline [a] [(->Quad a a a a)])
-
-  (declare-pipeline my-pipeline \"My example pipeline\" [URI -> Quads]
-    {a \"Argument a\"}
-    :supported-operations #{:append :delete})
-
-  Note that the type-form/signature specifies the input arguments
-  followed by a -> and an output type.
-
-  All input argument types MUST be imported into the namespace and
-  have a type reader declared via
-  grafter.pipeline.types/deftype-reader.
-
-  Output types do not need to be imported into the namespace, and must
-  either be the symbols Quads or Dataset, or an alias such as
-  \"[Quad]\".
-
-  Default type-readers are defined for common grafter/clojure types."
-  {:style/indent :defn}
-  ([& args]
-   (let [{:keys [sym display-name type-form metadata opts]} (parse-pipeline-declaration args)]
-     (if-let [sym (qualify-symbol sym)]
-       (let [decl (create-pipeline-declaration sym *ns* type-form metadata opts)]
-         (register-pipeline! sym *ns* display-name decl))
-       (throw (ex-info (str "The symbol " sym " could not be resolved to a var.") {:error :pipeline-declaration-error
-                                                                                   :sym sym}))))
-   nil))
-
-(defn ^:no-doc all-declared-pipelines
-  "List all the declared pipelines"
-  ([] (all-declared-pipelines nil))
-  ([type]
-   (let [type? (if type
-                 #(= (keyword type) (:type %))
-                 identity)]
-
-     (filter type?
(sort-by (comp str :var) (vals @exported-pipelines)))))
-
-(defn ^:no-doc coerce-arguments
-  ([namespace expected-types supplied-args] (coerce-arguments namespace expected-types supplied-args {}))
-  ([namespace expected-types supplied-args opts]
-   (map (fn [et sa]
-          (let [klass (:class et)]
-            (try
-              (parse-parameter (resolve-parameter-type namespace klass) sa opts)
-              (catch Exception ex
-                (throw (ex-info "Unexpected exception parsing pipeline parameter type." {:error ::coerce-argument-error
-                                                                                         :expected-type et
-                                                                                         :supplied-argument sa} ex)))))) expected-types supplied-args)))
-
-
-(defn find-pipeline
-  "Find a pipeline by its fully qualified name. Accepts either a
-  string or a symbol identifying the pipeline
-  e.g. \"my.namespace/my-pipeline\" or 'my.namespace/my-pipeline"
-  [name]
-  (get @exported-pipelines (symbol name)))
-
-(defmulti coerce-pipeline-arguments
-  "Coerce the arguments based on the pipeline's stated types. Receives
-  a fully qualified name of the pipeline (a symbol or string), or a
-  pipeline-object, and returns the arguments as coerced values, or
-  raises an error if a coercion isn't possible.
-
-  Uses the multi-method grafter.pipeline.types/parse-parameter to coerce
-  values."
  (fn [pipeline supplied-args]
-    (type pipeline)))
-
-(defmethod coerce-pipeline-arguments clojure.lang.IPersistentMap [pipeline supplied-args]
-  (let [expected-types (:args pipeline)
-        namespace (:namespace pipeline)]
-    (coerce-arguments namespace expected-types supplied-args)))
-
-(defmethod coerce-pipeline-arguments :default [pipeline-name supplied-args]
-  (let [pipeline (find-pipeline pipeline-name)]
-    (if pipeline
-      (coerce-pipeline-arguments pipeline supplied-args)
-      (throw (ex-info (str "Could not find pipeline named: " pipeline-name) {:error ::pipeline-not-found
-                                                                             ::pipeline-name pipeline-name})))))
-
-(defn ^:no-doc execute-pipeline-with-coerced-arguments
-  "Execute the pipeline specified by pipeline-sym by applying it to
-  type-coerced versions of the supplied arguments.
-
-  Expects supplied-args to be either strings or already of the
-  declared data type formats."
-  [pipeline-sym supplied-args]
-  (let [coerced-args (coerce-pipeline-arguments pipeline-sym supplied-args)
-        pipeline-fn (:var (@exported-pipelines pipeline-sym))]
-    (apply pipeline-fn coerced-args)))
diff --git a/src/pipeline/grafter/pipeline/plugin.clj b/src/pipeline/grafter/pipeline/plugin.clj
deleted file mode 100644
index a7dba031..00000000
--- a/src/pipeline/grafter/pipeline/plugin.clj
+++ /dev/null
@@ -1,39 +0,0 @@
-(ns grafter.pipeline.plugin
-  {:no-doc true}
-  (:require [clojure.string :as string]
-            [grafter.pipeline :refer [all-declared-pipelines]]))
-
-(defn fully-qualified-name [pipeline]
-  (.substring (str (:var pipeline)) 2))
-
-;; NOTE
-;;
-;; This code exists here for lein-grafter, as the plugin's code doesn't
-;; run in the context of the project. Putting this code here
-;; dramatically simplifies things as the plugin needs to use
-;; syntax-quote to eval-in-project. The less code in the syntax quote
-;; form the better!
- -(defn collapse-whitespace [s] - (when s - (string/replace s #"(\n| )+" " "))) - -(def format-str "%-60s %-9s %-20s %s") - -(def header-row (String/format format-str (into-array Object ["Pipeline" "Type" "Arguments" "Description"]))) - -(defn- format-pipeline [pipeline] - (let [pattern format-str - data (into-array Object - [(fully-qualified-name pipeline) - (name (:type pipeline)) - (if-let [args (map :name (:args pipeline))] - (string/join ", " args) - "???") - (if-let [doc (collapse-whitespace (:doc pipeline))] - doc - "No doc string")])] - (String/format pattern data))) - -(defn list-pipelines [type] - (cons header-row (map format-pipeline (all-declared-pipelines type)))) diff --git a/src/tabular/grafter/rdf/preview.clj b/src/tabular/grafter/rdf/preview.clj deleted file mode 100644 index 64dc07e1..00000000 --- a/src/tabular/grafter/rdf/preview.clj +++ /dev/null @@ -1,144 +0,0 @@ -(ns grafter.rdf.preview - "Tool support for rendering previews of grafter.tabular graph-fn templates - with values from datasets." - (:require [clojure.walk] - [clojure.edn :as edn] - [grafter.tabular :refer [make-dataset]]) - (:import [incanter.core Dataset])) - -(defn- symbolize-keys [m] - (zipmap (map (comp symbol name) (keys m)) (vals m))) - -(defn- substitution-map [bindings row-data] - (symbolize-keys (merge (select-keys row-data (map (comp keyword name) (:keys bindings))) - (select-keys row-data (map name (:strs bindings)))))) - -(defmulti ^:private templatable? - "Predicate that returns true if the supplied argument should be emitted when - attempting to inline global vars in a graph template. - - Used with bind-constants." - - class) - -(defmethod templatable? nil [v] true) - -(defmethod templatable? Object [v] false) - -(defmethod templatable? Number [v] true) - -(defmethod templatable? String [v] true) - -(defmethod templatable? clojure.lang.Keyword [v] true) - -(defn- bind-constants - "Walks the given form and resolves symbols from the namespace to their vars, - returning the values they reference if they are templatable?. - - Non templatable values are unexpanded/uninlined and left as symbols." - [ns form] - (clojure.walk/postwalk - (fn [f] - (if (and (symbol? f) - (ns-resolve ns f)) - (if-let [varr (get (ns-map ns) f)] - (if (templatable? @varr) - @varr - f) - f) - f)) form)) - -;; The UnreadableForm record is used to wrap serializable representations of -;; unserializable EDN/Clojure forms on the wire. -(defrecord UnreadableForm [form-string form-class]) - -(defn unreadable-form [val] - (->UnreadableForm (str val) (str (.getName (class val))))) - -(defn- readable-form? - "Returns false if the form can't be read, or returns the form (truthy) if it - can." - [form] - (try - (binding [*print-dup* true] (pr-str form)) - (catch IllegalArgumentException ex - false))) - -(defprotocol ToPrintable - (->printable-form [o] - "Converts object into a printable form by returning an UnreadableForm - representation of it (or its constituent parts if it can't be - serialized.)")) - -(extend-protocol ToPrintable - incanter.core.Dataset - (->printable-form [ds] - (make-dataset (map ->printable-form (:rows ds)) - (map ->printable-form (:column-names ds)))) - - java.util.Map - (->printable-form [form] - (letfn [(make-readable [val] - (if (readable-form? val) - val - (unreadable-form val)))] - (zipmap (map make-readable (keys form)) - (map make-readable (vals form))))) - - java.lang.Object - (->printable-form [form] - (if (readable-form? 
form) - form - (unreadable-form form))) - - nil - (->printable-form [nl] - nil)) - -(defn preview-graph - "Takes a dataset a function built via grafter.tabular/graph-fn and a row - number, and returns an EDN datastructure representing the template (body) of - the graph-fn function with column variables substituted for data. - - Takes an optional final argument which can be :render-constants, if the user - would also like to substitute symbols from within the graph-fn body with - renderable constants found in referenced vars." - ([dataset graphf row] - (preview-graph dataset graphf row false)) - - ([dataset graphf row render-constants?] - (let [form (:grafter.tabular/template (meta graphf)) - bindings (first (second form)) - body-forms (drop 2 form) - printable-row-data (->printable-form (nth (:rows dataset) row nil)) - subs (substitution-map bindings printable-row-data)] - - {:bindings bindings :row printable-row-data :template - (->> body-forms - (map (fn [body-form] - (let [replaced-vals (clojure.walk/postwalk-replace subs body-form)] - (if (= :render-constants render-constants?) - (bind-constants (:grafter.tabular/defined-in-ns (meta graphf)) replaced-vals) - replaced-vals)))))}))) - -(comment - - (do (require '(grafter [tabular :refer [graph-fn make-dataset]])) - (require '(grafter [rdf :refer [s]])) - (require '(grafter.rdf [templater :refer [graph]])) - - (def ds (make-dataset [[(Object.) "bar" (s "baz")]])) - - (def foaf:friendOf "http://foaf/friendOf") - - (def my-template (graph-fn [{:strs [a b c] :keys [c]}] - (graph "http://foo.com/" - [(first b) - [b c]] - [a - [foaf:friendOf c] - [b c] - [b c]]) - (graph a [a [b c]])))) - - (preview-graph ds my-template 0)) diff --git a/src/tabular/grafter/sequences.clj b/src/tabular/grafter/sequences.clj deleted file mode 100644 index 8fd20fee..00000000 --- a/src/tabular/grafter/sequences.clj +++ /dev/null @@ -1,58 +0,0 @@ -(ns grafter.sequences - "A library of useful lazy sequences." - (:require [clojure.string :refer [blank?]])) - -(defn integers-from - "Returns an infinite sequence of integers counting from the supplied - starting number. - - Supports the use of an optional increment amount to allow increments - of arbitrary or negative amounts. - - If no arguments are supplied an infinite sequence of all positive - integers from 1 is returned." - ([] - (iterate inc 1)) - ([from] - (iterate inc from)) - ([from inc-by] - (iterate #(+ % inc-by) from))) - -(defn column-names-seq - "Given an alphabet string generate a lazy sequences of column names - e.g. - - `(column-names-seq \"abcdefghijklmnopqrstuvwxyz\") ;; => (\"a\" \"b\" \"c\" ... \"aa\" \"ab\")`" - [alphabet] - (->> (map str alphabet) - (iterate (fn [chars] - (for [x chars - y alphabet] - (str x y)))) - (apply concat))) - -(defn alphabetical-column-names - "Returns an infinite sequence of alphabetized column names. If more - than 26 are required the sequence will count AA AB AC ... BA BB BC - ... ZZZA ... etc" - [] - (column-names-seq "abcdefghijklmnopqrstuvwxyz")) - -(defn fill-when - "Takes a sequence of values and copies a value through the sequence - depending on the supplied predicate function. - - The default predicate function is not-blank? which means that a cell - will be copied through the sequence over blank cells until the next - non-blank one. For example: - - `(fill-when [:a \"\" \" \" :b nil nil nil]) ; => (:a :a :a :b :b :b :b)` - - A start value to copy can also be provided as the 3rd argument." - ([col] (fill-when (complement blank?) 
col))
-  ([p col] (fill-when p col (first col)))
-  ([p col start]
-   (when (seq col)
-     (let [f (first col)
-           current (if (p f) f start)]
-       (cons current (lazy-seq (fill-when p (next col) current)))))))
diff --git a/src/tabular/grafter/tabular.clj b/src/tabular/grafter/tabular.clj
deleted file mode 100644
index c85fd24e..00000000
--- a/src/tabular/grafter/tabular.clj
+++ /dev/null
@@ -1,667 +0,0 @@
-(ns grafter.tabular
-  "Functions for processing tabular data."
-  (:require [clojure
-             [set :as set]
-             [string :as str]]
-            [grafter.tabular.common :as tabc :refer [lift->vector map-keys]]
-            [incanter.core :as inc]
-            [potemkin.namespaces :refer [import-vars]]
-            [grafter.pipeline.types :as types])
-  (:import incanter.core.Dataset))
-
-;; Load protocol definitions. This could occur in the ns definition but putting
-;; them in there means that namespace refactoring tools can clear them out
-;; accidentally. Better to explicitly require them to ensure they're loaded.
-(require '[grafter.tabular
-           [csv]
-           [excel]])
-
-;; This one is necessary for import-vars - again separating these from the ns
-;; definition protects them against overzealous refactoring tools.
-(require '[grafter.tabular.melt])
-
-(import-vars
- [grafter.tabular.common
-  dataset?
-  column-names
-  make-dataset
-  move-first-row-to-header
-  read-dataset
-  read-datasets
-  write-dataset
-  with-metadata-columns
-  without-metadata-columns
-  resolve-column-id]
- [grafter.tabular.melt
-  melt])
-
-(defmethod types/parse-parameter [String ::types/tabular-dataset] [_ val opts]
-  (read-dataset val))
-
-(swap! types/parameter-types derive incanter.core.Dataset ::types/tabular-dataset)
-
-(prefer-method types/parse-parameter [String ::types/file] [String java.util.Map])
-
-(defn test-dataset
-  "Constructs a test dataset of r rows by c cols e.g.
-
-`(test-dataset 2 2) ;; =>`
-
-| A | B |
-|---|---|
-| 0 | 0 |
-| 1 | 1 |"
-  [r c]
-  (->> (iterate inc 0)
-       (map #(repeat c %))
-       (take r)
-       make-dataset))
-
-(defn- invalid-column-keys
-  "Takes a dataset and a sequence of column key names and returns a
-  sequence of keys that are not in the dataset."
-  [dataset keys]
-
-  (let [not-found (Object.)
-        not-found-items (->> keys
-                             (map (fn [col]
-                                    [col (resolve-column-id dataset col not-found)]))
-                             (filter (fn [[_ present]] (= not-found present)))
-                             (map first))]
-    not-found-items))
-
-(defn- resolve-all-col-ids [dataset source-cols]
-  (map (partial resolve-column-id dataset) source-cols))
-
-(defn- all-columns
-  "Takes a dataset and a finite sequence of column identifiers.
-
-  If you want to use infinite sequences of columns or allow the
-  specification of more cols than are in the data without error you
-  should use columns instead. Using an infinite sequence with this
-  function will result in non-termination.
-
-  Unlike the columns function this function will raise an
-  IndexOutOfBoundsException if a specified column is not actually
-  found in the Dataset."
-  [dataset cols]
-  (let [original-meta (meta dataset)
-        not-found-items (invalid-column-keys dataset cols)]
-    (if (and (empty? not-found-items)
-             (some identity cols))
-      (let [resolved-cols (resolve-all-col-ids dataset cols)
-            rows (->> dataset :rows (map #(select-keys % resolved-cols)))]
-        (with-meta (make-dataset rows resolved-cols) original-meta))
-      (throw (IndexOutOfBoundsException.
(str "The columns: " - (str/join ", " not-found-items) - " are not currently defined.")))))) - -(defn- indexed [col] - (map-indexed vector col)) - -(defn- rows-bounded [row-data row-numbers] - (let [row-numbers (into #{} row-numbers)] - (->> row-data - (filter (fn [[index row]] - (if (row-numbers index) - true - false))) - (map second)))) - -(defn- select-indexed - "Selects indexed rows or columns (outside of the dataset). Assumes the seq of - row-numbers to select on is ordered, and that row-data is a tuple - of form `[index row]`. - - Returns a lazy sequence of matched rows." - [[[index current-item] & item-data] - [current-item-number & rest-item-numbers :as item-numbers]] - (cond - (or (nil? current-item-number) - (nil? index) - (= ::not-found current-item-number)) [] - - (= current-item-number index) (let [[repeated-item-numbers remaining-item-numbers] - (split-with #(= current-item-number %) item-numbers) - repeated-items (repeat (count repeated-item-numbers) current-item)] - (lazy-cat - repeated-items - (select-indexed item-data remaining-item-numbers))) - - (< current-item-number index) (select-indexed - (drop-while (fn [[index item]] - (not= index current-item-number)) - item-data) - rest-item-numbers) - (> current-item-number index) (select-indexed - (drop-while (fn [[index item]] - (not= index current-item-number)) - item-data) - ;; leave item-numbers as is (i.e. stay on current item after fast forwarding the data) - item-numbers))) - -(defn rows - "Takes a dataset and a seq of row-numbers and returns a dataset - consisting of just the supplied rows. If a row number is not found - the function will assume it has consumed all the rows and return - normally." - [dataset row-numbers] - (let [original-meta (meta dataset) - original-columns (column-names dataset) - rows (indexed (tabc/to-list dataset)) - filtered-rows (select-indexed rows row-numbers)] - - (-> (make-dataset filtered-rows - original-columns) - (with-meta original-meta)))) - -;; This type hint is actually correct as APersistentVector implements .indexOf -;; from java.util.List. -(defn- col-position [^java.util.List column-names col] - (if-let [canonical-col (tabc/resolve-col-id col column-names ::not-found)] - (let [val (.indexOf column-names canonical-col)] - (if (not= -1 val) - val - ::not-found)))) - -(defn- elided-col-description - "Print elided descriptions of columns for error messages with a sample set and - the rest hidden behind an elipsis, e.g. \":one, :two, :three ...\"" - [coll] - (let [[examples more] (take 2 (partition-all 3 coll)) - csv (str/join ", " examples) - ellision (when (seq more) - " ...")] - (str csv ellision))) - -(defn columns - "Given a dataset and a sequence of column identifiers, columns - narrows the dataset to just the supplied columns. - - Columns specified in the selection that are not included in the - Dataset will be silently ignored. - - The order of the columns in the returned dataset will be determined - by the order of matched columns in the selection. - - The supplied sequence of columns are first cropped to the number of - columns in the dataset before being selected, this means that - infinite sequences can safely supplied to this function." 
- [dataset cols]
-  (let [col-names (column-names dataset)
-        max-cols (count (:column-names dataset))
-        restrained-cols (take max-cols cols)
-        matched-col-positions (->> restrained-cols
-                                   (map (partial col-position col-names)))
-        valid-positions (filterv #(not= ::not-found %) matched-col-positions)
-        selected-cols (map #(nth col-names %) valid-positions)]
-    (if (seq selected-cols)
-      (all-columns dataset selected-cols)
-      (throw (IndexOutOfBoundsException. (str "The columns: "
-                                              (elided-col-description cols)
-                                              " are not currently defined."))))))
-
-(defn rename-columns
-  "Renames the columns in the dataset. Takes either a map or a
-  function. If a map is passed it will rename the specified keys to
-  the corresponding values.
-
-  If a function is supplied it will apply the function to all of the
-  column-names in the supplied dataset. The return values of this
-  function will then become the new column names in the dataset
-  returned by rename-columns."
-  [dataset col-map-or-fn]
-  {:pre [(or (map? col-map-or-fn)
-             (ifn? col-map-or-fn))]}
-
-  (if (map? col-map-or-fn)
-    (rename-columns dataset (fn [col] (col-map-or-fn col col)))
-    (let [original-meta (meta dataset)
-          old-key->new-key (partial map-keys col-map-or-fn)
-          new-columns (map col-map-or-fn
-                           (column-names dataset))]
-
-      (-> (make-dataset (tabc/to-list dataset)
-                        new-columns)
-          (with-meta original-meta)))))
-
-(defn reorder-columns
-  "Reorder the columns in a dataset to the supplied order. An error
-  will be raised if the supplied set of columns is different from the
-  set of columns in the dataset."
-  [{:keys [column-names] :as ds} cols]
-  (let [ds-cols (set column-names)
-        supplied-cols (map (partial tabc/resolve-column-id ds) cols)]
-
-    (when (not= ds-cols (set supplied-cols))
-      (throw (ex-info (str "The set of supplied column names " supplied-cols
-                           " must be equal to those in the dataset " ds-cols
-                           " to reorder.")
-                      {:type :reorder-columns-error
-                       :dataset-columns column-names
-                       :supplied-columns supplied-cols})))
-    (assoc ds :column-names supplied-cols)))
-
-(defn drop-rows
-  "Drops the first n rows from the dataset, retaining the rest."
-  [dataset n]
-  (tabc/pass-rows dataset (partial drop n)))
-
-(defn take-rows
-  "Takes only the first n rows from the dataset, discarding the rest."
-  [dataset n]
-  (tabc/pass-rows dataset (partial take n)))
-
-(defn- resolve-keys [headers hash]
-  (map-keys #(tabc/resolve-col-id % headers nil) hash))
-
-(defn- select-row-values [src-col-ids row]
-  (map #(get row %) src-col-ids))
-
-(defn- apply-f-to-row-hash [src-col-ids new-header f row]
-  (let [args-from-cols (select-row-values src-col-ids row)
-        new-col-val (apply f args-from-cols)
-        new-column-hash (resolve-keys new-header new-col-val)]
-    (merge row new-column-hash)))
-
-(defn derive-column
-  "Adds a new column to the end of the row which is derived from
-the supplied source column(s). f should just return the cell's value.

If no f is supplied the identity function is used, which results in
the specified column being cloned."
- ([dataset new-column-name from-cols]
-   (derive-column dataset new-column-name from-cols identity))
-
-  ([dataset new-column-name from-cols f]
-   (let [original-meta (meta dataset)
-         original-columns (column-names dataset)
-         from-cols (lift->vector from-cols)
-         resolved-from-cols (resolve-all-col-ids dataset from-cols)]
-
-     (-> (make-dataset (->> dataset
-                            :rows
-                            (map (fn [row]
-                                   (let [args-from-cols (select-row-values resolved-from-cols row)
-                                         new-col-val (apply f args-from-cols)]
-                                     (assoc row new-column-name new-col-val)))))
-                       (concat original-columns [new-column-name]))
-         (with-meta original-meta)))))
-
-(defn add-column
-  "Add a new column to a dataset with the supplied value lazily copied
-  into every row within it."
-
-  [dataset new-column value]
-  (let [ignored-column-id 0]
-    ;; all real datasets have a 0th column but grafter doesn't
-    ;; currently work with empty 0x0 datasets. We should support this
-    ;; case.
-    ;;
-    ;; TODO when we support these: https://trello.com/c/cdmlw7Xv we
-    ;; should update this code to work with empty datasets too.
-    (derive-column dataset new-column ignored-column-id (constantly value))))
-
-(defn- infer-new-columns-from-first-row [dataset source-cols f]
-  (let [source-cols (resolve-all-col-ids dataset source-cols)
-        first-row-values (->> dataset
-                              :rows
-                              first
-                              (select-row-values source-cols))
-        first-result (apply f first-row-values)
-        new-col-ids (keys first-result)]
-
-    new-col-ids))
-
-(defn add-columns
-  "Add several new columns to a dataset at once. There are a number of different parameterisations:
-
-  `(add-columns ds {:foo 10 :bar 20})`
-
-  Calling with two arguments where the second argument is a hash map
-  creates new columns in the dataset for each of the hashmap's keys and
-  copies the hash's values lazily down all the rows. This
-  parameterisation is designed to work well with build-lookup-table.
-
-  When given either a single column id or many along with a function
-  which returns a hashmap, add-columns will pass each cell from the
-  specified columns into the given function, and then associate its
-  returned map back into the dataset. e.g.
-
-  `(add-columns ds \"a\" (fn [a] {:b (inc a) :c (inc a)} )) ; =>`
-
-  | a | :b | :c |
-  |---|----|----|
-  | 0 | 1 | 1 |
-  | 1 | 2 | 2 |
-
-  As a dataset needs to know its columns in this case it will infer
-  them from the return value of the first row. If you don't want to
-  infer them from the first row then you can also supply them like so:
-
-  `(add-columns ds [:b :c] \"a\" (fn [a] {:b (inc a) :c (inc a)} )) ; =>`
-
-  | a | :b | :c |
-  |---|----|----|
-  | 0 | 1 | 1 |
-  | 1 | 2 | 2 |"
-
-  ([dataset hash]
-   (let [merge-cols (fn [ds k]
-                      (add-column ds k (hash k)))
-         keys (-> hash keys sort)]
-     ;; Yes, this is actually lazy with respect to rows, as we're
-     ;; just reducing new lazy columns onto our dataset.
- (reduce merge-cols dataset keys)))
-
-  ([dataset source-cols f]
-   (let [source-cols (lift->vector source-cols)
-         new-col-ids (infer-new-columns-from-first-row dataset source-cols f)]
-     (add-columns dataset new-col-ids source-cols f)))
-
-  ([dataset new-col-ids source-cols f]
-   (let [original-meta (meta dataset)
-         source-cols (lift->vector source-cols)
-         new-header (concat (:column-names dataset) new-col-ids)
-         col-ids (resolve-all-col-ids dataset source-cols)
-         apply-f-to-row (partial apply-f-to-row-hash col-ids new-header f)]
-
-     (-> (make-dataset (map apply-f-to-row (:rows dataset))
-                       new-header)
-         (with-meta original-meta)))))
-
-(defn- grep-row [dataset f]
-  (let [original-meta (meta dataset)
-        filtered-data (filter f (:rows dataset))]
-    (-> (make-dataset filtered-data
-                      (column-names dataset))
-        (with-meta original-meta))))
-
-(defmulti grep
-  "Filters rows in the table for matches. This multi-method
-  dispatches on the type of its second argument. It also takes any
-  number of column numbers as the final set of arguments. These
-  narrow the scope of the grep to only those columns. If no columns
-  are specified then grep operates on all columns."
-  (fn [table f & cols] (class f)))
-
-(defn- cells-from-columns
-  "Returns a seq of cells matching the supplied columns, cells are
-  stripped of column names by this process. If no columns are specified all the cell
-  values for the row are returned."
-  [col-set row]
-  (->> row
-       (filter (fn [[k v]] (col-set k)))
-       (map second)))
-
-(defmethod grep clojure.lang.IFn
-
-  [dataset f & cols]
-  (let [original-meta (meta dataset)
-        data (:rows dataset)
-        cols (if (nil? cols)
-               (column-names dataset)
-               (first cols))
-        col-set (into #{} cols)]
-
-    (-> (make-dataset (->> data
-                           (filter (fn [row]
-                                     (some f
-                                           (cells-from-columns col-set row)))))
-                      (column-names dataset))
-        (with-meta original-meta))))
-
-(defmethod grep java.lang.String [dataset s & cols]
-  (apply grep dataset (fn [^String cell] (.contains (str cell) s)) cols))
-
-(defmethod grep java.util.regex.Pattern [dataset p & cols]
-  (apply grep dataset #(re-find p (str %)) cols))
-
-;; grep with a sequence of integers is equivalent to using rows
-(defmethod grep clojure.lang.Sequential [dataset row-numbers]
-  (rows dataset row-numbers))
-
-(prefer-method grep clojure.lang.Sequential clojure.lang.IFn)
-
-(defmethod grep :default [dataset v & cols]
-  (apply grep dataset (partial = v) cols))
-
-(defn- remove-indices
-  "Removes the values at the supplied indexes from the given vector."
-  [col & idxs]
-  (let [pos (map - (sort idxs) (iterate inc 0))
-        remove-index (fn [col pos]
-                       (vec (concat (subvec col 0 pos)
-                                    (subvec col (inc pos)))))]
-    (reduce remove-index col pos)))
-
-(def _ "An alias for the identity function, used for providing positional arguments to mapc." identity)
-
-(defn- normalise-mapping
-  "Given a dataset and a map/vector mapping ids or positions to
-  values, return a map with normalised keys that map to the
-  appropriate values. A normalised mapping will contain identity
-  mappings for any omitted columns."
-  [dataset fs]
-  (let [resolve-ids (fn [id] (resolve-column-id dataset id id))
-        fs-hash (if (vector? fs)
-                  (zipmap (column-names dataset) fs)
-                  (map-keys resolve-ids fs))
-        other-hash (zipmap (vec (set/difference (set (:column-names dataset))
-                                                (set (keys fs-hash))))
-                           (repeat identity))
-        functions (conj fs-hash other-hash)]
-
-    functions))
-
-(defn- concat-new-columns
-  "Given a dataset and a set of column keys return an ordered vector
-  of the new column keys.
-
-  Any new column ids will be concatenated onto the end of the existing
-  columns preserving the order as best as possible.
-
-  Any duplicate ids found in col-ids will be removed."
-
-  [dataset col-ids]
-  (let [existing-column-ids (:column-names dataset)
-        resolve-new-ids (fn [i] (resolve-column-id dataset i i))]
-
-    (concat existing-column-ids
-            (remove (set existing-column-ids) (map resolve-new-ids col-ids)))))
-
-(defn mapc
-  "Takes a vector or a hashmap of functions and maps each to its key's
-  column for every row. Each function should be from a cell to a
-  cell, whereas with apply-columns it should be from a column to a
-  column i.e. it's a function from a collection of cells to a collection
-  of cells.
-
-  If the specified column does not exist in the source data a new
-  column will be created, though the supplied function will need to
-  either ignore its argument or handle a nil argument."
-  [dataset fs]
-  (let [original-meta (meta dataset)
-        functions (normalise-mapping dataset fs)
-        new-columns (concat-new-columns dataset (keys functions))
-        apply-functions (fn [row] ;; TODO consider using zipmap to do this job
-                          (let [apply-column-f (fn [[col-id f]]
-                                                 (let [fval (f (row col-id))]
-                                                   {col-id fval}))]
-                            (apply merge (map apply-column-f
-                                              functions))))]

-    (-> (make-dataset (->> dataset :rows (map apply-functions))
-                      new-columns)
-        (with-meta original-meta))))
-
-(defn apply-columns
-  "Like mapc in that you associate functions with particular columns,
-  though it differs in that the functions given to mapc should receive
-  and return values for individual cells.
-
-  With apply-columns, the function receives a collection of cell
-  values from the column and should return a collection of values for
-  the column.
-
-  It is also possible to create new columns with apply-columns for
-  example to assign row ids you can do:
-
-  `(apply-columns ds {:row-id (fn [_] (grafter.sequences/integers-from 0))})`"
-  [dataset fs]
-  (let [original-meta (meta dataset)
-        functions (normalise-mapping dataset fs)
-        new-columns (concat-new-columns dataset (keys functions))
-        apply-columns-f (fn [rows]
-                          ;; TODO consider implementing this in
-                          ;; terms of either incanter.core/to-map
-                          ;; or zipmap
-                          (let [apply-to-cols (fn [[col f]]
-                                                (->> rows
-                                                     (map (fn [r] (get r col)))
-                                                     f
-                                                     (map (fn [r] {col r}))))]
-                            (->> functions
-                                 (map apply-to-cols)
-                                 (apply (partial map merge)))))]
-
-    (-> (make-dataset (->> dataset :rows apply-columns-f)
-                      new-columns)
-        (with-meta original-meta))))
-
-(defn swap
-  "Takes an even number of column names and swaps each pair of columns"
-
-  ([dataset first-col second-col]
-   (let [original-meta (meta dataset)
-         data (:rows dataset)
-         header (column-names dataset)
-         swapper (fn [v i j]
-                   (-> v
-                       (assoc i (v j))
-                       (assoc j (v i))))]
-
-     (-> (make-dataset data
-                       (-> header
-                           (swapper (col-position header first-col)
-                                    (col-position header second-col))))
-         (with-meta original-meta))))
-
-  ([dataset first-col second-col & more]
-   (if (even? (count more))
-     (if (seq more)
-       (reduce (fn [ds [f s]]
-                 (swap ds f s))
-               (swap dataset first-col second-col)
-               (partition 2 more))
-       (swap dataset first-col second-col))
-     (throw (Exception.
"Number of columns should be even"))))) - -(defn- remaining-keys [dataset key-cols] - (let [remaining-keys (->> key-cols - (set/difference (set (:column-names dataset))))] - - remaining-keys)) - -(defn- order-values [key-cols hash] - (map #(get hash %) key-cols)) - -(defn resolve-key-cols [dataset key-cols] - (->> (set (lift->vector key-cols)) - (order-values key-cols) - (resolve-all-col-ids dataset))) - -(defn build-lookup-table - "Takes a dataset, a vector of any number of column names corresponding - to key columns and a column name corresponding to the value - column. - Returns a function, taking a vector of keys as - argument and returning the value wanted" - ([dataset key-cols] - (build-lookup-table dataset key-cols nil)) - - ([dataset key-cols return-keys] - (let [key-cols (resolve-key-cols dataset (lift->vector key-cols)) - return-keys (resolve-all-col-ids dataset - (if (nil? return-keys) - (remaining-keys dataset key-cols) - (lift->vector return-keys))) - - keys (->> (all-columns dataset key-cols) - :rows - (map (fn [hash] - (let [v (vals hash)] - (if (= (count v) 1) - (first v) - ;; else return them in key-col order - (order-values key-cols hash)))))) - val (:rows (all-columns dataset return-keys)) - table (zipmap keys val)] - table))) - -(defn ^:no-doc get-column-by-number* - "This function is intended for use by the graph-fn macro only, and - should not be considered part of this namespaces public interface. - It is only public because it is used by a macro." - [ds row index] - (let [col-name (grafter.tabular/resolve-column-id ds index ::not-found)] - (if-not (= col-name ::not-found) - (get row col-name ::not-found)))) - -(defn- generate-vector-bindings [ds-symbol row-symbol row-bindings] - (let [bindings (->> row-bindings - (map-indexed (fn [index binding] - [binding `(get-column-by-number* ~ds-symbol ~row-symbol ~index)])) - (apply concat) - (apply vector))] - bindings)) - -(defn- splice-supplied-bindings [row-sym row-bindings] - `[~row-bindings ~row-sym]) - -(defmacro graph-fn - "A macro that defines an anonymous function to convert a tabular - dataset into a graph of RDF quads. Ultimately it converts a - lazy-seq of rows inside a dataset, into a lazy-seq of RDF - Statements. - - The function body should be composed of any number of forms, each of - which should return a sequence of RDF quads. These will then be - concatenated together into a flattened lazy-seq of RDF statements. - - Rows are passed to the function one at a time as hash-maps, which - can be destructured via Clojure's standard destructuring syntax. - - Additionally destructuring can be done on row-indicies (when a - vector form is supplied) or column names (when a hash-map form is - supplied)." - - [[row-bindings] & forms] - {:pre [(or (symbol? row-bindings) (map? row-bindings) - (vector? row-bindings))]} - - (let [row-sym (gensym "row") - ds-sym (gensym "ds")] - `(with-meta (fn [~ds-sym] - (let [ds-rows# (:rows ~ds-sym) - ds-meta# (meta ~ds-sym)] - (letfn [(graphify-row# [~row-sym] - (let ~(if (vector? row-bindings) - (generate-vector-bindings ds-sym row-sym row-bindings) - (splice-supplied-bindings row-sym row-bindings)) - (->> (concat ~@forms) - (map (fn ~'with-row-meta [triple#] - (let [meta# {::row ~row-sym} - meta# (if ds-meta# - (assoc meta# ::dataset ds-meta#) - meta#)] - (with-meta triple# meta#)))))))] - - (mapcat graphify-row# ds-rows#)))) - ;; Add metadata to function definition to support - ;; grafter.rdf.preview/graph-preview functionality. 
-
-       ;;
-       ;; NOTE: We quote these forms to prevent infinite recursive expansion of
-       ;; the macro
-       {::template (quote ~&form)
-        ::defined-in-ns (quote ~(.getName *ns*))})))
diff --git a/src/tabular/grafter/tabular/common.clj b/src/tabular/grafter/tabular/common.clj
deleted file mode 100644
index 44b9852d..00000000
--- a/src/tabular/grafter/tabular/common.clj
+++ /dev/null
@@ -1,353 +0,0 @@
-(ns grafter.tabular.common
-  {:no-doc true}
-  (:require [clj-excel.core :as xls]
-            [grafter.sequences :as seqs]
-            [clojure.java.io :as io]
-            [incanter.core :as inc]
-            [me.raynes.fs :as fs])
-  (:import [org.apache.poi.ss.usermodel Sheet]
-           [incanter.core Dataset]
-           [java.io File InputStream]
-           [java.net URI URL]))
-
-
-(defn mapply
-  "Like apply, but f takes keyword arguments and the last argument is
-  not a seq but a map with the arguments for f"
-  [f & args]
-  {:pre [(let [kwargs (last args)] (or (map? kwargs) (nil? kwargs)))]}
-  (apply f (apply concat
-                  (butlast args) (last args))))
-
-(defn move-first-row-to-header
-  "For use with make-dataset.  Moves the first row of data into the
-  header, removing it from the source data."
-  [[first-row & other-rows]]
-
-  [first-row other-rows])
-
-(defn- fill-gaps-with-nil [rows headers]
-  (let [blank-row (zipmap headers (repeat (count headers) nil))
-        pad-with-nils (fn [row] (let [row-map (if (map? row) row (zipmap headers row))]
-                                  (merge blank-row row-map)))]
-    (map pad-with-nils rows)))
-
-(defmulti to-list
-  "
-  Returns a list-of-lists if the given matrix is two-dimensional
-  and a flat list if the matrix is one-dimensional.
-
-  Replaces incanter's to-list with a version that doesn't hold onto the head.
-  "
-  type)
-
-(defmethod to-list :incanter.core/matrix
-  ([^clatrix.core.Matrix mat]
-   (clatrix.core/as-vec mat)))
-
-(defmethod to-list :incanter.core/dataset
-  [data]
-  (let [original-columns (vec (:column-names data))]
-    (map (fn [row] (map (fn [col] (row col))
-                        original-columns))
-         (:rows data))))
-
-(defmethod to-list :default [s] s)
-
-(defmethod to-list nil [s] nil)
-
-
-(defn make-dataset
-  "Like incanter's dataset function except it can take a lazy-sequence
-  of column names which will get mapped to the source data.
-
-  Works by inspecting the number of columns in the first row, and
-  taking that many column names from the sequence.
-
-  If no column names are supplied, it inspects the first row of data
-  to determine the number of columns and creates an incanter dataset
-  with columns named alphabetically as by
-  grafter.sequences/column-names-seq."
-
-  ([]
-   (inc/dataset []))
-
-  ([data]
-   (if (inc/dataset? data)
-     data
-     (let [columns (take (-> data first count) (seqs/alphabetical-column-names))]
-       (make-dataset data columns))))
-
-  ([data columns-or-f]
-   (let [original-meta (meta data)
-         data-seq (if (inc/dataset? data) (to-list data) data)
-         [column-headers rows] (if (fn? columns-or-f)
-                                 (columns-or-f data-seq)
-                                 [columns-or-f data-seq])
-         full-data (fill-gaps-with-nil rows column-headers)]
-     (-> (inc/dataset column-headers full-data)
-         (with-meta original-meta)))))
-
-(defn dataset?
-  "Predicate function to test whether the supplied argument is a
-  dataset or not."
-  [ds]
-  (or (instance? incanter.core.Dataset ds)
-      (and (map? ds)
-           (:rows ds)
-           (:column-names ds))))
-
-(def column-names
-  "If given a dataset, it returns its column names. If given a dataset and a sequence
-  of column names, it returns a dataset with the given column names."
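;; A short sketch of `make-dataset` with a header-extracting function
;; (hypothetical data, for illustration):
;;
;;   (make-dataset [["name" "age"] ["alice" 30] ["bob" 25]]
;;                 move-first-row-to-header)
;;   ;; => a dataset with columns ["name" "age"] and two rows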
-  inc/col-names)
-
-(defn pass-rows
-  "Passes the function f the collection of raw rows from the dataset
-  and returns a new dataset containing (f rows) as its data.
-
-  f should expect a collection of row maps and return a collection of
-  rows.
-
-  This function is intended to be used by Grafter itself and Grafter
-  library authors.  It's not recommended for users of the DSL, because
-  callers of this function need to be aware of Dataset implementation
-  details."
-  [dataset f]
-  (let [original-meta (meta dataset)
-        original-columns (column-names dataset)]
-    (-> (make-dataset (->> dataset :rows f)
-                      original-columns)
-        (with-meta original-meta))))
-
-(defn ^:no-doc extension
-  "Gets the extension for the given file name as a keyword, or nil if the file has no extension."
-  [f]
-  (when-let [^String ext (-> f fs/extension)]
-    (-> ext
-        (.substring 1)
-        keyword)))
-
-(defn format-or-type [ds {:keys [format]}]
-  (if (#{File String} (class ds))
-    (or format (extension ds))
-    (class ds)))
-
-(defn- get-format [source {:keys [format] :as opts}]
-  format)
-
-(defn assoc-data-source-meta
-  "Adds metadata about where the dataset was loaded from to the object."
-  [output-ds data-source]
-  (let [source-meta (cond
-                      (#{String File URI URL} (class data-source)) {:grafter.tabular/data-source data-source}
-                      (instance? incanter.core.Dataset data-source) (meta data-source)
-                      :else {:grafter.tabular/data-source :datasource-unknown})]
-    (with-meta output-ds (merge (meta output-ds) source-meta))))
-
-(defmulti ^:no-doc read-dataset*
-  "Multimethod for adapter implementers to hook custom dataset readers
-  into grafter.
-
-  API users should use the front end function read-dataset instead of
-  calling this."
-
-  get-format)
-
-(defprotocol DatasetFormat
-  "Represents a type from which it may be possible to infer the format
-  of the contained data."
-  (infer-format [source]
-    "Attempt to infer the data format of the given source.  Should
-    return a keyword if the format was inferred, or nil if the
-    inference failed."))
-
-(extend-protocol DatasetFormat
-  String
-  (infer-format [s] (extension s))
-
-  File
-  (infer-format [f] (extension (.getName f)))
-
-  java.net.URL
-  (infer-format [url] (extension (.getPath url)))
-
-  java.net.URI
-  (infer-format [uri] (extension (.getPath uri)))
-
-  nil
-  (infer-format [_] nil))
-
-(defn- ^:no-doc infer-format-of
-  "Attempt to infer the format of the given data source.  Returns nil
-  if the format could not be inferred."
-  [source]
-  (if (satisfies? DatasetFormat source)
-    (infer-format source)))
-
-(defmulti read-dataset-source
-  "Opens a dataset from a datasetable thing e.g. a filename or an existing Dataset.
-  The multi-method dispatches based upon the type of the source.
-
-  Supplied options are passed to the individual handler methods and they may
-  have their own requirements on the options provided."
-  ;; NOTE: This is not a protocol, because protocols don't give you a :default
-  ;; option for dispatch.
-  (fn [src opts]
-    (class src)))
-
-(defmulti write-dataset-source
-  "Writes a dataset to a destination e.g. a filename or an existing Dataset.
-  The multi-method dispatches based upon the type of the destination.
-
-  Supplied options are passed to the individual handler methods and they may
-  have their own requirements on the options provided."
-  ;; NOTE: This is not a protocol, because protocols don't give you a :default
-  ;; option for dispatch.
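;; Format inference via the DatasetFormat protocol above, as a sketch
;; (hypothetical file names):
;;
;;   (infer-format (io/file "accounts.csv")) ;; => :csv
;;   (infer-format "report.xlsx")            ;; => :xlsx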
-  (fn [dest ds opts]
-    (class dest)))
-
-(defn- ^:no-doc dispatch-with-format-option
-  "Takes a function to call, a data source and an options hash containing an
-  optional :format key.
-
-  If :format is not provided then an attempt will be made to infer it from the
-  data source via the DatasetFormat protocol.
-
-  Once the format is resolved, the target function is called with the
-  data source and options map."
-  [f source {:keys [format] :as opts}]
-  (if-let [format (or format (infer-format-of source))]
-    (f source (assoc opts :format format))
-    (throw (IllegalArgumentException. (str "Please specify a format, it could not be inferred from the source: " source)))))
-
-(defmethod read-dataset-source Dataset [ds opts] ds)
-
-(defmethod read-dataset-source :default [source opts]
-  (dispatch-with-format-option read-dataset* source opts))
-
-(defn read-dataset [source & {:as opts}]
-  (-> (read-dataset-source source opts)
-      (assoc-data-source-meta source)))
-
-(defmulti read-datasets-source
-  "Reads a sequence of datasets from a given data source given a map of
-  options.  Dispatches on the type of the data source.
-
-  NOTE: implementations for different source types may have different
-  requirements for the provided options."
-  (fn [source {:keys [sheet] :as opts}]
-    (when sheet
-      (throw (IllegalArgumentException. "read-datasets cannot open a single sheet. Use read-dataset* to do this.")))
-    (class source)))
-
-(defmulti read-datasets*
-  get-format)
-
-(defmethod read-datasets-source clojure.lang.Sequential [datasets opts]
-  datasets)
-
-(defmethod read-datasets-source :default [source opts]
-  (dispatch-with-format-option read-datasets* source opts))
-
-(defn read-datasets
-  "Opens a lazy sequence of datasets from something that returns multiple
-  datasetables - e.g. all the worksheets in an Excel workbook."
-
-  [dataset & {:keys [format] :as opts}]
-  (read-datasets-source dataset opts))
-
-(defmulti ^:no-doc write-dataset*
-  "Multi-method for adapter implementers to extend to allow
-  serialising datasets into various different formats."
-  (fn [destination dataset opts]
-    (when-not (dataset? dataset)
-      (throw (IllegalArgumentException.
-              (str "Could not write dataset to " destination " as " (class dataset)
-                   " is not a valid Dataset. This error usually occurs if you try to generate tabular data from a graft"))))
-    (get-format destination opts)))
-
-(defn- ^:no-doc dispatch-write-with-format-option
-  "Like dispatch-with-format-option, but for writers, so it takes the
-  dataset as an additional argument."
-  [f dest ds {:keys [format] :as opts}]
-  (if-let [format (or format (infer-format-of dest))]
-    (f dest ds (assoc opts :format format))
-    (throw (IllegalArgumentException. (str "Please specify a format, it could not be inferred from the destination: " dest)))))
-
-(defmethod write-dataset-source :default [dest ds opts]
-  (dispatch-write-with-format-option write-dataset* dest ds opts))
-
-(defmethod write-dataset-source Dataset [dest ds opts] ds)
-
-(defn write-dataset
-  [destination dataset & {:keys [format] :as opts}]
-  (write-dataset-source destination dataset opts))
-
-(defn without-metadata-columns
-  "Ignores any possible metadata and leaves the dataset as is."
-  [[context data]]
-  data)
-
-(defn with-metadata-columns
-  "Takes a pair of [context, data] and returns a dataset, where the
-  metadata context is merged into the dataset itself."
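;; A round-trip sketch for the read/write front ends defined above
;; (hypothetical paths; :format overrides extension-based inference):
;;
;;   (def ds (read-dataset "data/report.txt" :format :csv))
;;   (write-dataset "out/report.csv" ds)
;;   (read-dataset "workbook.xls" :sheet "Sheet2")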
-  [[context data]]
-  (letfn [(merge-metadata-column [dataset-acc [k v]]
-            (inc/add-column k
-                            (repeat v)
-                            dataset-acc))]
-    (reduce merge-metadata-column data context)))
-
-(defn ^:no-doc dataset->seq-of-seqs
-  "Converts a dataset into a seq-of-seqs representation"
-  [dataset]
-  (let [col-order (:column-names dataset)
-        data (:rows dataset)
-        rows (map (fn [row]
-                    (map (fn [item]
-                           (get row item)) col-order))
-                  data)
-        output-data (concat [(map name col-order)] rows)]
-    output-data))
-
-(defn ^:no-doc resolve-col-id [column-key headers not-found]
-  (let [converted-column-key (cond
-                               (string? column-key) (keyword column-key)
-                               (keyword? column-key) (name column-key)
-                               (integer? column-key) (nth headers column-key not-found))]
-    (if-let [val (some #{column-key converted-column-key} headers)]
-      val
-      not-found)))
-
-(defn resolve-column-id
-  "Finds and resolves the column id by converting between keywords and
-  strings.  If column-key is not found in the dataset's headers then
-  not-found is returned."
-
-  ([dataset column-key] (resolve-column-id dataset column-key nil))
-  ([dataset column-key not-found]
-
-   (let [headers (column-names dataset)]
-     (resolve-col-id column-key headers not-found))))
-
-(defn ^:no-doc map-keys
-  "Apply f to the keys in the supplied hashmap and return a new
-  hashmap."
-  [f hash]
-  (zipmap (map f (keys hash))
-          (vals hash)))
-
-(defn ^:no-doc lift->vector
-  "Lifts singular values into a sequential collection.  If the given
-  argument is sequential then it is returned, otherwise a sequential
-  container containing the value is returned."
-  [x]
-  (if (sequential? x) x [x]))
-
-(defmacro register-format-alias
-  "Register an extra format alias to be handled by a root multi-method (either
-  read-dataset*, read-datasets* or write-dataset*).
-
-  This works by building defmethod definitions that delegate to the root-key
-  dispatch value for each of the supplied aliases."
-  [multi-fn-symbol root-key alias]
-
-  (let [args 'args]
-    `(defmethod ~multi-fn-symbol ~alias [& ~args]
-       (let [opts# (merge (last ~args) {:format ~root-key})]
-         (apply ~multi-fn-symbol (concat (drop-last ~args) [opts#]))))))
diff --git a/src/tabular/grafter/tabular/csv.clj b/src/tabular/grafter/tabular/csv.clj
deleted file mode 100644
index 95edbb50..00000000
--- a/src/tabular/grafter/tabular/csv.clj
+++ /dev/null
@@ -1,36 +0,0 @@
-(ns grafter.tabular.csv
-  {:no-doc true}
-  (:require [clojure.data.csv :as csv]
-            [clojure.java.io :as io]
-            [grafter.tabular.common :as tab]
-            [grafter.rdf.protocols :refer [raw-value]])
-  (:import [java.io IOException]
-           [org.apache.commons.io.input BOMInputStream]))
-
-(defmethod tab/read-dataset* :csv
-  [source opts]
-  (let [reader (-> source
-                   io/input-stream
-                   BOMInputStream.
-                   (#(tab/mapply io/reader % opts)))
-        csv-seq (tab/mapply csv/read-csv reader opts)]
-    (if (nil? csv-seq)
-      (throw (IOException. 
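;; `register-format-alias` expands into a delegating defmethod, e.g. for the
;; alias registered below in this file:
;;
;;   (tab/register-format-alias tab/read-dataset* :csv "text/csv")
;;   ;; ~= (defmethod tab/read-dataset* "text/csv" [& args]
;;   ;;      (apply tab/read-dataset*
;;   ;;             (concat (drop-last args)
;;   ;;                     [(merge (last args) {:format :csv})])))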
(str "There was an error loading the CSV file: " source))) - (tab/make-dataset csv-seq)))) - -(defmethod tab/read-datasets* :csv - [source opts] - (when-let [ds (tab/mapply tab/read-dataset source opts)] - [{"csv" ds}])) - -(defmethod tab/write-dataset* :csv [destination dataset {:keys [format] :as opts}] - (with-open [output (io/writer destination)] - (let [rows (tab/dataset->seq-of-seqs dataset) - stringified-rows (map (partial map raw-value) rows)] - (apply csv/write-csv output - stringified-rows - (mapcat identity opts))))) - -(tab/register-format-alias tab/read-dataset* :csv "text/csv") -(tab/register-format-alias tab/read-datasets* :csv "text/csv") -(tab/register-format-alias tab/write-dataset* :csv "text/csv") diff --git a/src/tabular/grafter/tabular/edn.clj b/src/tabular/grafter/tabular/edn.clj deleted file mode 100644 index 16e703ec..00000000 --- a/src/tabular/grafter/tabular/edn.clj +++ /dev/null @@ -1,31 +0,0 @@ -(ns grafter.tabular.edn - {:no-doc true} - (:require [grafter.tabular.common :as tab] - [clojure.edn :as edn] - [incanter.core :refer [->Dataset]] - [clojure.java.io :refer [reader writer]]) - (:import [java.io File IOException] - [incanter.core Dataset])) - -(defn- load-dataset [contents] - (apply ->Dataset contents)) - -(defmethod tab/read-dataset* :edn - [source opts] - - ;; TODO: make this read lazily - (let [edn-value (edn/read-string {:readers {'incanter.core.Dataset load-dataset}} - (slurp source))] - (if (instance? Dataset edn-value) - edn-value - (throw (ex-info (str "Unexpected object found in edn file. Expected a Dataset, and got a " edn-value) {:error :file-format-error}))))) - -;; TODO read-datasets* - -(defmethod tab/write-dataset* :edn [destination dataset opts] - (with-open [out (writer destination)] - (print-dup dataset out))) - -(tab/register-format-alias tab/read-dataset* :edn "application/edn") -;;(tab/register-format-alias tab/read-datasets* :edn "application/edn") -(tab/register-format-alias tab/write-dataset* :csv "application/edn") diff --git a/src/tabular/grafter/tabular/excel.clj b/src/tabular/grafter/tabular/excel.clj deleted file mode 100644 index 9827e8a4..00000000 --- a/src/tabular/grafter/tabular/excel.clj +++ /dev/null @@ -1,115 +0,0 @@ -(ns grafter.tabular.excel - {:no-doc true} - (:require [clj-excel.core :as xls] - [grafter.tabular.common :as tab] - [clojure.java.io :as io]) - (:import - [java.net URI URL] - [org.apache.poi.ss.usermodel Cell])) - -;; Extend the clj-excel multi-method to handle expected grafter types -;; when outputting as an Excel file. -(defmethod xls/cell-mutator org.openrdf.model.URI [^Cell cell ^org.openrdf.model.URI val] (.setCellValue cell (str val))) -(defmethod xls/cell-mutator URI [^Cell cell ^URI val] (.setCellValue cell (str val))) -(defmethod xls/cell-mutator URL [^Cell cell ^URI val] (.setCellValue cell (str val))) -(defmethod xls/cell-mutator :default [^Cell cell val] (.setCellValue cell (str val))) - -(defn- sheets - "Returns a seq of maps from sheet-name => sheet-data in the order - they are in the workbook." 
- [wb & [fname]] - (map (fn [name sheet] - {name (let [ds (tab/make-dataset (xls/lazy-sheet sheet))] - (if fname - (tab/assoc-data-source-meta ds fname) - ds))}) - (xls/sheet-names wb) (xls/sheets wb))) - -(defn- get-sheet-map [sheet-seq sheet] - (if sheet - (apply merge sheet-seq) - (first sheet-seq))) - -(defn- get-sheet [sheet-map sheet] - (if sheet - (get sheet-map sheet) - (first (vals sheet-map)))) - -(defn- read-dataset** [wb {:keys [sheet] :as opts}] - (-> wb - sheets - (get-sheet-map sheet) - (get-sheet sheet))) - -(defmethod tab/read-dataset* :xls - [source opts] - (-> source - xls/workbook-hssf - (read-dataset** opts))) - -(defmethod tab/read-dataset* "application/vnd.ms-excel" - [source opts] - (tab/read-dataset* source (merge opts {:format :xls}))) - -(defmethod tab/read-dataset* "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - [source opts] - (tab/read-dataset* source (merge opts {:format :xlsx}))) - -(defmethod tab/read-dataset* :xlsx - [source opts] - (-> source - xls/workbook-xssf - (read-dataset** opts))) - -(defmethod tab/read-datasets* :xls - [source opts] - (-> source - xls/workbook-hssf - (sheets source))) - -(defmethod tab/read-datasets* :xlsx - [source opts] - (-> source - xls/workbook-xssf - (sheets source))) - -(defmethod tab/read-datasets* "application/vnd.ms-excel" - [source opts] - (tab/read-datasets* source (merge opts {:format :xls}))) - -(defmethod tab/read-datasets* "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - [source opts] - (tab/read-datasets* source (merge opts {:format :xlsx}))) - -(defn write-dataset** [destination wb dataset-map] - (with-open [output (io/writer destination)] - (xls/save (xls/build-workbook wb dataset-map) - destination))) - -(defmethod tab/write-dataset* :xlsx - [destination dataset {:keys [format sheet-name] :or {sheet-name "Sheet1" } :as opts}] - (write-dataset** destination (xls/workbook-xssf) - {sheet-name (tab/dataset->seq-of-seqs dataset) })) - -(defmethod tab/write-dataset* :xls - [destination dataset {:keys [format sheet-name] :or {sheet-name "Sheet1" } :as opts}] - - (write-dataset** destination (xls/workbook-hssf) - {sheet-name (tab/dataset->seq-of-seqs dataset) })) - -(defmethod tab/write-dataset* "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - [destination dataset opts] - (tab/write-dataset* destination dataset (merge opts {:format :xlsx}))) - -(defmethod tab/write-dataset* "application/vnd.ms-excel" - [destination dataset opts] - (tab/write-dataset* destination dataset (merge opts {:format :xls}))) - -(tab/register-format-alias tab/read-dataset* :xls "application/vnd.ms-excel") -(tab/register-format-alias tab/read-dataset* :xlsx "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") - -(tab/register-format-alias tab/read-datasets* :xls "application/vnd.ms-excel") -(tab/register-format-alias tab/read-datasets* :xlsx "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") - -(tab/register-format-alias tab/write-dataset* :xls "application/vnd.ms-excel") -(tab/register-format-alias tab/write-dataset* :xlsx "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") diff --git a/src/tabular/grafter/tabular/melt.clj b/src/tabular/grafter/tabular/melt.clj deleted file mode 100644 index 958e308f..00000000 --- a/src/tabular/grafter/tabular/melt.clj +++ /dev/null @@ -1,141 +0,0 @@ -(ns ^:no-doc grafter.tabular.melt - "Functions for melting data and building variations on melt." 
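;; Reading every worksheet of a workbook with the adapters above, as a
;; sketch (hypothetical file name):
;;
;;   (grafter.tabular/read-datasets "reports.xlsx")
;;   ;; => ({"Sheet1" <dataset>} {"Sheet2" <dataset>} ...)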
-  (:require
-   [clojure.set :as set]
-   [grafter.tabular.common :refer :all]))
-
-(defn mapcat-rows
-  "Transforms a dataset by performing a mapcat operation on the
-  rows. Each row in the input is transformed to multiple rows in the
-  output by the given transform function.
-
-  Accepts the arguments:
-
-  - dataset: The dataset to transform.
-
-  - columns: The collection of columns in the output dataset.
-
-  - f: function (Row -> Seqable Row) which transforms each row in the
-  input into a sequence of new rows in the output.
-
-  Each output row should contain the columns passed in the columns
-  parameter."
-
-  [dataset columns f]
-  (-> (make-dataset (mapcat f (:rows dataset)) columns)
-      (with-meta (meta dataset))))
-
-(defn- melt-gen
-  "Generalised version of melt.
-
-  Takes the following arguments:
-
-  - dataset: The dataset to melt.
-
-  - pivot-keys: The collection of fixed columns in the output table.
-
-  - generated-column-names: The collection of new columns in the
-  output table. The output table will have (concat pivot-keys
-  generated-column-names) columns.
-
-  - col-partition-fn: Function to group the non-fixed columns into a
-  number of partitions. A new row will be created for every partition
-  in every row in the input table. For example, if the input table has
-  4 rows, and col-partition-fn creates 3 groups for the non-fixed
-  columns in the input, then the output table will contain 3 * 4 = 12
-  rows.
-
-  - row-builder-fn: Function (ColumnPartition -> Row -> RowFragment).
-  This function is used to create the variable fragment of an output
-  row given the source row in the input table and the corresponding
-  column partition returned from col-partition-fn. The row in the
-  output table is created by merging the variable fragment returned
-  from this function with the fixed part defined by the
-  pivot-keys. The returned fragment should contain values for the
-  column names in the generated-column-names parameter."
-  {:doc/format :markdown}
-
-  [dataset pivot-keys generated-column-names col-partition-fn row-builder-fn]
-  (let [canonicalise-key (partial resolve-column-id dataset)
-        pivot-keys (map canonicalise-key (lift->vector pivot-keys))
-        input-columns (map canonicalise-key (column-names dataset))
-        output-columns (concat pivot-keys generated-column-names)
-        melted-columns (set/difference (set input-columns) (set pivot-keys))
-        ordered-melted-columns (keep melted-columns input-columns)
-        col-partition (col-partition-fn ordered-melted-columns)
-        f (fn [row]
-            (let [pivot-values (select-keys row pivot-keys)]
-              (map (fn [cp] (merge pivot-values (row-builder-fn cp row))) col-partition)))]
-    (mapcat-rows dataset output-columns f)))
-
-(defn melt
-  "Melt an object into a form suitable for easy casting, like a melt function in R.
-  It accepts multiple pivot keys (identifier variables that are
-  reproduced for each row in the output).
-
-  `(use '(incanter core charts datasets))`
-
-  `(view (with-data (melt (get-dataset :flow-meter) :Subject)`
-
-  `(line-chart :Subject :value :group-by :variable :legend true)))`
-
-  See http://www.statmethods.net/management/reshape.html for more
-  examples."
-  {:doc/format :markdown}
-  [dataset pivot-keys]
-  (letfn [(col-partition [cols] (map (fn [c] [c]) cols))
-          (build-row [[c] row] {:variable c :value (row c)})]
-    (melt-gen dataset pivot-keys [:variable :value] col-partition build-row)))
-
-(defn ^:no-doc melt-column-groups
-  "Melts a dataset into groups defined by the list of given column
-  names. 
Given a collection of pivot columns and a collection of group
-  column names, this splits each row in the input into a collection of
-  groups and creates a row in the output for each group.  Each group
-  has the length of the supplied column-name group, and it is an error
-  if the group size does not divide the number of non-fixed columns
-  exactly.
-
-  For example, given an input table:
-
-  | :measure | :q1-2013 | :q2-2013 | :q3-2013 | :q4-2013 | :q1-2014 | :q2-2014 | :q3-2014 | :q4-2014 |
-  |--------------------------------------------------------------------------------------------------|
-  | :sales   | 100      | 250      | 200      | 400      | 90       | 200      | 150      | 600      |
-
-  This can be seen as a table with a fixed :measure column and two
-  groups containing four financial quarters.  This table can be
-  converted with
-
-  `(melt-column-groups ds [:measure] [:q1 :q2 :q3 :q4])`
-
-  into the table:
-
-  | :measure | :q1    | :q2   | :q3   | :q4   |
-  |----------|--------|-------|-------|-------|
-  | :sales   | 100    | 250   | 200   | 400   |
-  | :sales   | 90     | 200   | 150   | 600   |
-
-  Takes the arguments:
-
-  - dataset: The input dataset to melt.
-
-  - pivot-keys: The fixed group of columns to copy to each output row.
-
-  - output-column-names: Collection of column names that defines the
-  groups from the input row."
-  {:doc/format :markdown}
-  [dataset pivot-keys output-column-names]
-  (let [output-column-names (lift->vector output-column-names)]
-    (letfn [(col-partition [cols]
-              (let [group-size (count output-column-names)
-                    col-count (count cols)]
-                (if (= 0 (mod col-count group-size))
-                  (partition group-size cols)
-                  (throw (IllegalArgumentException.
-                          (str "Column group size should be a multiple of the "
-                               "number of non-fixed columns (" col-count ")."))))))
-            (build-row [cols row]
-              (let [col-map (zipmap cols output-column-names)]
-                (map-keys col-map (select-keys row cols))))]
-      (melt-gen dataset pivot-keys output-column-names col-partition build-row))))
diff --git a/test/grafter/pipeline/types_test.clj b/test/grafter/pipeline/types_test.clj
deleted file mode 100644
index 3766093c..00000000
--- a/test/grafter/pipeline/types_test.clj
+++ /dev/null
@@ -1,92 +0,0 @@
-(ns grafter.pipeline.types-test
-  (:require [grafter.pipeline.types :as sut]
-            [clojure.test :as t])
-  (:import java.net.URI))
-
-;; require this to pull in the types from grafter.tabular (required
-;; for the parameter-type-chain-prefer-method test)
-
-(require '[grafter.tabular])
-
-(def a-class Class)
-
-(def a-keyword ::keyword)
-
-(t/deftest resolve-parameter-type-test
-  (t/is (thrown? 
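;; A sketch of `resolve-parameter-type`, based on the assertions below:
;;
;;   (sut/resolve-parameter-type 'grafter.pipeline.types-test/a-class)
;;   ;; => java.lang.Class
;;   (sut/resolve-parameter-type 'grafter.pipeline.types-test/a-keyword)
;;   ;; => :grafter.pipeline.types-test/keyword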
IllegalArgumentException (= map (sut/resolve-parameter-type 'clojure.core/map)))
-        "Symbols resolve")
-  (t/is (= Class (sut/resolve-parameter-type 'grafter.pipeline.types-test/a-class))
-        "Symbols pointing to vars pointing to classes resolve to classes")
-  (t/is (= a-keyword (sut/resolve-parameter-type 'grafter.pipeline.types-test/a-keyword))
-        "Symbols to vars to keywords resolve to the keyword"))
-
-(t/deftest parse-parameter-test
-  (t/testing "::primitive values"
-    (t/are [type str-val coerced-val]
-        (let [parsed-param (sut/parse-parameter type str-val {})]
-          (t/is (= coerced-val parsed-param))
-          (t/is (= type (class coerced-val))))
-
-
-      Boolean "true" true
-      Boolean "nil" false
-
-      Integer "1234" (int 1234)
-      Float "2.0" (float 2.0)
-      Double "3.3" 3.3
-      Long "1234" 1234
-      clojure.lang.BigInt "1234" (bigint 1234)
-
-      String "Hello world" "Hello world"
-      clojure.lang.Keyword "coerced-into-a-keyword" :coerced-into-a-keyword
-
-      URI "http://example.com/test" (URI. "http://example.com/test")))
-
-  (t/testing "::edn-primitive hierarchy"
-    (t/are [target-type str-val coerced-type msg]
-        (t/is (isa? @sut/parameter-types
-                    (type (sut/parse-parameter target-type str-val {}))
-                    coerced-type)
-              msg)
-
-
-      ::sut/url "http://example.com/test" URI
-      "::sut/url coerces to a java.net.URI")))
-
-(swap! sut/parameter-types derive ::my-new-data-type ::sut/primitive)
-
-(swap! sut/parameter-types derive ::my-new-sub-type ::my-new-data-type)
-
-(t/deftest parameter-type-chain-test
-  (t/testing "parameter-type-chain reports the hierarchy ordered from sub-type to super-type"
-    (t/is (= [::my-new-sub-type
-              ::my-new-data-type
-              :grafter.pipeline.types/primitive
-              :grafter.pipeline.types/value
-              :grafter.pipeline.types/root-type]
-             (sut/parameter-type-chain ::my-new-sub-type)))))
-
-
-;; Records implement java.util.Map, so they can cause a problem in the
-;; inheritance hierarchy that needs to be resolved with
-;; clojure.core/prefer-method.  Create this in the tests to show how
-;; parameter-type-chain obeys prefer-method too. 
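;; Extending the coercion hierarchy, mirroring the `derive` calls above
;; (hypothetical ::csv-file type, for illustration):
;;
;;   (swap! sut/parameter-types derive ::csv-file ::sut/primitive)
;;   (defmethod sut/parse-parameter [String ::csv-file] [_ s opts]
;;     (clojure.java.io/file s))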
- -(defrecord TestRecordType []) - -(defmethod sut/parse-parameter [String TestRecordType] [_ val opts] - ::incanter-dataset-method) - -(defmethod sut/parse-parameter [String java.util.Map] [_ val opts] - ::map-method) - -(prefer-method sut/parse-parameter grafter.pipeline.types_test.TestRecordType java.util.Map) - -(t/deftest parameter-type-chain-prefer-method - (t/testing "Uses clojure.core/prefer-method to resolve multiple-inheritance conflicts in ordering parameter-type-chain" - (t/is (= [grafter.pipeline.types_test.TestRecordType java.util.Map] - (sut/parameter-type-chain grafter.pipeline.types_test.TestRecordType)))) - - (t/testing "Uses clojure.core/prefer-method to resolve multiple-inheritance conflicts in ordering parameter-type-chain" - (t/is (= [incanter.core.Dataset :grafter.pipeline.types/tabular-dataset :grafter.pipeline.types/file :grafter.pipeline.types/root-type] - (sut/parameter-type-chain incanter.core.Dataset))))) diff --git a/test/grafter/pipeline_test.clj b/test/grafter/pipeline_test.clj deleted file mode 100644 index 40d1a06b..00000000 --- a/test/grafter/pipeline_test.clj +++ /dev/null @@ -1,259 +0,0 @@ -(ns grafter.pipeline-test - (:require [grafter.pipeline :refer :all] - [clojure.test :refer :all] - [grafter.pipeline.types :as types] - [grafter.tabular :refer [test-dataset]] - [grafter.rdf] - [schema.core :as s]) - (:import [java.net URI URL] - [java.util Map UUID] - [incanter.core Dataset])) - -(defn test-dataset-creator [rows cols] - (grafter.tabular/test-dataset rows cols)) - -(declare-pipeline test-dataset-creator [Integer Integer -> Dataset] - {rows "The number of rows of test data you want." - cols "The number of columns of test data you want."}) - -(defn convert-persons-data-to-graphs - [number-of-quads] - (->> (range number-of-quads) - (map #(grafter.rdf.protocols/->Quad (str "http://foo.bar/" %) "http://has-value/" %)))) - -(declare-pipeline convert-persons-data-to-graphs [Integer -> (Seq Statement)] - {number-of-quads "The number of quads."}) - -(def ArgumentType (s/either java.lang.Class s/Keyword)) - -(def PipelineSchema {s/Symbol {:name s/Symbol - :var clojure.lang.Var - (s/optional-key :display-name) s/Str - :namespace s/Symbol - :doc s/Str - :args [{:name s/Symbol :class ArgumentType :doc s/Str (s/optional-key :meta) {s/Keyword s/Any}}] - :type (s/either (s/eq :graft) (s/eq :pipe)) ;; one day maybe also :validation and a fallback of :function - :declared-args [s/Symbol] - :supported-operations #{(s/enum :append :delete)}} - }) - -(deftest declare-pipeline-test - (testing "declare-pipeline" - (let [errors (s/check PipelineSchema - @exported-pipelines)] - - (testing "Creates pipelines that match our schema" - (is (nil? 
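;; For orientation, an abbreviated sketch of the registry entry that
;; `declare-pipeline` creates (field values are approximate):
;;
;;   (@exported-pipelines 'grafter.pipeline-test/test-dataset-creator)
;;   ;; => {:name 'grafter.pipeline-test/test-dataset-creator
;;   ;;     :type :pipe
;;   ;;     :declared-args '[rows cols]
;;   ;;     ...}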
errors))) - - (let [pipeline (@exported-pipelines 'grafter.pipeline-test/test-dataset-creator)] - (is (= 'grafter.pipeline-test/test-dataset-creator (:name pipeline)) - "Is keyed by its :name"))))) - -(defn display-name-pipeline [an-argument] - (grafter.tabular/test-dataset 2 2)) - -(declare-pipeline display-name-pipeline - "Display Name Pipeline" [String -> Dataset] - {an-argument "A string argument"}) - -(deftest declare-pipeline-with-display-name-test - (let [pipeline (get @exported-pipelines 'grafter.pipeline-test/display-name-pipeline)] - (is (= "Display Name Pipeline" (:display-name pipeline))))) - -(def a-quad (grafter.rdf.protocols/->Quad "http://foo.bar/1" "http://has-value/" 1 "http://some-graph/")) - -(defn map-pipeline-test [obj] - [a-quad]) - -(declare-pipeline map-pipeline-test - "Test pipeline for map objects" - [:grafter.pipeline.types/map -> Quads] - {obj "A map of key value pairs."}) - -(defn quads-pipeline [] - [a-quad]) - -(declare-pipeline quads-pipeline - "Test pipeline for map objects" - [-> Quads] - {}) - -(defn seq-quad-pipeline [] - [a-quad]) - -(declare-pipeline seq-quad-pipeline - "Test pipeline for map objects" - [-> Quads] - {}) - -(deftest declare-pipeline-with-test - (are [pipeline-name] - (let [pipeline (get @exported-pipelines pipeline-name)] - (is (= :graft (:type pipeline)))) - - 'grafter.pipeline-test/map-pipeline-test - 'grafter.pipeline-test/quads-pipeline - 'grafter.pipeline-test/seq-quad-pipeline)) - -(defn uuid-pipeline-test [uuid]) - -(declare-pipeline uuid-pipeline-test - "Test pipeline for map objects" - [UUID -> (Seq Statement)] - {uuid "A UUID"}) - -(defn url-pipeline-test [url] - []) - -(declare-pipeline url-pipeline-test - "Test pipeline for map objects" - [URL -> (Seq Statement)] - {url "A URL"}) - -(defn pipeline-string-argument-coercion [dataset string number bool hashmap - uri url uuid keyword instant] - (is (instance? incanter.core.Dataset dataset)) - (is (string? string)) - (is (number? number)) - (is (instance? Boolean bool)) - (is (map? hashmap)) - (is (instance? java.net.URI uri)) - (is (instance? java.net.URL url)) - (is (instance? java.util.UUID uuid)) - (is (keyword? keyword)) - (is (instance? 
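;; A coercion sketch: string arguments are parsed against the declared
;; signature (pipeline from this file; result shape is approximate):
;;
;;   (coerce-pipeline-arguments 'grafter.pipeline-test/test-dataset-creator
;;                              ["3" "4"])
;;   ;; => arguments coerced to Integers per [Integer Integer -> Dataset]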
java.util.Date instant)) - - (test-dataset 1 1)) - -(declare-pipeline pipeline-string-argument-coercion - "Test pipeline coerces arguments properly" - [Dataset String Integer Boolean ::types/map URI URL UUID clojure.lang.Keyword java.util.Date -> Dataset] - {dataset "A Dataset" - string "a String" - number "a Number" - bool "a boolean" - hashmap "A hashmap" - uri "A URI" - url "A URL" - uuid "A UUID" - keyword "A keyword" - instant "A date instant" - }) - -(deftest coerce-pipeline-arguments-test - (apply pipeline-string-argument-coercion (coerce-pipeline-arguments 'grafter.pipeline-test/pipeline-string-argument-coercion - ["./dev/resources/grafter/tabular/test.csv" - "a string" - "10" - "true" - "{ \"foo\" \"bar\"}" - "http://localhost/a/uri" - "http://localhost/a/url" - "cabec818-df6a-4c27-b445-117163e70227" - ":foo" - "2015"]))) - - -(deftest execute-pipeline-with-coerced-arguments-test - (is (= (test-dataset 1 1) - (execute-pipeline-with-coerced-arguments 'grafter.pipeline-test/pipeline-string-argument-coercion - ["./dev/resources/grafter/tabular/test.csv" - "a string" - "10" - "true" - "{ \"foo\" \"bar\"}" - "http://localhost/a/uri" - "http://localhost/a/url" - "cabec818-df6a-4c27-b445-117163e70227" - ":foo" - "2015"])))) - -(defn delete-only-pipeline [uri]) -(defn append-only-pipeline [uri]) - -(declare-pipeline delete-only-pipeline - [URI -> (Seq Statement)] - {uri "URI"} - :supported-operations #{:delete}) - -(declare-pipeline append-only-pipeline - [URI -> (Seq Statement)] - {uri "URI"}) - -(deftest supported-operations-test - (are [pipeline-sym expected] (= expected (get-in @exported-pipelines [pipeline-sym :supported-operations])) - 'grafter.pipeline-test/delete-only-pipeline #{:delete} - 'grafter.pipeline-test/append-only-pipeline #{:append})) - -(defn test-default-types [bool lng int biginteger dbl flt uri date kwd uuid] - (is (boolean? bool)) - (is (instance? Long lng)) - (is (integer? int)) - (is (instance? clojure.lang.BigInt biginteger)) - (is (double? dbl)) - (is (float? flt)) - (is (instance? java.net.URI uri)) - (is (instance? java.util.Date date)) - (is (keyword? kwd)) - (is (instance? java.util.UUID uuid)) - - [bool int biginteger dbl flt uri date kwd uuid]) - -(declare-pipeline test-default-types "Test pipeline" - [Boolean Long Integer clojure.lang.BigInt Double Float :grafter.pipeline.types/uri java.util.Date clojure.lang.Keyword java.util.UUID -> Quads] - {bool "A boolean value" - lng "A long" - int "An integer" - biginteger "A bigint" - dbl "A double" - flt "A float" - uri "A URI" - date "A date" - kwd "A keyword" - uuid "A UUID" - }) - -(deftest declare-pipeline-test-2 - (execute-pipeline-with-coerced-arguments 'grafter.pipeline-test/test-default-types - ["true" - "123" - "123456789" - "9999999999999999999999999" - "2.3" - "3.0" - "http://foo" - "2015" - ":a-keyword" - "04eccc7e-bddd-44e5-a299-8879512a3ceb"])) - -(defn symbol-deref-test-pipeline [klass kwd] - [klass kwd]) - -(def klass-symbol URI) - -(def keyword-symbol ::types/primitive) - -(def not-a-class-or-a-keyword "an unsupported type") - -(deftest declare-pipeline-dereferencing-test - (testing "declare-pipeline resolves classes or keywords from vars" - (is (nil? (eval `(declare-pipeline symbol-deref-test-pipeline - [klass-symbol keyword-symbol ~'-> ~'(Seq Statement)] - {~'klass "URI" - ~'kwd "Keyword"})))))) - - -(deftest declare-pipeline-dereferencing-test-2 - (testing "declare-pipeline errors if vars don't contain a valid type (either class or keyword)" - (is (thrown? 
IllegalArgumentException - (eval `(declare-pipeline symbol-deref-test-pipeline - [not-a-class-or-a-keyword keyword-symbol ~'-> ~'(Seq Statement)] - {~'klass "A class" - ~'kwd "A keyword"} - :supported-operations #{:delete}))) - "Raises an exception because the string in not-a-class-or-a-keyword is not a valid parameter type"))) - -(deftest find-pipeline-test - (testing "find-pipeline" - (is (find-pipeline "grafter.pipeline-test/test-default-types")) - (is (find-pipeline 'grafter.pipeline-test/test-default-types)))) diff --git a/test/grafter/rdf/preview_test.clj b/test/grafter/rdf/preview_test.clj index 65268fc2..b3a3ee0d 100644 --- a/test/grafter/rdf/preview_test.clj +++ b/test/grafter/rdf/preview_test.clj @@ -1,95 +1,96 @@ (ns grafter.rdf.preview-test - (:require [grafter.rdf.preview :refer :all] + (:require #_[grafter.rdf.preview :refer :all] [clojure.test :refer :all] [grafter.rdf.templater :refer [graph]] - [grafter.tabular :refer [make-dataset graph-fn]] + #_[grafter.tabular :refer [make-dataset graph-fn]] [schema.core :as s] [grafter.rdf :as rdf])) -(def rdf:a "rdf:a") -(def foaf:name "foaf:name") -(def foaf:age "foaf:age") -(def foaf:Person "foaf:Person") -(def foaf:knows "foaf:knows") - -(def test-data (make-dataset [["http://graph/" "http://bob/" "Bob" 35 "http://alice/" "Alice" 30] - ["http://graph/" "http://wayne/" "Wayne" 24 "http://jane/" "Jane" 20]] - ["persons-graph-uri" "person-uri" "person-name" "person-age" "friend-uri" "friend-name" "friend-age"])) - -;; A standard graph-fn graph template -(def test-template (graph-fn [{:strs [persons-graph-uri person-uri person-name person-age friend-uri friend-name friend-age]}] - (graph persons-graph-uri - [person-uri - [rdf:a foaf:Person] - [foaf:name person-name] - [foaf:age person-age] - [foaf:knows friend-uri]] - [friend-uri - [rdf:a foaf:Person] - [foaf:name friend-name] - [foaf:age friend-age] - [foaf:knows person-uri]]))) - -(deftest preview-graph-test - (let [schema {:bindings {:strs [s/Symbol]} - :row s/Any - :template [s/Any]}] - - (testing "Substitutes data into template" - (let [preview (preview-graph test-data test-template 0)] - - (is (s/validate schema preview) - "Outer most object conforms to Schema") - - (let [template (first (:template preview))] - (is (= '(graph "http://graph/" - ["http://bob/" - [rdf:a foaf:Person] - [foaf:name "Bob"] - [foaf:age 35] - [foaf:knows "http://alice/"]] - ["http://alice/" - [rdf:a foaf:Person] - [foaf:name "Alice"] - [foaf:age 30] - [foaf:knows "http://bob/"]]) template) - - "Template includes substitutions from the source data")))) - - (testing "Substitutes renderable constants and data into template" - (let [preview (preview-graph test-data test-template 1 :render-constants)] - - (is (s/validate schema preview) - "Outer most object conforms to Schema") - - (let [template (first (:template preview))] - (is (= '(graph "http://graph/" - ["http://wayne/" - ["rdf:a" "foaf:Person"] - ["foaf:name" "Wayne"] - ["foaf:age" 24] - ["foaf:knows" "http://jane/"]] - ["http://jane/" - ["rdf:a" "foaf:Person"] - ["foaf:name" "Jane"] - ["foaf:age" 20] - ["foaf:knows" "http://wayne/"]]) template) - - "Template includes substitutions from the source data")))))) - -(def unprintable-template (graph-fn [{:strs [a b c g]}] - (graph g - [a [b c]]))) - -(deftest unprintable-previews-test - (let [obj (Object.) - ds (make-dataset [["a" "b" obj]]) - template (:template (preview-graph ds unprintable-template 0)) - unprintable-item (second (second (nth (first template) 2)))] - - (is (instance? 
grafter.rdf.preview.UnreadableForm unprintable-item) - "Unprintable item should be wrapped in a printable wrapper.") - - (is (= (str obj) (:form-string unprintable-item))) - (is (= (.getName (class obj)) - (:form-class unprintable-item))))) +(comment + (def rdf:a "rdf:a") + (def foaf:name "foaf:name") + (def foaf:age "foaf:age") + (def foaf:Person "foaf:Person") + (def foaf:knows "foaf:knows") + + (def test-data (make-dataset [["http://graph/" "http://bob/" "Bob" 35 "http://alice/" "Alice" 30] + ["http://graph/" "http://wayne/" "Wayne" 24 "http://jane/" "Jane" 20]] + ["persons-graph-uri" "person-uri" "person-name" "person-age" "friend-uri" "friend-name" "friend-age"])) + + ;; A standard graph-fn graph template + (def test-template (graph-fn [{:strs [persons-graph-uri person-uri person-name person-age friend-uri friend-name friend-age]}] + (graph persons-graph-uri + [person-uri + [rdf:a foaf:Person] + [foaf:name person-name] + [foaf:age person-age] + [foaf:knows friend-uri]] + [friend-uri + [rdf:a foaf:Person] + [foaf:name friend-name] + [foaf:age friend-age] + [foaf:knows person-uri]]))) + + (deftest preview-graph-test + (let [schema {:bindings {:strs [s/Symbol]} + :row s/Any + :template [s/Any]}] + + (testing "Substitutes data into template" + (let [preview (preview-graph test-data test-template 0)] + + (is (s/validate schema preview) + "Outer most object conforms to Schema") + + (let [template (first (:template preview))] + (is (= '(graph "http://graph/" + ["http://bob/" + [rdf:a foaf:Person] + [foaf:name "Bob"] + [foaf:age 35] + [foaf:knows "http://alice/"]] + ["http://alice/" + [rdf:a foaf:Person] + [foaf:name "Alice"] + [foaf:age 30] + [foaf:knows "http://bob/"]]) template) + + "Template includes substitutions from the source data")))) + + (testing "Substitutes renderable constants and data into template" + (let [preview (preview-graph test-data test-template 1 :render-constants)] + + (is (s/validate schema preview) + "Outer most object conforms to Schema") + + (let [template (first (:template preview))] + (is (= '(graph "http://graph/" + ["http://wayne/" + ["rdf:a" "foaf:Person"] + ["foaf:name" "Wayne"] + ["foaf:age" 24] + ["foaf:knows" "http://jane/"]] + ["http://jane/" + ["rdf:a" "foaf:Person"] + ["foaf:name" "Jane"] + ["foaf:age" 20] + ["foaf:knows" "http://wayne/"]]) template) + + "Template includes substitutions from the source data")))))) + + (def unprintable-template (graph-fn [{:strs [a b c g]}] + (graph g + [a [b c]]))) + + (deftest unprintable-previews-test + (let [obj (Object.) + ds (make-dataset [["a" "b" obj]]) + template (:template (preview-graph ds unprintable-template 0)) + unprintable-item (second (second (nth (first template) 2)))] + + (is (instance? 
grafter.rdf.preview.UnreadableForm unprintable-item) + "Unprintable item should be wrapped in a printable wrapper.") + + (is (= (str obj) (:form-string unprintable-item))) + (is (= (.getName (class obj)) + (:form-class unprintable-item)))))) diff --git a/test/grafter/rdf/templater_test.clj b/test/grafter/rdf/templater_test.clj index a6f7d159..23b427c2 100644 --- a/test/grafter/rdf/templater_test.clj +++ b/test/grafter/rdf/templater_test.clj @@ -2,7 +2,7 @@ (:require [clojure.test :refer :all] [grafter.rdf :refer :all] ;[grafter.rdf.protocols :refer [->Triple]] - [grafter.tabular :refer [make-dataset graph-fn]] + #_[grafter.tabular :refer [make-dataset graph-fn]] [grafter.rdf.templater :refer [graph triplify]])) (def first-turtle-template ["http://example.com/subjects/1" diff --git a/test/grafter/sequences_test.clj b/test/grafter/sequences_test.clj index c4c05e92..3621420f 100644 --- a/test/grafter/sequences_test.clj +++ b/test/grafter/sequences_test.clj @@ -1,22 +1,21 @@ (ns grafter.sequences-test - (:require [clojure.test :refer :all] + #_(:require [clojure.test :refer :all] [grafter.sequences :refer :all])) +(comment + (deftest column-names-seq-test + (testing "iterates alphabet repeatedly like it's numeric" + (is (= ["A" "B" "AA" "AB" "BA" "BB" "AAA" "AAB"] + (take 8 (column-names-seq "AB")))))) -(deftest column-names-seq-test - (testing "iterates alphabet repeatedly like it's numeric" - (is (= ["A" "B" "AA" "AB" "BA" "BB" "AAA" "AAB"] - (take 8 (column-names-seq "AB")))))) + (deftest fill-when-test + (testing "fills blank strings with previous non-blank value" + (is (= ["a" "a" "a" "b" "b"] + (fill-when '("a" "" nil "b" nil))))) -(deftest fill-when-test - (testing "fills blank strings with previous non-blank value" - (is (= ["a" "a" "a" "b" "b"] - (fill-when '("a" "" nil "b" nil))))) - - (testing "applies given predicate to input values" - (is (= [3 1 1 1 4] - (fill-when pos? [3 1 -1 0 4])))) - - (testing "fills initial nil values with given value" - (is (= [:a :a :b] - (fill-when (complement nil?) '(nil nil :b) :a))))) + (testing "applies given predicate to input values" + (is (= [3 1 1 1 4] + (fill-when pos? [3 1 -1 0 4])))) + (testing "fills initial nil values with given value" + (is (= [:a :a :b] + (fill-when (complement nil?) '(nil nil :b) :a)))))) diff --git a/test/grafter/tabular/melt_test.clj b/test/grafter/tabular/melt_test.clj deleted file mode 100644 index 7ae949a3..00000000 --- a/test/grafter/tabular/melt_test.clj +++ /dev/null @@ -1,59 +0,0 @@ -(ns grafter.tabular.melt-test - (:require [grafter.tabular :refer [column-names make-dataset]] - [grafter.tabular.melt :refer :all] - [clojure.test :refer :all])) - -(defn datasets-equal? - "Compares two datasets for equality ignoring the order of rows. Two datasets are considered equal if they have the same column - names in the same order, and have the same rows in any order." - [ds1 ds2] - (and (= (column-names ds1) (column-names ds2)) - (= (set (:rows ds1)) (set (:rows ds2))))) - -(deftest melt-test - (testing "Melts dataset" - (let [initial (make-dataset [[:sales 100 200 300] - [:costs 500 400 300]] ;; test we handle string keys too - [:measure "2012" :2013 :2014]) - - melted (melt initial [:measure]) - expected (make-dataset [[:sales "2012" 100] - [:sales :2013 200] - [:sales :2014 300] - [:costs "2012" 500] - [:costs :2013 400] - [:costs :2014 300]] - [:measure :variable :value])] - (is (datasets-equal? 
melted expected)))) - - (testing "Melts single row dataset" - (let [initial (make-dataset [[:sales 100 200 300]] - [:measure :2012 :2013 :2014]) - - melted (melt initial [:measure]) - expected (make-dataset [[:sales :2012 100] - [:sales :2013 200] - [:sales :2014 300]] - [:measure :variable :value])] - (is (datasets-equal? melted expected))))) - -(deftest melt-column-groups-test - (testing "Melt groups of columns" - (let [denormalised (make-dataset [[1 2 3 4 5 6 7 8] - [9 10 11 12 13 14 15 16]] - [:a :b, :c :d, :e :f, :g :h]) - - normalised (melt-column-groups denormalised [:a :b] [:x :y]) - expected (make-dataset [[1 2 3 4] - [9 10 11 12] - [1 2 5 6] - [9 10 13 14] - [1 2 7 8] - [9 10 15 16]] - [:a :b :x :y])] - (is (datasets-equal? normalised expected)))) - - (testing "Throws on invalid column group size" - (let [initial (make-dataset [[1 2 3 4]] - [:fixed :a :b :c])] - (is (thrown? IllegalArgumentException (melt-column-groups initial [:fixed] [:c1 :c2])))))) diff --git a/test/grafter/tabular_test.clj b/test/grafter/tabular_test.clj deleted file mode 100644 index fb8bc1f9..00000000 --- a/test/grafter/tabular_test.clj +++ /dev/null @@ -1,820 +0,0 @@ -(ns grafter.tabular-test - (:require [clojure.test :refer :all] - [grafter.sequences :as seqs] - [grafter.tabular.common :as tabc] - [grafter.tabular :refer :all] - [grafter.rdf.protocols :refer [->Quad]] - [grafter.rdf.templater :refer [graph triplify]] - [grafter.tabular.csv] - [grafter.tabular.excel] - [grafter.tabular.edn] - [incanter.core :as inc] - [clojure.java.io :as io]) - (:import [java.io File])) - -(deftest header-functions-tests - (let [raw-data [[:a :b :c] [1 2 3] [4 5 6]]] - (testing "move-first-row-to-header" - (let [retval (move-first-row-to-header raw-data)] - (testing "returns a pair" - (testing "where first item is the header" - (is (= [:a :b :c] (first retval)))) - (testing "and the second item is the source data without the first row" - (is (= (rest raw-data) (second retval))))))))) - -(deftest dataset-equality-tests - (is (= (make-dataset '({"a" 1, "b" 2} - {"a" 3, "b" 4, "c" 5}) - ["a" "b" "c"]) - (make-dataset '({"a" 1, "b" 2, "c" nil} - {"a" 3, "b" 4, "c" 5}) - ["a" "b" "c"]))) - - (is (= (make-dataset '({"a" 1, "b" 2, "c" 5} - {"a" 3, "b" 4}) - ["a" "b" "c"]) - (make-dataset '({"a" 1, "b" 2, "c" 5} - {"a" 3, "b" 4, "c" nil}) - ["a" "b" "c"])))) - -(deftest make-dataset-tests - (testing "make-dataset" - (let [raw-data [[1 2 3] [4 5 6]] - ds1 (make-dataset [[1 2][3 4]] ["a" "b"]) - ds2 (make-dataset [[1 2][3 4]] ["c" "d"])] - - (testing "converts a seq of seqs into a dataset" - (is (instance? 
incanter.core.Dataset - (make-dataset raw-data)))) - - (testing "assigns column names alphabetically by default" - (let [header (:column-names (make-dataset raw-data))] - (is (= ["a" "b" "c"] header)))) - - (testing "takes a function that extracts the column names (header row)" - (let [dataset (make-dataset raw-data move-first-row-to-header) - header (:column-names dataset)] - - (is (= [1 2 3] header)))) - - (testing "making a dataset from an existing dataset" - (is (= ds1 - (make-dataset ds1)) - "Preserves data and column-names") - (is (= ds2 - (make-dataset ds1 ["c" "d"])))) - - (testing "making a dataset with ragged rows" - (is (= (make-dataset '({"a" 1, "b" 2} - {"a" 3, "b" 4, "c" 5}) - ["a" "b" "c"]) - (make-dataset '({"a" 1, "b" 2, "c" nil} - {"a" 3, "b" 4, "c" 5}) - ["a" "b" "c"]))) - - (is (= (make-dataset '({"a" 1, "b" 2, "c" 5} - {"a" 3, "b" 4}) - ["a" "b" "c"]) - (make-dataset '({"a" 1, "b" 2, "c" 5} - {"a" 3, "b" 4, "c" nil}) - ["a" "b" "c"])))) - - (testing "making a dataset with empty rows" - (let [dataset (make-dataset '((1 2) () ()) ["a" "b"]) - expected (make-dataset '((1 2) (nil nil) (nil nil)) ["a" "b"])] - (is (= dataset expected)))) - - (testing "metadata" - (let [md {:foo :bar} - ds (with-meta (make-dataset [[1 2 3]]) md)] - - (is (= (meta (make-dataset ds)) - md) - "Copy metadata when making a new dataset")))))) - - - -;;; These two vars define what the content of the files -;;; test/grafter/test.csv and test/grafter/test.xlsx should look like -;;; when loaded. -;;; -;;; - CSV data is always cast as Strings -;;; - Excel data when loaded is cast to floats - -(def raw-csv-data [["one" "two" "three"] - ["1" "2" "3"] - ["4" "5" "6"]]) - -(def raw-txt-data [["a" "b" "c"] - ["1" "2" "3"] - ["4" "5" "6"]]) - -(def raw-excel-data [["one" "two" "three"] - [1 2 3]]) - -(def csv-sheet (make-dataset raw-csv-data move-first-row-to-header)) - -(def txt-sheet (make-dataset raw-txt-data move-first-row-to-header)) - -(def excel-sheet (make-dataset raw-excel-data move-first-row-to-header)) - -(defn is-a-dataset? [thing] - (is (instance? incanter.core.Dataset thing))) - -(defn is-first-sheet? [sheet] - (is (= (make-dataset raw-excel-data) sheet))) - -(defn ->file-url-string [file-path] - (str "file://" (.getCanonicalPath (io/file file-path)))) - -(defn has-metadata? [ds] - (let [md (meta ds)] - (is md "There is no metadata set") - (is (:grafter.tabular/data-source md) "There is no :data-source set."))) - -(defmethod tabc/read-dataset* ::test - [source opts] - (with-meta (make-dataset [[1 2 3]]) {:remember :me})) - -(deftest read-dataset-tests - (testing "Open an existing Dataset" - (let [dataset (read-dataset (with-meta (make-dataset raw-csv-data) {:other :metadata}))] - (testing "returns a dataset" - (is-a-dataset? dataset) - (is (= {:other :metadata} - (meta dataset)))))) - - (testing "Open CSV file" - (let [dataset (read-dataset (io/resource "grafter/tabular/test.csv"))] - (testing "returns a dataset" - (is-a-dataset? dataset) - (has-metadata? 
dataset))))
-
-  (testing "Open CSV file and strip Byte-Order-Mark"
-    (testing "Strips BOM from first column name"
-      (let [dataset (read-dataset (io/resource "grafter/tabular/test-bom.csv"))
-            col-names (-> dataset (make-dataset move-first-row-to-header) (column-names))]
-        (is (= "foo" (first col-names)))))
-    (testing "Doesn't affect CSV files without BOM"
-      (let [dataset (read-dataset (io/resource "grafter/tabular/test-nobom.csv"))
-            col-names (-> dataset (make-dataset move-first-row-to-header) (column-names))]
-        (is (= "foo" (first col-names))))))
-
-  (testing "Open text file"
-    (let [dataset (read-dataset (io/resource "grafter/tabular/test.txt") :format :csv)]
-      (testing "returns a dataset"
-        (is-a-dataset? dataset)
-        (has-metadata? dataset))))
-
-  (testing "Open XLS file"
-    (let [dataset (read-dataset (io/resource "grafter/tabular/test.xls"))]
-      (testing "returns a dataset"
-        (is-a-dataset? dataset)
-        (is-first-sheet? dataset)
-        (has-metadata? dataset))))
-
-  (testing "Open XLSX file"
-    (let [dataset (read-dataset (io/resource "grafter/tabular/test.xlsx"))]
-      (testing "returns a dataset"
-        (is-a-dataset? dataset)
-        (is-first-sheet? dataset)
-        (has-metadata? dataset))))
-
-  (testing "Open the second sheet of an XLS file"
-    (let [dataset (read-dataset (io/resource "grafter/tabular/test.xls") :sheet "Sheet2")]
-      (testing "returns a dataset"
-        (is-a-dataset? dataset)
-        (has-metadata? dataset))))
-
-  (testing "Open the second sheet of an XLSX file"
-    (let [dataset (read-dataset (io/resource "grafter/tabular/test.xlsx") :sheet "Sheet2")]
-      (testing "returns a dataset"
-        (is-a-dataset? dataset)
-        (has-metadata? dataset))))
-
-  (testing "Open java.io.File"
-    (let [dataset (read-dataset (io/file "./dev/resources/grafter/tabular/test.xls"))]
-      (testing "returns a dataset"
-        (is-a-dataset? dataset)
-        (has-metadata? dataset))))
-
-  (testing "Open a CSV via a URL string"
-    (let [dataset (read-dataset (->file-url-string "./dev/resources/grafter/tabular/test.csv") :format :csv)]
-      (testing "returns a dataset"
-        (is-a-dataset? dataset)
-        (has-metadata? dataset))))
-
-  (testing "Open a CSV via an InputStream"
-    (with-open [in-str (io/input-stream (io/resource "grafter/tabular/test.csv"))]
-      (let [dataset (read-dataset in-str :format :csv)]
-        (is-a-dataset? dataset)
-        (has-metadata? dataset))))
-
-  (testing "Open an Excel file via a URL string"
-    (let [dataset (read-dataset (->file-url-string "./dev/resources/grafter/tabular/test.xls") :format :csv)]
-      (testing "returns a dataset"
-        (is-a-dataset? dataset)
-        (has-metadata? dataset))))
-
-  (testing "Remembers metadata returned by adapters"
-    (is (= :me (:remember (meta (read-dataset "./test/grafter/test.csv" :format ::test)))))))
-
-(deftest read-datasets-tests
-  (testing "Open XLS file"
-    (let [datasets (read-datasets (io/resource "grafter/tabular/test.xls"))]
-      (testing "returns a hashmap of sheet-names to datasets"
-        (is (every? is-a-dataset? (mapcat vals datasets)))
-        (is (every? has-metadata? (mapcat vals datasets)))
-        (is (= '("Sheet1" "Sheet2") (mapcat keys datasets))))))
-
-  (testing "Open CSV file"
-    (let [datasets (read-datasets (io/resource "grafter/tabular/test.csv"))]
-      (testing "returns a hashmap of sheet-names to datasets"
-        (is (every? is-a-dataset? (mapcat vals datasets)))
-        (is (every? has-metadata? (mapcat vals datasets)))
-        ;; CSVs only have one sheet so we set the sheet name to
-        ;; be the constant "csv". 
We can't really use the filename as - ;; in some contexts there isn't a file name - (is (= '("csv") (mapcat keys datasets)))))) - - (testing "Open XLSX file" - (let [datasets (read-datasets (io/resource "grafter/tabular/test.xlsx"))] - (testing "returns a hashmap of sheet-names to datasets" - (is (every? is-a-dataset? (mapcat vals datasets))) - (is (= '("Sheet1" "Sheet2") (mapcat keys datasets)))))) - - (testing "Open java.io.File" - (let [datasets (read-datasets (io/file "./dev/resources/grafter/tabular/test.xls"))] - (testing "returns a hashmap of sheet-names to datasets" - (is (every? is-a-dataset? (mapcat vals datasets))) - (is (= '("Sheet1" "Sheet2") (mapcat keys datasets)))))) - - (testing "Open InputStream" - (with-open [in-stream (io/input-stream (io/resource "grafter/tabular/test.xlsx"))] - (let [datasets (read-datasets in-stream :format :xlsx)] - (is (every? is-a-dataset? (mapcat vals datasets))) - (is (= '("Sheet1" "Sheet2") (mapcat keys datasets)))))) - - (testing "Opening a sequential thing" - (let [ds (grafter.tabular/make-dataset [[1 2 3]]) - datasets (read-datasets [{"foo" ds}])] - (is (every? is-a-dataset? (mapcat vals datasets))) - (is ((first datasets) "foo") ds))) - - (testing "raises Exception if called with :sheet option" - (is (thrown? IllegalArgumentException - (read-datasets (io/resource "grafter/tabular/test.xls") :sheet "Sheet1"))))) - -(deftest make-dataset-tests - (let [dataset (make-dataset csv-sheet)] - (testing "make-dataset" - (testing "makes incanter datasets." - (is (inc/dataset? dataset))) - - (testing "Automatically assigns column names alphabetically if none are given" - (let [columns (:column-names (make-dataset [(range 30)]))] - (is (= "aa" (nth columns 26))) - (is (= "ab" (nth columns 27))))) - - (testing "Returns self if data is already a dataset and no column names are specified" - (let [ds (test-dataset 5 5)] - (is (= ds (make-dataset ds)))))))) - -(deftest resolve-column-id-tests - (testing "resolve-column-id" - (let [dataset (test-dataset 5 5)] - (are [expected lookup] - (= expected (resolve-column-id dataset lookup :not-found)) - "a" "a" - "a" :a - "a" 0 - :not-found "z" - :not-found :z)))) - -(deftest columns-tests - (testing "columns" - (let [expected-dataset (test-dataset 5 2) - test-data (test-dataset 5 10)] - (testing "Narrows by string names" - (is (= expected-dataset - (columns test-data ["a" "b"])) "Should select just columns a and b")) - - (testing "Narrows by numeric ids" - (is (= expected-dataset - (columns test-data [0 1])) "Should select columns 0 and 1 (a and b)")) - - (testing "Narrows by keywords" - (is (= expected-dataset - (columns test-data [:a :b])) "should select columns 0 and 1 (a and b)")) - - (testing "works with infinite sequences" - (is (columns test-data (grafter.sequences/integers-from 5)) - "Takes as much as it can from the supplied sequence and returns those columns.") - - (is (thrown? 
IndexOutOfBoundsException - (columns test-data (range 10 100))) - "Raises an exception if columns when paired with data are not actually column headings.")) - - (testing "Returns all columns from unordered sequence" - (let [expected-dataset (assoc (test-dataset 5 4) - :column-names - ["a" "b" "d" "c"])] - (is (= expected-dataset - (columns test-data [:a :b :d :c])) - "should return dataset containing the cols :a :b :d :c"))) - - (testing "Missing keys" - (testing "when at least one key is found (even though rest of key range is missing)" - (is (columns test-data (range -3 2)) - expected-dataset)) - - (testing "when no keys are found" - (testing "with a range" - (is (thrown-with-msg? IndexOutOfBoundsException - #"The columns: 11, 12 are not currently defined" - (columns test-data (range 11 13))))) - - (testing "with an infinite range" - (is (thrown-with-msg? IndexOutOfBoundsException - #"The columns: 13, 14, 15 ... are not currently defined" - (columns test-data (iterate inc 13))))) - - (testing "with keywords" - (is (thrown-with-msg? IndexOutOfBoundsException - #"The columns: :k, :l are not currently defined" - (columns test-data [:k :l])))))))) - - (testing "preserves metadata" - (let [md {:foo :bar} - ds (with-meta (make-dataset [[1 2 3]]) md)] - (is (= md - (meta (columns ds [0])))))) - - (testing "still returns a dataset even with only one row" - (let [test-data (make-dataset [["Doc Brown" "Einstein"]] ["Owner" "Dog"]) - result (columns test-data ["Owner" "Dog"])] - (is (is-a-dataset? result)) - - (is (= test-data result)))) - - (testing "Duplicate columns in the selection leads to duplicated column-names" - ;; NOTE that these behaviour's aren't really desirable - but its - ;; hard to prevent without using only finite sequences for - ;; selection. - ;; - ;; These tests are primarily to document this behaviour - even - ;; though it can be undesirable. - (let [expected-dataset (make-dataset [[0 0] [1 1]] - ["a" "a"]) - test-dataset (test-dataset 2 2) - - result (columns test-dataset ["a" "a"])] - (is (= ["a" "a"] (column-names result))) - (is (= expected-dataset result)) - ;; Columns crops the supplied sequence to the data. - ;; This means duplicate columns may sneak in. 
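-      ;;
-      ;; An illustrative consequence (mirroring the assertion below):
-      ;; selecting ["a" "a" "b"] against the 2x2 test-dataset above is
-      ;; cropped to the two columns of data, so the resulting
-      ;; column-names are ["a" "a"].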
- (is (= expected-dataset (columns test-dataset ["a" "a" "b"])))))) - -(deftest rows-tests - (let [test-data (test-dataset 10 2)] - (testing "rows" - (testing "works with infinite sequences" - (is (= test-data (rows (test-dataset 10 2) (seqs/integers-from 0))))) - - (testing "pairing [5 6 7 8 9] with row numbers [0 1 2 3 4 5 6 7 8 9] returns rows [5 6 7 8 9]" - (let [expected-dataset (make-dataset [[5 5] - [6 6] - [7 7] - [8 8] - [9 9]])] - (is (= expected-dataset (rows test-data - [5 6 7 8 9]))))) - - (testing "allows returning multiple copies of consecutive rows" - (let [expected-dataset (make-dataset [[2 2] - [2 2]])] - - (is (= expected-dataset (rows test-data [2 2])))) - - (let [expected-dataset (make-dataset [[0 0] - [1 1] - [2 2] - [2 2]])] - - (is (= expected-dataset (rows test-data [0 1 2 2]))))) - - (testing "preserves metadata" - (let [md {:foo :bar} - ds (with-meta (make-dataset [[1 2 3]]) md)] - (is (= md - (meta (rows ds [0]))))))))) - -(deftest drop-rows-test - (testing "drop-rows" - (let [dataset (test-dataset 3 1)] - (is (= (make-dataset [[1] [2]]) (drop-rows dataset 1))) - (is (= (make-dataset [[2]]) (drop-rows dataset 2))) - (is (= (make-dataset [] ["a"]) (drop-rows dataset 1000))))) - - (testing "preserves metadata" - (let [md {:foo :bar} - ds (with-meta (make-dataset [[1 2 3]]) md)] - (is (= md - (meta (drop-rows ds 1))))))) - -(deftest reorder-columns-test - (testing "reorder-columns" - (is (= (-> (reorder-columns (test-dataset 3 4) - ["a" "c" "b" "d"]) - :column-names) - ["a" "c" "b" "d"]))) - - (testing "preserves metadata" - (let [md {:foo :bar} - ds (with-meta (test-dataset 3 4) md)] - (is (= md - (meta (reorder-columns ds ["b" "c" "a" "d"]))))))) - -(deftest grep-test - (let [dataset (make-dataset [["one" "two" "bar"] - ["foo" "bar" "b2az"] - ["foo" "blee" "bl3ah"]]) - - expected-dataset (make-dataset [["foo" "bar" "b2az"] - ["foo" "blee" "bl3ah"]])] - - (testing "works with non string cell values" - (let [ds (make-dataset [["foo" 1] - ["foo" 2] - ["bar" 3]])] - (is (= (make-dataset [["foo" 1] ["foo" 2]]) - (grep ds "foo"))) - - (is (= (make-dataset [["foo" 1]]) - (grep ds 1))))) - - (testing "grep" - (testing "with a function" - (testing "receives a single cell as its argument" - (grep dataset (fn [cell] - (is (= String (class cell)))))) - - (is (= expected-dataset - (grep dataset (fn [cell] - (= cell "foo"))))) - - (is (= expected-dataset - (grep dataset (fn [cell] - (.startsWith cell "f"))))) - - - (let [expected (make-dataset [["one" "two" "bar"]])] - (is (= expected - (grep dataset (fn [cell] - (= cell "bar")) ["c"]))))) - (testing "with a string" - (is (= expected-dataset - (grep dataset "fo")))) - - (testing "with a regex" - (is (= expected-dataset - (grep dataset #"\d")))) - - (testing "with a sequence" - (is (= expected-dataset - (grep dataset [1 2])))) - - (testing "on an empty dataset" - (let [empty-ds (make-dataset)] - (is (= empty-ds - (grep empty-ds #"foo"))))) - - (testing "preserves metadata" - (let [md {:foo :bar} - ds (with-meta (make-dataset [[1 2 3]]) md)] - (is (= md - (meta (grep ds 1))))))))) - -(deftest mapc-test - (let [dataset (make-dataset [[1 2 "foo" 4] - [5 6 "bar" 8] - [9 10 "baz" 12]]) - - fs {"a" str, "b" inc, "c" identity, "d" inc} - fs-incomplete {"a" str, "b" inc, "d" inc} - fs-vec [str inc identity inc] - expected-dataset (make-dataset [["1" 3 "foo" 5] - ["5" 7 "bar" 9] - ["9" 11 "baz" 13]])] - (testing "mapc with a hashmap" - (testing "complete hashmap" - (is (= expected-dataset - (mapc dataset fs)))) - - (testing "incomplete 
hashmap implies mapping identity for unspecified columns"
-      (is (= expected-dataset
-             (mapc dataset fs-incomplete)))
-      (is (= dataset
-             (mapc dataset {}))))
-
-    (testing "with new columns creates the column and passes nil to the function"
-      (let [dataset (make-dataset [[1 2 3]
-                                   [4 5 6]])
-            expected-dataset (make-dataset [[1 2 3 nil] [4 5 6 nil]] ["a" "b" "c" "d"])]
-
-        (is (= (mapc dataset {"d" identity}) expected-dataset)))))
-
-  (testing "mapc with a vector of functions works positionally"
-    (is (= expected-dataset
-           (mapc dataset fs-vec))))
-
-  (testing "incomplete vector implies mapping identity over unspecified columns"
-    (let [dataset (make-dataset [[1 2 "foo" 4]])
-          expected (make-dataset [["1" 2 "foo" 4]])]
-      (is (= expected
-             (mapc dataset [str])))))
-
-  (testing "preserves metadata"
-    (let [md {:foo :bar}
-          ds (with-meta (make-dataset [[1 2 3]]) md)]
-      (is (= md
-             (meta (mapc ds {"a" str})))))))
-
-
-(deftest swap-test
-  (let [ordered-ds (make-dataset
-                    [["a" "b" "c" "d"]
-                     ["a" "b" "c" "d"]
-                     ["a" "b" "c" "d"]])]
-    (testing "swapping two columns"
-      (is (= (make-dataset
-              [["b" "a" "c" "d"]
-               ["b" "a" "c" "d"]
-               ["b" "a" "c" "d"]]
-              ["b" "a" "c" "d"])
-             (swap ordered-ds "a" "b"))))
-    (testing "swapping two pairs of columns"
-      (is (= (make-dataset
-              [["b" "c" "a" "d"]
-               ["b" "c" "a" "d"]
-               ["b" "c" "a" "d"]]
-              ["b" "c" "a" "d"])
-             (swap ordered-ds "a" "b" "a" "c"))))
-    (testing "swapping an odd number of columns"
-      (is (thrown? java.lang.Exception
-                   (swap ordered-ds "a" "b" "c"))))
-
-    (testing "preserves metadata"
-      (let [md {:foo :bar}
-            ds (with-meta (make-dataset [[1 2 3]]) md)]
-        (is (= md
-               (meta (swap ds "a" "b"))))))))
-
-(deftest derive-column-test
-  (let [subject (make-dataset [[1 2] [3 4]])
-        expected (make-dataset [[1 2 3] [3 4 7]])]
-
-    (is (= (derive-column subject "c" ["a" "b"] +)
-           expected))
-
-    (is (= (derive-column subject "c" [:a :b] +)
-           expected))
-
-    (is (= (derive-column subject "c" :a str)
-           (make-dataset [[1 2 "1"] [3 4 "3"]])))
-
-    (testing "preserves metadata"
-      (let [md {:foo :bar}
-            ds (with-meta (make-dataset [[1 2 3]]) md)]
-        (is (= md
-               (meta (derive-column ds "c" ["a"] str))))))))
-
-(deftest add-columns-test
-  (let [subject (make-dataset [[1 2 3] [4 5 6]])]
-    (testing "add-columns"
-      (testing "with hash-map"
-        (testing "fully populated"
-          (is (= (make-dataset [[1 2 3 "kitten" "trousers"]
-                                [4 5 6 "kitten" "trousers"]]
-                               ["a" "b" "c" "animal" "clothes"])
-
-                 (add-columns subject {"animal" "kitten" "clothes" "trousers"}))
-              "adds cells to every row of the specified columns"))
-
-        (testing "where the first row of data has no value in the lookup"
-          (is (= (make-dataset [[1 2 3 nil]
-                                [4 5 6 "yes"]]
-                               ["a" "b" "c" "above_4"])
-                 (add-columns subject ["above_4"] ["a"] {4 {"above_4" "yes"}}))
-              "adds columns to every row")))
-
-      (testing "with function"
-        (testing "with 1 argument"
-
-          (let [foo-bar (fn [v]
-                          {"foo" v "bar" v})]
-            (is (= (make-dataset [[1 2 3 1 1]
-                                  [4 5 6 4 4]]
-                                 ["a" "b" "c" "foo" "bar"])
-
-                   (add-columns subject ["a"] foo-bar)
-                   (add-columns subject "a" foo-bar))
-
-                "When given a function and a selection of column ids
-          it applies ƒ to the source cells, and merges the returned map into the cell")))
-
-        (testing "with multiple arguments"
-          (let [expected (make-dataset [[1 2 3 3 0]
-                                        [4 5 6 9 3]]
-                                       ["a" "b" "c" :d :e])]
-            (is (= expected (add-columns subject [:a "b"]
-                                         (fn [a b] {:d (+ a b) :e (dec a)}))))))
-
-        (testing "preserves metadata"
-          (let [md {:foo :bar}
-                ds (with-meta (make-dataset [[1 2 3]]) md)]
-            (is (= md
-                   (meta
(add-columns ds {"foo" 1})))))))))) - -(deftest build-lookup-table-test - (let [debts (make-dataset [["rick" 25 33] - ["john" 9 12] - ["bob" 48 20] - ["kevin" 43 10]] - ["name" "age" "debt"])] - - (testing "with no specified return column return a map of all columns except the chosen key" - (is (= {"age" 25 "debt" 33} - ((build-lookup-table debts "name") "rick") - ((build-lookup-table debts ["name"]) "rick")))) - - (testing "with no specified return column and one row only" - (is (= {"age" 25 "debt" 33} - ((build-lookup-table (take-rows debts 1) "name") "rick")))) - - (testing "1 key column" - (is (= {"debt" 20} - ((build-lookup-table debts "name" ["debt"]) "bob") - ((build-lookup-table debts "name" "debt") "bob"))) - - (is (= {"age" 48} - ((build-lookup-table debts "name" "age") "bob"))) - - (is (= nil - ((build-lookup-table debts "name" "debt") "foo")))) - - (testing "many explicit return value columns" - (is (= {"debt" 20, "age" 48, "name" "bob"} - ((build-lookup-table debts ["name"] ["name" "age" :debt]) "bob")))) - - (testing "composite key columns" - (is (= {"debt" 33} - ((build-lookup-table debts ["name" "age"] "debt") ["rick" 25]))) - (is (= nil - ((build-lookup-table debts ["name" "age"] "debt") ["foo" 99])))) - - ;; TODO when we find a better error handling approach we should - ;; support it here too. - (testing "errors" - (testing "no key column" - (is (thrown? IndexOutOfBoundsException - ((build-lookup-table debts [] "debt") "bob")))) - (testing "key column not existing" - (is (thrown? IllegalArgumentException - ((build-lookup-table debts "foo" "debt") "bob"))))))) - -(deftest add-columns-with-lookup-table-test - (testing "add-columns and build-lookup-table compose nicely" - - (let [customer (make-dataset [[1 "bob" "hope"] [2 "john" "doe"] [3 "jane" "blogs"]] - ["id" "fname" "sname"]) - - accounts (make-dataset [[123 1 32] - [124 3 300] - [125 2 -500]] - ["account" "customer-id" "balance"])] - - (-> customer - (add-columns "id" (build-lookup-table accounts ["customer-id"])))))) - -(defmacro with-tempfile [file-var & forms] - `(let [~file-var (doto (File/createTempFile "unit-test-tempfile" "tmp") - (.deleteOnExit))] - (try - ~@forms - (finally - (.delete ~file-var))))) - -(deftest write-dataset-test - (testing "write-dataset" - - (are [format] - (let [sample-dataset (make-dataset [["1" "2" "3"] ["4" "5" "6"]])] - - (testing (str "with format" format) - (testing "with a file" - (with-tempfile a-file - (write-dataset a-file sample-dataset :format format) - (is (= sample-dataset (make-dataset (read-dataset a-file :format format) move-first-row-to-header))))) - - (testing "with a stream" - (with-tempfile a-file - (with-open [stream (clojure.java.io/output-stream a-file)] - (write-dataset stream sample-dataset :format format)) - - (is (= sample-dataset - (make-dataset (read-dataset a-file :format format) move-first-row-to-header))))))) - - :xls - "application/vnd.ms-excel" - :xlsx - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - :csv - "text/csv") - - (testing "edn serialization" - (let [sample-dataset (make-dataset [["1" "2" "3"] ["4" "5" "6"]])] - (testing "with a file" - (with-tempfile a-file - (write-dataset a-file sample-dataset :format :edn) - (is (= sample-dataset (read-dataset a-file :format :edn))))) - - (testing "with a stream" - (with-tempfile a-file - (with-open [stream (clojure.java.io/output-stream a-file)] - (write-dataset stream sample-dataset :format :edn)) - - (is (= sample-dataset - (read-dataset a-file :format :edn))))))) - - (testing "in an 
unknown format" - (with-tempfile a-file - (is (thrown? IllegalArgumentException (write-dataset a-file :format :an-unknown-format))))))) - -(deftest graph-fn-test - (let [test-data [["http://a1" "http://b1" "http://c1" "http://graph1"] - ["http://a2" "http://b2" "http://c2" "http://graph2"]] - first-quad (->Quad "http://a1" "http://b1" "http://c1" "http://graph1") - second-quad (->Quad "http://a2" "http://b2" "http://c2" "http://graph2")] - - (testing "graph-fn" - (testing "destructuring" - (are [name column-names binding-form body] - (testing name - (let [ds (make-dataset test-data - column-names) - f (graph-fn [binding-form] - body)] - - (is (= first-quad - (first (f ds)))))) - - "by :keys" - [:a :b :c :d] {:keys [a b c d]} - (graph d - [a [b c]]) - - "by :strs" - ["a" "b" "c" "d"] {:strs [a b c d]} - (graph d - [a [b c]]) - - "by map" - ["a" :b "c" :d] {a "a" b :b c "c" graf :d} - (graph graf - [a [b c]]) - - "by position (vector)" - [:a :b :c :d] [one two three graf] - (graph graf - [one [two three]]))) - - (testing "concatenates sequences returned by each form in the body" - (let [ds (make-dataset test-data) - f (graph-fn [[one two three graf]] - (graph graf - [one [two three]]))] - - (is (= [first-quad second-quad] - (f ds))))) - - (testing "metadata" - (let [my-graphfn (graph-fn [[a b c]] - (graph "http://test.com/" - [a - [b c]])) - ds (make-dataset [["http://one/" "http://two/" "http://three/"]]) - quad-meta (meta (first (my-graphfn ds)))] (is (= {:grafter.tabular/row {"a" "http://one/" "b" "http://two/" "c" "http://three/"}} - quad-meta) - "Adds the row that yielded each Quad as metadata") - - (is (= {"a" "http://one/" "b" "http://two/" "c" "http://three/"} - (:grafter.tabular/row quad-meta)) - "Adds the row that yielded each Quad as metadata")))))) - -(deftest rename-test - (let [ds (test-dataset 1 2)] - (is (= (make-dataset [[0 0]] [:a :b]) - (rename-columns ds keyword))) - - (is (= (make-dataset [[0 0]] ["foo" "b"]) - (rename-columns ds {"a" "foo"}))))) From 9d6196d1fbfc02b5dbce59527946d7a565b5e6b2 Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Wed, 12 Jul 2017 15:19:44 +0100 Subject: [PATCH 05/10] Remove non rdf deps --- test/grafter/rdf/preview_test.clj | 96 ----------------------------- test/grafter/rdf/templater_test.clj | 2 - test/grafter/sequences_test.clj | 21 ------- 3 files changed, 119 deletions(-) delete mode 100644 test/grafter/rdf/preview_test.clj delete mode 100644 test/grafter/sequences_test.clj diff --git a/test/grafter/rdf/preview_test.clj b/test/grafter/rdf/preview_test.clj deleted file mode 100644 index b3a3ee0d..00000000 --- a/test/grafter/rdf/preview_test.clj +++ /dev/null @@ -1,96 +0,0 @@ -(ns grafter.rdf.preview-test - (:require #_[grafter.rdf.preview :refer :all] - [clojure.test :refer :all] - [grafter.rdf.templater :refer [graph]] - #_[grafter.tabular :refer [make-dataset graph-fn]] - [schema.core :as s] - [grafter.rdf :as rdf])) - -(comment - (def rdf:a "rdf:a") - (def foaf:name "foaf:name") - (def foaf:age "foaf:age") - (def foaf:Person "foaf:Person") - (def foaf:knows "foaf:knows") - - (def test-data (make-dataset [["http://graph/" "http://bob/" "Bob" 35 "http://alice/" "Alice" 30] - ["http://graph/" "http://wayne/" "Wayne" 24 "http://jane/" "Jane" 20]] - ["persons-graph-uri" "person-uri" "person-name" "person-age" "friend-uri" "friend-name" "friend-age"])) - - ;; A standard graph-fn graph template - (def test-template (graph-fn [{:strs [persons-graph-uri person-uri person-name person-age friend-uri friend-name friend-age]}] - (graph 
persons-graph-uri - [person-uri - [rdf:a foaf:Person] - [foaf:name person-name] - [foaf:age person-age] - [foaf:knows friend-uri]] - [friend-uri - [rdf:a foaf:Person] - [foaf:name friend-name] - [foaf:age friend-age] - [foaf:knows person-uri]]))) - - (deftest preview-graph-test - (let [schema {:bindings {:strs [s/Symbol]} - :row s/Any - :template [s/Any]}] - - (testing "Substitutes data into template" - (let [preview (preview-graph test-data test-template 0)] - - (is (s/validate schema preview) - "Outer most object conforms to Schema") - - (let [template (first (:template preview))] - (is (= '(graph "http://graph/" - ["http://bob/" - [rdf:a foaf:Person] - [foaf:name "Bob"] - [foaf:age 35] - [foaf:knows "http://alice/"]] - ["http://alice/" - [rdf:a foaf:Person] - [foaf:name "Alice"] - [foaf:age 30] - [foaf:knows "http://bob/"]]) template) - - "Template includes substitutions from the source data")))) - - (testing "Substitutes renderable constants and data into template" - (let [preview (preview-graph test-data test-template 1 :render-constants)] - - (is (s/validate schema preview) - "Outer most object conforms to Schema") - - (let [template (first (:template preview))] - (is (= '(graph "http://graph/" - ["http://wayne/" - ["rdf:a" "foaf:Person"] - ["foaf:name" "Wayne"] - ["foaf:age" 24] - ["foaf:knows" "http://jane/"]] - ["http://jane/" - ["rdf:a" "foaf:Person"] - ["foaf:name" "Jane"] - ["foaf:age" 20] - ["foaf:knows" "http://wayne/"]]) template) - - "Template includes substitutions from the source data")))))) - - (def unprintable-template (graph-fn [{:strs [a b c g]}] - (graph g - [a [b c]]))) - - (deftest unprintable-previews-test - (let [obj (Object.) - ds (make-dataset [["a" "b" obj]]) - template (:template (preview-graph ds unprintable-template 0)) - unprintable-item (second (second (nth (first template) 2)))] - - (is (instance? grafter.rdf.preview.UnreadableForm unprintable-item) - "Unprintable item should be wrapped in a printable wrapper.") - - (is (= (str obj) (:form-string unprintable-item))) - (is (= (.getName (class obj)) - (:form-class unprintable-item)))))) diff --git a/test/grafter/rdf/templater_test.clj b/test/grafter/rdf/templater_test.clj index 23b427c2..5abe5f0d 100644 --- a/test/grafter/rdf/templater_test.clj +++ b/test/grafter/rdf/templater_test.clj @@ -1,8 +1,6 @@ (ns grafter.rdf.templater-test (:require [clojure.test :refer :all] [grafter.rdf :refer :all] - ;[grafter.rdf.protocols :refer [->Triple]] - #_[grafter.tabular :refer [make-dataset graph-fn]] [grafter.rdf.templater :refer [graph triplify]])) (def first-turtle-template ["http://example.com/subjects/1" diff --git a/test/grafter/sequences_test.clj b/test/grafter/sequences_test.clj deleted file mode 100644 index 3621420f..00000000 --- a/test/grafter/sequences_test.clj +++ /dev/null @@ -1,21 +0,0 @@ -(ns grafter.sequences-test - #_(:require [clojure.test :refer :all] - [grafter.sequences :refer :all])) -(comment - (deftest column-names-seq-test - (testing "iterates alphabet repeatedly like it's numeric" - (is (= ["A" "B" "AA" "AB" "BA" "BB" "AAA" "AAB"] - (take 8 (column-names-seq "AB")))))) - - (deftest fill-when-test - (testing "fills blank strings with previous non-blank value" - (is (= ["a" "a" "a" "b" "b"] - (fill-when '("a" "" nil "b" nil))))) - - (testing "applies given predicate to input values" - (is (= [3 1 1 1 4] - (fill-when pos? [3 1 -1 0 4])))) - - (testing "fills initial nil values with given value" - (is (= [:a :a :b] - (fill-when (complement nil?) 
'(nil nil :b) :a)))))) From 628debc9283e97cc86c44b006203c416770a539a Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Wed, 12 Jul 2017 14:59:00 +0100 Subject: [PATCH 06/10] Simplify source layout now we no longer use lein-repack and have split grafter.tabular into its own repo --- project.clj | 2 -- src/{rdf-common => }/grafter/rdf.clj | 0 src/{rdf-common => }/grafter/rdf/formats.clj | 0 src/{rdf-common => }/grafter/rdf/io.clj | 0 src/{rdf-common => }/grafter/rdf/protocols.clj | 0 src/{rdf-common => }/grafter/rdf/repository.clj | 0 src/{rdf-common => }/grafter/rdf/repository/registry.clj | 0 src/{rdf-common => }/grafter/rdf/sparql.clj | 0 src/{rdf-common => }/grafter/rdf/templater.clj | 0 9 files changed, 2 deletions(-) rename src/{rdf-common => }/grafter/rdf.clj (100%) rename src/{rdf-common => }/grafter/rdf/formats.clj (100%) rename src/{rdf-common => }/grafter/rdf/io.clj (100%) rename src/{rdf-common => }/grafter/rdf/protocols.clj (100%) rename src/{rdf-common => }/grafter/rdf/repository.clj (100%) rename src/{rdf-common => }/grafter/rdf/repository/registry.clj (100%) rename src/{rdf-common => }/grafter/rdf/sparql.clj (100%) rename src/{rdf-common => }/grafter/rdf/templater.clj (100%) diff --git a/project.clj b/project.clj index d6b2c968..b0026382 100644 --- a/project.clj +++ b/project.clj @@ -29,8 +29,6 @@ :src-dir-uri "http://github.com/Swirrl/grafter/blob/0.8.x-SNAPSHOT/" :src-linenum-anchor-prefix "L"} - :source-paths ["src/common" "src/rdf-repository" "src/tabular" - "src/templater" "src/rdf-common" "src/pipeline"] ;; Prevent Java process from appearing as a GUI app in OSX when ;; Swing classes are loaded. diff --git a/src/rdf-common/grafter/rdf.clj b/src/grafter/rdf.clj similarity index 100% rename from src/rdf-common/grafter/rdf.clj rename to src/grafter/rdf.clj diff --git a/src/rdf-common/grafter/rdf/formats.clj b/src/grafter/rdf/formats.clj similarity index 100% rename from src/rdf-common/grafter/rdf/formats.clj rename to src/grafter/rdf/formats.clj diff --git a/src/rdf-common/grafter/rdf/io.clj b/src/grafter/rdf/io.clj similarity index 100% rename from src/rdf-common/grafter/rdf/io.clj rename to src/grafter/rdf/io.clj diff --git a/src/rdf-common/grafter/rdf/protocols.clj b/src/grafter/rdf/protocols.clj similarity index 100% rename from src/rdf-common/grafter/rdf/protocols.clj rename to src/grafter/rdf/protocols.clj diff --git a/src/rdf-common/grafter/rdf/repository.clj b/src/grafter/rdf/repository.clj similarity index 100% rename from src/rdf-common/grafter/rdf/repository.clj rename to src/grafter/rdf/repository.clj diff --git a/src/rdf-common/grafter/rdf/repository/registry.clj b/src/grafter/rdf/repository/registry.clj similarity index 100% rename from src/rdf-common/grafter/rdf/repository/registry.clj rename to src/grafter/rdf/repository/registry.clj diff --git a/src/rdf-common/grafter/rdf/sparql.clj b/src/grafter/rdf/sparql.clj similarity index 100% rename from src/rdf-common/grafter/rdf/sparql.clj rename to src/grafter/rdf/sparql.clj diff --git a/src/rdf-common/grafter/rdf/templater.clj b/src/grafter/rdf/templater.clj similarity index 100% rename from src/rdf-common/grafter/rdf/templater.clj rename to src/grafter/rdf/templater.clj From 79900d0e59c40655f8f59c342a2d768fa4716399 Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Wed, 12 Jul 2017 15:12:36 +0100 Subject: [PATCH 07/10] New README.md describing deprecation of grafter.tabular. 
Amend release notes

---
 README.md       | 80 +++++++++++--------------------------------------
 RELEASENOTES.md |  8 +++++
 2 files changed, 26 insertions(+), 62 deletions(-)

diff --git a/README.md b/README.md
index 8fef426f..9cc89759 100644
--- a/README.md
+++ b/README.md
@@ -2,76 +2,32 @@

 "For the hard graft of linked data processing."

-Grafter is a library, DSL and suite of tools for flexible, efficient, ETL, data
-transformation and processing. Its primary use is for handling Linked Data
-conversions from tabular data formats into RDF's graph data format, but it is
-equally adept at handling tabular data conversions.
+Grafter is a [Clojure](http://clojure.org/) library for linked data
+processing. It is mature and under active development.

-See the official grafter website at [grafter.org](http://grafter.org/)
-for more details.
+It provides support for all common RDF serialisations and
+includes a library of functions for querying and writing to SPARQL
+repositories.

-For the Grafter rationale see our blog post:
-[The hard graft of Linked Data ETL](http://blog.swirrl.com/articles/linked-data-etl/).
+## FAQ

-## What plans are there for Grafter?
+*Where can I find the api-docs?*

-Grafter is currently in the early stages of development, however
-[Swirrl](http://swirrl.com/) has been using it to transform
-significant amounts of data for our clients within the government.
+[api.grafter.org](http://api.grafter.org/)

-Grafter is currently an API and a small DSL for converting tabular
-data into Linked Data. However we have ambitious plans to develop a
-suite of tools on top of it. These tools are planned to include:
+*Didn't grafter also contain tools for tabular processing?*

-1. Command line tools for data processing.
-1. Import services to load pipelines and execute predefined data
-   transformations.
-1. A Graphical ETL Tool to assist non-programmers in creating data
-   transformation pipelines.
+As of 0.9.0 the `grafter.tabular` library has been moved into a
+[separate repository](https://github.com/Swirrl/grafter.tabular) so
+the core grafter library can focus on processing linked data.

-## Development
+This part of the library is now considered deprecated. If you depend
+on it you can still use it, and it may receive occasional
+maintenance updates.

-Grafter is deployed on the standard Clojure build repository
-[Clojars](http://clojars.org/).
-
-To use the Grafter API please add the following dependency to your Clojure
-project's `project.clj` file. For more details on how to do this see
-the [leiningen](http://leiningen.org/) build tool:
-
-[![Clojars Project](https://img.shields.io/clojars/v/grafter.svg)](https://clojars.org/grafter)
-
-**NOTE:** The public documentation, template projects and leiningen
-plugin have not yet been updated to conform to the latest 0.8.x
-versions of grafter. If you wish to use these please use the `0.7.6`
-release.
-
-## Versioning
-
-We are currently following a `MAJOR.MINOR.PATCH` versioning scheme,
-but are anticipating significant breaking API changes between minor
-versions at least until we reach `1.0.0`.
-
-`PATCH` versions should be backwardly compatible with previous `MINOR`
-versions.
-
-Releases will be tagged with an appropriate tag indicating their
-`MAJOR.MINOR.PATCH` version.
-
-We are currently producing API docs for the master branch and all
-tagged releases.
-
-- [API docs (master branch)](http://api.grafter.org/docs/master)
-- [API docs (all releases)](http://api.grafter.org/)
-
-Additionally [grafter.org](http://grafter.org/) contains a
-[quick start guide](http://grafter.org/getting-started/index.html) and
-supplementary documentation.
-
-## Getting Started
-
-There is a comprehensive
-[getting started guide](http://grafter.org/getting-started/index.html) on the
-project website.
+If you're looking to start a greenfield project then you can easily
+wire up any capable CSV/excel parser to the RDF processing side of
+grafter.
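+
+As a rough sketch of that kind of wiring (illustrative only — it
+assumes `org.clojure/data.csv` as the CSV parser, and the example IRIs
+and file name are hypothetical):
+
+```clojure
+(require '[clojure.data.csv :as csv]
+         '[clojure.java.io :as io]
+         '[grafter.rdf :refer [add]]
+         '[grafter.rdf.templater :refer [triplify]]
+         '[grafter.rdf.repository :refer [repo ->connection]])
+
+;; Read rows of [id name] from a CSV file and write one triple per row
+;; into an in-memory repository, via an explicitly opened connection.
+(with-open [rdr (io/reader "people.csv")
+            conn (->connection (repo))]
+  (doseq [[id name] (csv/read-csv rdr)]
+    (add conn (triplify [(str "http://example.org/id/" id)
+                         ["http://xmlns.com/foaf/0.1/name" name]]))))
+```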

 ## License

diff --git a/RELEASENOTES.md b/RELEASENOTES.md
index 8e97ca41..a1efad6d 100644
--- a/RELEASENOTES.md
+++ b/RELEASENOTES.md
@@ -2,6 +2,14 @@

 Copyright © 2014 Swirrl IT Ltd.

+## Latest Releases
+
+Release notes are no longer updated in this file, but are managed via
+github. For information on the latest releases since 0.7.6 please
+see:
+
+https://github.com/Swirrl/grafter/releases
+
 ## 2016-08-31

 `VERSION 0.7.6`

From 49cc4482d9ba51ee6150ec5bbc9aac10dc83be6f Mon Sep 17 00:00:00 2001
From: Rick Moynihan
Date: Wed, 12 Jul 2017 17:46:03 +0100
Subject: [PATCH 08/10] fix codox config

---
 project.clj | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/project.clj b/project.clj
index b0026382..55e83a8a 100644
--- a/project.clj
+++ b/project.clj
@@ -21,8 +21,7 @@
   :codox {:defaults {:doc "FIXME: write docs"
                      :doc/format :markdown}
           :output-dir "api-docs"
-          :sources ["src/tabular" "src/rdf-common" "src/pipeline"
-                    ;; Include docs from grafter-url project too
+          :sources ["src" ;; Include docs from grafter-url project too
                     "../grafter-url/src"]

           ;; TODO change this when we merge back to master

From 2a62b27514a20f08eefd4aea638537062ba03d68 Mon Sep 17 00:00:00 2001
From: Rick Moynihan
Date: Thu, 20 Jul 2017 13:43:50 +0100
Subject: [PATCH 09/10] Fix #109 to help prevent downstream clients leaking
 resources.

Clients who receive this exception should migrate code from this:

(let [repo (repo)]
  (query repo "select * where { ?s ?p ?o .} limit 10"))

To something more like this:

(let [repo (repo)]
  (with-open [conn (->connection repo)]
    (doall (query conn "select * where { ?s ?p ?o . } limit 10"))))

We plan to add more eager APIs to assist with this in the future.
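As a further illustration (an editor's sketch, not part of this
commit): query results are lazy, so they must be fully realised — for
example with doall — while the connection is still open:

(defn select-eagerly
  "Run a SPARQL select against repo, returning fully realised results.
  The connection is closed by with-open once the results are realised."
  [repo sparql-string]
  (with-open [conn (->connection repo)]
    (doall (query conn sparql-string))))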
---
 src/grafter/rdf/repository.clj       | 120 +++++++++++++--------------
 test/grafter/rdf/repository_test.clj |  89 +++++++++++---------
 test/grafter/rdf_test.clj            |  68 +++++++--------
 3 files changed, 143 insertions(+), 134 deletions(-)

diff --git a/src/grafter/rdf/repository.clj b/src/grafter/rdf/repository.clj
index a1853470..e26565a9 100644
--- a/src/grafter/rdf/repository.clj
+++ b/src/grafter/rdf/repository.clj
@@ -92,57 +92,47 @@
        (doto this
          (.add triple-stream base-uri format (resource-array (->sesame-uri graph)))))))

+(definline throw-deprecated-exception!
+  "Throw a more helpful error message alerting people to the need to
+  change code.
+
+  This is technically a breaking change, but it should indicate sites
+  which have a bug in them anyway."
+  []
+  ;; Use a definline to remove extra stack frame from output so
+  ;; exception is closer to call site.
+  `(throw (ex-info "This function is no longer extended to Repository. You will need to update your code to call it on a repository connection instead."
+                  {:error :deprecated-function})))
+
 (extend-type Repository
   pr/ITripleWriteable

   (pr/add-statement
     ([this statement]
-      (with-open [connection (.getConnection this)]
-        (log/debug "Opening connection" connection "on repo" this)
-        (pr/add-statement connection statement)
-        (log/debug "Closing connection" connection "on repo" this)
-        this))
+     (throw-deprecated-exception!))

     ([this graph statement]
-      (with-open [connection (.getConnection this)]
-        (log/debug "Opening connection" connection "on repo" this)
-        (pr/add-statement (.getConnection this) graph statement)
-        (log/debug "Closing connection" connection "on repo" this)
-        this)))
+     (throw-deprecated-exception!)))

   (pr/add
     ([this triples]
-      (with-open [connection (.getConnection this)]
-        (log/debug "Opening connection" connection "on repo" this)
-        (pr/add connection triples)
-        (log/debug "Closing connection" connection "on repo" this))
-      this)
+     (throw-deprecated-exception!))

     ([this graph triples]
-      (with-open [connection (.getConnection this)]
-        (log/debug "Opening connection" connection "on repo" this)
-        (pr/add connection graph triples)
-        (log/debug "Closing connection" connection "on repo" this)
-        this))
+     (throw-deprecated-exception!))

     ([this graph format triple-stream]
-      (with-open [^RepositoryConnection connection (.getConnection this)]
-        (pr/add connection graph format triple-stream))
-      this)
+     (throw-deprecated-exception!))

     ([this graph base-uri format triple-stream]
-      (with-open [^RepositoryConnection connection (.getConnection this)]
-        (pr/add connection graph base-uri format triple-stream))
-      this))
+     (throw-deprecated-exception!)))

   pr/ITripleDeleteable

   (pr/delete
     ([this quads]
-     (with-open [^RepositoryConnection connection (.getConnection this)]
-       (pr/delete connection quads)))
+     (throw-deprecated-exception!))

     ([this graph quads]
-     (with-open [^RepositoryConnection connection (.getConnection this)]
-       (pr/delete connection graph quads)))))
+     (throw-deprecated-exception!))))


 (defn memory-store
@@ -224,13 +214,17 @@
   (doto (SailRepository. sail)
     (.initialize))))

-(defn add->repo
-  ([] (sail-repo))
-  ([acc] acc)
-  ([acc v]
-   (if (reduced? acc)
-     acc
-     (rdf/add acc v))))
+(defn add->repo [repo]
+  (fn
+    ([] (->connection repo))
+    ([acc] (.close acc) repo)
+    ([acc v]
+     (try (if (reduced? acc)
+            acc
+            (rdf/add acc v))
+          (catch Throwable ex
+            (.close acc)
+            (throw (ex-info "Exception when adding to repository" {} ex)))))))

 (defn- statements-with-inferred-format [res]
   (if (seq? res)
@@ -250,13 +244,16 @@
   ([] (sail-repo))
   ([repo-or-data & data]
    (let [repo (if (instance? Repository repo-or-data)
-                 repo-or-data
-                 (rdf/add (sail-repo) (statements-with-inferred-format repo-or-data)))]
+                repo-or-data
+                (let [repo (sail-repo)]
+                  (with-open [conn (->connection repo)]
+                    (rdf/add conn (statements-with-inferred-format repo-or-data))
+                    repo)))]
      (let [xf (mapcat (fn [d]
                         (cond
                           (satisfies? pr/ITripleReadable d) (statements-with-inferred-format d)
                           (seq d) d)))]
-       (transduce xf add->repo repo data)))))
+       (transduce xf (add->repo repo) data)))))

 (defn resource-repo
   "Like fixture repo but assumes all supplied data is on the java
@@ -272,9 +269,13 @@
   repository."
   ([] (sail-repo))
   ([repo-or-data & data]
-   (let [repo (if (instance? Repository repo-or-data)
-                 repo-or-data
-                 (rdf/add (sail-repo) (statements-with-inferred-format (io/resource repo-or-data))))]
+   (let [repo (let [repo (sail-repo)]
+                (if (instance?
Repository repo-or-data) + repo-or-data + (with-open [conn (->connection repo)] + (rdf/add conn (statements-with-inferred-format (io/resource repo-or-data))) + + repo)))] (apply fixture-repo repo (map io/resource data))))) (defn repo @@ -302,9 +303,10 @@ ([rdf-data sail] (let [r (doto (SailRepository. sail) (.initialize))] - (pr/add r (cond - (and rdf-data (satisfies? io/Coercions rdf-data)) (pr/to-statements rdf-data {}) - (or (seq rdf-data) (nil? rdf-data)) rdf-data)) + (with-open [conn (->connection r)] + (pr/add conn (cond + (and rdf-data (satisfies? io/Coercions rdf-data)) (pr/to-statements rdf-data {}) + (or (seq rdf-data) (nil? rdf-data)) rdf-data))) r))) (defn- query-bindings->map [^BindingSet qbs] @@ -317,13 +319,13 @@ (extend-protocol pr/ITransactable Repository (begin [repo] - (-> repo .getConnection (.setAutoCommit false))) + (throw-deprecated-exception!)) (commit [repo] - (-> repo .getConnection .commit)) + (throw-deprecated-exception!)) (rollback [repo] - (-> repo .getConnection .rollback)) + (throw-deprecated-exception!)) RepositoryConnection (begin [repo] @@ -352,23 +354,22 @@ clojure.core.protocols/CollReduce (coll-reduce ([this f] - (reduce f (f) this)) + (clojure.core.protocols/coll-reduce this f (f))) ([this f val] (with-open [c (.getConnection this)] (reduce f val c)))) pr/ISPARQLable (pr/query-dataset [this query-str model] - (pr/query-dataset (.getConnection this) query-str model)) + (throw-deprecated-exception!)) pr/ISPARQLUpdateable (pr/update! [this query-str] - (with-open [connection (.getConnection this)] - (pr/update! connection query-str))) + (throw-deprecated-exception!)) pr/ITripleReadable (pr/to-statements [this options] - (pr/to-statements (.getConnection this) options))) + (throw-deprecated-exception!))) (extend-type Graph pr/ITripleReadable @@ -533,16 +534,11 @@ (extend-protocol ToConnection RepositoryConnection - (->connection - [^Repository repo] - (if (instance? RepositoryConnection repo) - repo - (let [c (.getConnection repo)] - c))) + (->connection [conn] + conn) Repository - (->connection - [^Repository repo] + (->connection [^Repository repo] (.getConnection repo))) (defn make-restricted-dataset diff --git a/test/grafter/rdf/repository_test.clj b/test/grafter/rdf/repository_test.clj index 67703c3a..2c99cefc 100644 --- a/test/grafter/rdf/repository_test.clj +++ b/test/grafter/rdf/repository_test.clj @@ -6,7 +6,8 @@ [grafter.rdf :refer [statements]] [grafter.url :refer [->GrafterURL]] [grafter.rdf.formats :refer :all] - [clojure.test :refer :all]) + [clojure.test :refer :all] + [grafter.rdf.repository :as repo]) (:import org.openrdf.model.impl.GraphImpl org.openrdf.sail.memory.MemoryStore org.openrdf.repository.sparql.SPARQLRepository @@ -29,7 +30,7 @@ (first (grafter.rdf/statements graph)))))) (deftest with-transaction-test - (let [test-db (repo)] + (with-open [test-db (repo/->connection (repo))] (testing "Transactions return last result of form if there's no error." (is (= :return-value (with-transaction test-db :return-value)))) @@ -53,11 +54,13 @@ (defn load-rdf-types-data ([file] (let [db (repo)] - (pr/add db (statements file)) + (with-open [conn (->connection db)] + (pr/add conn (statements file))) + db))) (deftest query-test - (let [test-db (load-rdf-types-data triple-fixture-file-path)] + (with-open [test-db (->connection (load-rdf-types-data triple-fixture-file-path))] (are [type f?] (is (f? (let [o (-> (query test-db (str "PREFIX : SELECT ?o WHERE {" type " :hasValue ?o . 
}")) first @@ -78,42 +81,49 @@ (testing "roundtripping ttl file" (let [file triple-fixture-file-path] - (is (= (set (statements (load-rdf-types-data file))) - (set (statements file)))))) + (with-open [conn (->connection (load-rdf-types-data file))] + (is (= (set (statements conn)) + (set (statements file))))))) (testing "roundtripping trig file" (let [file quad-fixture-file-path] - (is (= (set (statements (load-rdf-types-data file))) - (set (statements file)))))))) + (with-open [conn (->connection (load-rdf-types-data file))] + (is (= (set (statements conn)) + (set (statements file))))))))) (deftest delete-statement-test (testing "arity 2 delete" (are [initial-data delete-form] - (let [test-db (load-rdf-types-data initial-data) - quads-to-delete (statements test-db)] - delete-form - (is (not (query test-db "ASK { ?s ?p ?o } LIMIT 1")) - "Should be deleted")) + (with-open [test-db (->connection (load-rdf-types-data initial-data))] + (let [quads-to-delete (statements test-db)] + delete-form + (is (not (query test-db "ASK { ?s ?p ?o } LIMIT 1")) + "Should be deleted"))) - (load-rdf-types-data triple-fixture-file-path) (pr/delete test-db quads-to-delete) - (load-rdf-types-data quad-fixture-file-path) (pr/delete test-db quads-to-delete))) + triple-fixture-file-path (pr/delete test-db quads-to-delete) + quad-fixture-file-path (pr/delete test-db quads-to-delete))) (testing "arity 3 delete" - (let [test-db (-> (repo) - (pr/add - (URL. "http://a") - (statements triple-fixture-file-path)) - (pr/add - (URL. "http://b") - (statements triple-fixture-file-path)))] - (pr/delete test-db - (URL. "http://a") - (statements triple-fixture-file-path)) - (is (not (query test-db "ASK { GRAPH { ?s ?p ?o } } LIMIT 1")) - "Should be deleted") - - (is (query test-db "ASK { GRAPH { ?s ?p ?o } } LIMIT 1") - "Should not be deleted")))) + (let [repo (repo)] + + (with-open [conn (->connection repo)] + (-> conn + (pr/add + (URL. "http://a") + (statements triple-fixture-file-path)) + (pr/add + (URL. "http://b") + (statements triple-fixture-file-path)))) + + (with-open [test-db (->connection repo)] + (pr/delete test-db + (URL. "http://a") + (statements triple-fixture-file-path)) + (is (not (query test-db "ASK { GRAPH { ?s ?p ?o } } LIMIT 1")) + "Should be deleted") + + (is (query test-db "ASK { GRAPH { ?s ?p ?o } } LIMIT 1") + "Should not be deleted"))))) (deftest col-reduce-repo-test (is (= (into #{} (repo)) @@ -134,13 +144,15 @@ (URI. "http://triple"))})) (testing "Calling with multiple sets of quads appends them all into the repo" - (is (= 2 (count (grafter.rdf/statements (fixture-repo (io/resource "quads.nq") - (io/resource "quads.trig")))))))) + (with-open [conn (->connection (fixture-repo (io/resource "quads.nq") + (io/resource "quads.trig")))] + (is (= 2 (count (grafter.rdf/statements conn))))))) (deftest resource-repo-test (testing "Calling with multiple sets of quads appends them all into the repo" - (is (= 2 (count (grafter.rdf/statements (resource-repo "quads.nq" - "quads.trig"))))))) + (with-open [conn (->connection (resource-repo "quads.nq" + "quads.trig"))] + (is (= 2 (count (grafter.rdf/statements conn))))))) (deftest sail-repo-test (is (instance? 
org.openrdf.repository.Repository (sail-repo))) @@ -149,10 +161,11 @@ (deftest batched-query-test - (let [repo (let [r (repo)] - (grafter.rdf/add r - (grafter.rdf/statements (io/resource "grafter/rdf/triples.nt"))) - r)] + (let [repo (repo)] + (with-open [conn (->connection repo)] + (grafter.rdf/add conn + (grafter.rdf/statements (io/resource "grafter/rdf/triples.nt")))) + (with-open [c (->connection repo)] (is (= 3 (count (batched-query "SELECT * WHERE { ?s ?p ?o .}" c diff --git a/test/grafter/rdf_test.clj b/test/grafter/rdf_test.clj index bd32c9cd..17830fcd 100644 --- a/test/grafter/rdf_test.clj +++ b/test/grafter/rdf_test.clj @@ -53,32 +53,32 @@ (let [triples (map (fn [i] (->Triple (URI. (str "http://subject" i)) (URI. (str "http://predicate" i)) (URI. (str "http://object" i)))) (range 1 11)) graph (URI. "http://test-graph")] (testing "Adds all triples" - (let [repo (repo/repo)] + (with-open [repo (repo/->connection (repo/repo))] (add-batched repo triples) (is (= (set triples) (set (statements repo)))))) (testing "Adds all triples with graph" - (let [expected-quads (map #(assoc % :c graph) triples) - repo (repo/repo)] - (add-batched repo graph triples) - (is (= (set expected-quads) (set (statements repo)))))) + (let [expected-quads (map #(assoc % :c graph) triples)] + (with-open [repo (repo/->connection (repo/repo))] + (add-batched repo graph triples) + (is (= (set expected-quads) (set (statements repo))))))) (testing "Adds all triples in sized batches" - (let [repo (repo/repo) - batch-sizes (atom []) - recording-repo (->BatchSizeRecordingRepository repo batch-sizes)] - (add-batched recording-repo triples 3) - (is (= (set triples) (set (statements repo)))) - (is (= [3 3 3 1] @batch-sizes)))) + (with-open [repo (repo/->connection (repo/repo))] + (let [batch-sizes (atom []) + recording-repo (->BatchSizeRecordingRepository repo batch-sizes)] + (add-batched recording-repo triples 3) + (is (= (set triples) (set (statements repo)))) + (is (= [3 3 3 1] @batch-sizes))))) (testing "Adds all triples with graph in sized batches" - (let [expected-quads (map #(assoc % :c graph) triples) - repo (repo/repo) - batch-sizes (atom []) - recording-repo (->BatchSizeRecordingRepository repo batch-sizes)] - (add-batched recording-repo graph triples 5) - (is (= (set expected-quads) (set (statements repo)))) - (is (= [5 5] @batch-sizes)))))) + (with-open [repo (repo/->connection (repo/repo))] + (let [expected-quads (map #(assoc % :c graph) triples) + batch-sizes (atom []) + recording-repo (->BatchSizeRecordingRepository repo batch-sizes)] + (add-batched recording-repo graph triples 5) + (is (= (set expected-quads) (set (statements repo)))) + (is (= [5 5] @batch-sizes))))))) (defn- triple->quad [graph triple] (assoc triple :c graph)) @@ -89,31 +89,31 @@ graph (URI. 
"http://test-graph") make-quads (fn [triples] (map #(triple->quad graph %) triples))] (testing "Deletes all triples" - (let [repo (repo/repo)] + (with-open [repo (repo/->connection (repo/repo))] (add repo initial-triples) (delete-batched repo to-delete) (is (= (set to-keep) (set (statements repo)))))) (testing "Deletes all triples with graph" - (let [repo (repo/repo)] + (with-open [repo (repo/->connection (repo/repo))] (add repo (make-quads initial-triples)) (delete-batched repo graph to-delete) (is (= (set (make-quads to-keep)) (set (statements repo)))))) (testing "Deletes all triples in sized batches" - (let [repo (repo/repo) - batch-sizes (atom []) - recording-repo (->BatchSizeRecordingRepository repo batch-sizes)] - (add repo initial-triples) - (delete-batched recording-repo to-delete 4) - (is (= (set to-keep) (set (statements repo)))) - (is (= [4 2] @batch-sizes)))) + (with-open [repo (repo/->connection (repo/repo))] + (let [batch-sizes (atom []) + recording-repo (->BatchSizeRecordingRepository repo batch-sizes)] + (add repo initial-triples) + (delete-batched recording-repo to-delete 4) + (is (= (set to-keep) (set (statements repo)))) + (is (= [4 2] @batch-sizes))))) (testing "Deletes all triples with graph in sized batches" - (let [repo (repo/repo) - batch-sizes (atom []) - recording-repo (->BatchSizeRecordingRepository repo batch-sizes)] - (add repo (make-quads initial-triples)) - (delete-batched recording-repo graph to-delete 4) - (is (= (set (make-quads to-keep)) (set (statements repo)))) - (is (= [4 2] @batch-sizes)))))) + (with-open [repo (repo/->connection (repo/repo))] + (let [batch-sizes (atom []) + recording-repo (->BatchSizeRecordingRepository repo batch-sizes)] + (add repo (make-quads initial-triples)) + (delete-batched recording-repo graph to-delete 4) + (is (= (set (make-quads to-keep)) (set (statements repo)))) + (is (= [4 2] @batch-sizes))))))) From 5bec3fd11862f89be83b5371e3a88dd8e449b46b Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Thu, 20 Jul 2017 13:47:27 +0100 Subject: [PATCH 10/10] Bump version to 0.10.0-SNAPSHOT --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index 55e83a8a..668d74ab 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject grafter/grafter "0.9.0-SNAPSHOT" +(defproject grafter/grafter "0.10.0-SNAPSHOT" :description "Tools for the hard graft of linked data processing" :url "http://grafter.org/" :license {:name "Eclipse Public License - v1.0"