Skip to content

Commit

Permalink
v3.0.0, using Julia 1.6 and TOML.jl instead of YAML.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
Lawrie authored and Lawrie committed Feb 10, 2021
1 parent c206c56 commit de00fd9
Show file tree
Hide file tree
Showing 15 changed files with 316 additions and 189 deletions.
162 changes: 110 additions & 52 deletions Manifest.toml
Original file line number Diff line number Diff line change
@@ -1,42 +1,53 @@
# This file is machine-generated - editing it directly is not advised

[[ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"

[[Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"

[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"

[[CSV]]
deps = ["CategoricalArrays", "DataFrames", "Dates", "FilePathsBase", "Mmap", "Parsers", "PooledArrays", "Tables", "Unicode", "WeakRefStrings"]
git-tree-sha1 = "52a8e60c7822f53d57e4403b7f2811e7e1bdd32b"
deps = ["Dates", "Mmap", "Parsers", "PooledArrays", "SentinelArrays", "Tables", "Unicode"]
git-tree-sha1 = "1f79803452adf73e2d3fc84785adb7aaca14db36"
uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
version = "0.6.2"
version = "0.8.3"

[[CategoricalArrays]]
deps = ["DataAPI", "Future", "JSON", "Missings", "Printf", "Statistics", "Unicode"]
git-tree-sha1 = "a6c17353ee38ddab30e73dcfaa1107752de724ec"
deps = ["DataAPI", "Future", "JSON", "Missings", "Printf", "Statistics", "StructTypes", "Unicode"]
git-tree-sha1 = "99809999c8ee01fa89498480b147f7394ea5450f"
uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597"
version = "0.8.1"
version = "0.9.2"

[[Compat]]
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
git-tree-sha1 = "054993b6611376ddb40203e973e954fd9d1d1902"
git-tree-sha1 = "919c7f3151e79ff196add81d7f4e45d91bbf420b"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "3.12.0"
version = "3.25.0"

[[Crayons]]
git-tree-sha1 = "3f71217b538d7aaee0b69ab47d9b7724ca8afa0d"
uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
version = "4.0.4"

[[DataAPI]]
git-tree-sha1 = "176e23402d80e7743fc26c19c681bfb11246af32"
git-tree-sha1 = "8ab70b4de35bb3b8cc19654f6b893cf5164f8ee8"
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
version = "1.3.0"
version = "1.5.1"

[[DataFrames]]
deps = ["CategoricalArrays", "Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "Missings", "PooledArrays", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
git-tree-sha1 = "02f08ae77249b7f6d4186b081a016fb7454c616f"
deps = ["CategoricalArrays", "Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
git-tree-sha1 = "b0db5579803eabb33f1274ca7ca2f472fdfb7f2a"
uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
version = "0.21.2"
version = "0.22.5"

[[DataStructures]]
deps = ["InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "be680f1ad03c0a03796aa3fda5a2180df7f83b46"
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.17.18"
version = "0.18.9"

[[DataValueInterfaces]]
git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
Expand All @@ -55,11 +66,15 @@ uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"

[[FilePathsBase]]
deps = ["Dates", "LinearAlgebra", "Printf", "Test", "UUIDs"]
git-tree-sha1 = "923fd3b942a11712435682eaa95cc8518c428b2c"
uuid = "48062228-2e41-5def-b9a4-89aafe57970f"
version = "0.8.0"
[[Downloads]]
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"

[[Formatting]]
deps = ["Printf"]
git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8"
uuid = "59287772-0a20-5a39-b81b-1366585eb4c0"
version = "0.4.2"

[[Future]]
deps = ["Random"]
Expand All @@ -82,14 +97,26 @@ version = "1.0.0"

[[JSON]]
deps = ["Dates", "Mmap", "Parsers", "Unicode"]
git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e"
git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4"
uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
version = "0.21.0"
version = "0.21.1"

[[LibCURL]]
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"

[[LibCURL_jll]]
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"

[[LibGit2]]
deps = ["Printf"]
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"

[[LibSSH2_jll]]
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"

[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

Expand All @@ -104,57 +131,78 @@ uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"

[[MbedTLS_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"

[[Missings]]
deps = ["DataAPI"]
git-tree-sha1 = "de0a5ce9e5289f27df672ffabef4d1e5861247d5"
git-tree-sha1 = "f8c673ccc215eb50fcadb285f522420e29e69e1c"
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
version = "0.4.3"
version = "0.4.5"

[[Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"

[[MozillaCACerts_jll]]
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"

[[NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"

[[OrderedCollections]]
git-tree-sha1 = "12ce190210d278e12644bcadf5b21cbdcf225cd3"
git-tree-sha1 = "d45739abcfc03b51f6a42712894a593f74c80a23"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.2.0"
version = "1.3.3"

[[Parsers]]
deps = ["Dates", "Test"]
git-tree-sha1 = "eb3e09940c0d7ae01b01d9291ebad7b081c844d3"
deps = ["Dates"]
git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714"
uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
version = "1.0.5"
version = "1.0.15"

[[Pkg]]
deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"

[[PooledArrays]]
deps = ["DataAPI"]
git-tree-sha1 = "b1333d4eced1826e15adbdf01a4ecaccca9d353c"
git-tree-sha1 = "0e8f5c428a41a81cd71f76d76f2fc3415fe5a676"
uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
version = "0.5.3"
version = "1.1.0"

[[PrettyTables]]
deps = ["Crayons", "Formatting", "Markdown", "Reexport", "Tables"]
git-tree-sha1 = "42126c4e2677cdc664baea004c98cc60a664fe40"
uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
version = "0.11.0"

[[Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"

[[REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets"]
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"

[[Random]]
deps = ["Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[[Reexport]]
deps = ["Pkg"]
git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0"
git-tree-sha1 = "57d8440b0c7d98fc4f889e478e80f268d534c9d5"
uuid = "189a3867-3050-52da-a836-e630ba90ab69"
version = "0.2.0"
version = "1.0.0"

[[SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"

[[SentinelArrays]]
deps = ["Dates", "Random"]
git-tree-sha1 = "6ccde405cf0759eba835eb613130723cb8f10ff9"
uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
version = "1.2.16"

[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"

Expand All @@ -179,6 +227,16 @@ uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[[StructTypes]]
deps = ["Dates", "UUIDs"]
git-tree-sha1 = "65a43f5218197bc7091b76bc273a5e323a1d7b0d"
uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4"
version = "1.2.3"

[[TOML]]
deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"

[[TableTraits]]
deps = ["IteratorInterfaceExtensions"]
git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e"
Expand All @@ -187,12 +245,16 @@ version = "1.0.0"

[[Tables]]
deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"]
git-tree-sha1 = "c45dcc27331febabc20d86cb3974ef095257dcf3"
git-tree-sha1 = "a716dde43d57fa537a19058d044b495301ba6565"
uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
version = "1.0.4"
version = "1.3.2"

[[Tar]]
deps = ["ArgTools", "SHA"]
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"

[[Test]]
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[UUIDs]]
Expand All @@ -202,14 +264,10 @@ uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[[WeakRefStrings]]
deps = ["DataAPI", "Random", "Test"]
git-tree-sha1 = "28807f85197eaad3cbd2330386fac1dcb9e7e11d"
uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
version = "0.6.2"

[[YAML]]
deps = ["Base64", "Dates", "Printf"]
git-tree-sha1 = "c5e2eaa5ce818c5277388377d592eb4c81f27c00"
uuid = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
version = "0.4.0"
[[Zlib_jll]]
deps = ["Libdl"]
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"

[[nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
17 changes: 8 additions & 9 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,25 +1,24 @@
name = "Schemata"
uuid = "b4d66a32-c6c0-5461-b6fa-34bb9cecaf85"
authors = ["Jock Lawrie <[email protected]>"]
version = "2.0.8"
version = "3.0.0"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"

[compat]
CSV = "0.6.2"
CategoricalArrays = "0.8.1"
DataFrames = "0.21.2"
Parsers = "1.0.5"
Tables = "1.0.4"
YAML = "0.4.0"
julia = "1"
CSV = "0.8.3"
CategoricalArrays = "0.9.2"
DataFrames = "0.22.5"
Parsers = "1.0.15"
Tables = "1.3.2"
julia = "1.6"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
61 changes: 31 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ It exists independently of any particular data set, and therefore can be constru

This package facilitates 3 use cases:

1. Read/write a schema from/to a yaml file.
1. Read/write a schema from/to a [TOML](https://toml.io/en/v1.0.0) file.

2. Compare a data set to a schema and list the non-compliance issues.

Expand All @@ -19,28 +19,29 @@ Indeed the 3 use cases listed above can be carried out without writing any Julia

# Usage

A `TableSchema` looks like this `yaml` file:

```YAML
name: mytable
description: "My table"
primarykey: patientid # A column name or a vector of column names
columns:
- patientid: {description: Patient ID, datatype: UInt, iscategorical: false, isrequired: true, isunique: true, validvalues: UInt}
- age: {description: Age (years), datatype: Int, iscategorical: false, isrequired: true, isunique: false, validvalues: "0:120"}
- dose: {description: Dose size, datatype: String, iscategorical: true, isrequired: true, isunique: false,
validvalues: ["small", "medium", "large"]
- fever: {description: Had fever, datatype: Bool, iscategorical: true, isrequired: true, isunique: false, validvalues: Bool}
A `TableSchema` looks like this `TOML` file:

```toml
name = "mytable"
description = "My table"
primarykey = "patientid" # A column name or a vector of column names
columns = [
{name = "patientid", description = "Patient ID", datatype = "UInt", validvalues = "UInt", iscategorical = false, isrequired = true, isunique = true},
{name = "age", description = "Age (years)", datatype = "Int", validvalues = "Int", iscategorical = false, isrequired = true, isunique = false},
{name = "dose", description = "Dose size", datatype = "String", validvalues = ["small", "medium", "large"], iscategorical = true, isrequired = true, isunique = false},
{name = "fever", description = "Had fever", datatype = "Bool", validvalues = "Bool", iscategorical = true, isrequired = true, isunique = false}
]
```

A `Schema` contains 1 or more `TableSchema`. For example:

```YAML
name: fever
description: "Fever schema"
tables:
table1: *table1_schema
table2: *table2_schema
```toml
name = "fever"
description = "Fever schema"

[tables]
table1 = "table1_schema"
table2 = "table2_schema"
```

For tables that fit into memory, usage is as follows:
Expand All @@ -49,7 +50,7 @@ For tables that fit into memory, usage is as follows:
# Read in a schema
using Schemata

schema = readschema(joinpath(dirname(pathof(Schemata)), "..", "test/schemata/fever.yaml"))
schema = readschema(joinpath(dirname(pathof(Schemata)), "..", "test/schemata/fever.toml"))
ts = schema.tables[:mytable] # TableSchema for mytable

# Construct/import a table (any object that satisfies the Tables.jl interface)
Expand Down Expand Up @@ -152,16 +153,16 @@ We often want to ensure that certain relationships hold between variables within
For example, we might require that a person's marriage date is after their birth date.
We can achieve this by specifying one or more intra-row constraints in a `TableSchema` as follows:

```yaml
name: intrarow_constraints_demo
description: "Table with intra-row constraints"
primarykey: id
intrarow_constraints:
birth date before marriage date: "r[:dob] < r[:date_of_marriage]"
columns:
- id: {description: ID, datatype: UInt, iscategorical: false, isrequired: true, isunique: true, validvalues: UInt}
- dob: {description: Date of birth, datatype: Date, iscategorical: false, isrequired: true, isunique: false, validvalues: Date}
- date_of_marriage: {description: Date of marriage, datatype: Date, iscategorical: false, isrequired: false, isunique: false, validvalues: Date}
```toml
name = "intrarow_constraints_demo"
description = "Table with intra-row constraints"
primarykey = "patientid"
intrarow_constraints = {"birth date before marriage date" = "r[:dob] < r[:date_of_marriage]"}
columns = [
{name = "patientid", description = "Patient ID", datatype = "UInt", validvalues = "UInt", iscategorical = false, isrequired = true, isunique = true},
{name = "dob", description = "Date of birth", datatype = "Date", validvalues = "Date", iscategorical = false, isrequired = true, isunique = false},
{name = "date_of_marriage", description = "Date of marriage", datatype = "Date", validvalues = "Date", iscategorical = false, isrequired = false, isunique = false}
]
```

Each constraint is specified as a key-value pair, where the key is a description of the constraint and
Expand Down
Loading

0 comments on commit de00fd9

Please sign in to comment.