From 9647350303fcf6d971dee52df09037190ad72083 Mon Sep 17 00:00:00 2001 From: Michael Huang Date: Thu, 3 Mar 2022 08:37:35 +0800 Subject: [PATCH 1/4] add format constraint for DATE and DATETIME types --- resources/test/adur-public-toilets.csv | 2 +- ...chema-with-value-constraints.expected.json | 10 +- ...ur-public-toilets.csv.schema.expected.json | 239 ------------------ src/cmd/schema.rs | 13 + tests/test_schema.rs | 21 +- tests/test_validate.rs | 27 +- 6 files changed, 47 insertions(+), 265 deletions(-) delete mode 100644 resources/test/adur-public-toilets.csv.schema.expected.json diff --git a/resources/test/adur-public-toilets.csv b/resources/test/adur-public-toilets.csv index 1823c9432..f7738ea89 100644 --- a/resources/test/adur-public-toilets.csv +++ b/resources/test/adur-public-toilets.csv @@ -1,5 +1,5 @@ ExtractDate,OrganisationURI,OrganisationLabel,ServiceTypeURI,ServiceTypeLabel,LocationText,CoordinateReferenceSystem,GeoX,GeoY,GeoPointLicensingURL,Category,AccessibleCategory,RADARKeyNeeded,BabyChange,FamilyToilet,ChangingPlace,AutomaticPublicConvenience,FullTimeStaffing,PartOfCommunityScheme,CommunitySchemeName,ChargeAmount,InfoURL,OpeningHours,ManagedBy,ReportEmail,ReportTel,Notes,UPRN,Postcode,StreetAddress,GeoAreaURI,GeoAreaLabel - ,http://opendatacommunities.org/id/district-council/adur,,http://id.esd.org.uk/service/579,Public toilets,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,OSGB36,518072,103649,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,"S = 09:00 - 21:00 W = 09:00 - 17:00 ",ADC,surveyor_1@adur-worthing.gov.uk,01903 221471,,60001449,,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,, +,http://opendatacommunities.org/id/district-council/adur,,http://id.esd.org.uk/service/579,Public toilets,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,OSGB36,518072,103649,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,"S = 09:00 - 21:00 W = 09:00 - 17:00 ",ADC,surveyor_1@adur-worthing.gov.uk,01903 221471,,60001449,,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,, 07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES MONKS RECREATION GROUND CRABTREE LANE LANCING,OSGB36,518225,104730,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,None,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,"S = 09:00 - 15:00 W = 09:00 - 15:00",ADC,surveyor_2@adur-worthing.gov.uk,01903 221471,,60002210,,PUBLIC CONVENIENCES MONKS RECREATION GROUND CRABTREE LANE LANCING,, 2014-07-07 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES SHOPSDAM ROAD LANCING,OSGB3,518915,103795,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Mens,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,"S = 09:00 - 21:00 W = 09:00 - 17:00",ADC,surveyor_3@adur-worthing.gov.uk,01903 221471,,60007428,,,, 07/07/2014 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES YEW TREE CLOSE LANCING,OSGB36,518222,104168,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,"S = 09:00 - 21:00 W = 09:00 - 17:00",ADC,surveyor_4@adur-worthing.gov.uk,01903 221471,,60008859,,PUBLIC CONVENIENCES YEW TREE CLOSE LANCING,, diff --git a/resources/test/adur-public-toilets.csv.schema-with-value-constraints.expected.json b/resources/test/adur-public-toilets.csv.schema-with-value-constraints.expected.json index 23c24d849..e0fca0eec 100644 --- a/resources/test/adur-public-toilets.csv.schema-with-value-constraints.expected.json +++ b/resources/test/adur-public-toilets.csv.schema-with-value-constraints.expected.json @@ -6,14 +6,10 @@ "properties": { "ExtractDate": { "description": "ExtractDate column from adur-public-toilets.csv", - "minLength": 2, - "maxLength": 16, + "format": "date-time", "type": [ - "string" - ], - "enum": [ - "07/07/2014 00:00", - "2014-07-07 00:00" + "string", + "null" ] }, "OrganisationURI": { diff --git a/resources/test/adur-public-toilets.csv.schema.expected.json b/resources/test/adur-public-toilets.csv.schema.expected.json deleted file mode 100644 index 8fd2c069a..000000000 --- a/resources/test/adur-public-toilets.csv.schema.expected.json +++ /dev/null @@ -1,239 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft-07/schema", - "title": "JSON Schema for adur-public-toilets.csv", - "description": "Inferred JSON Schema from QSV schema command", - "type": "object", - "properties": { - "ExtractDate": { - "type": [ - "string" - ], - "description": "ExtractDate column from adur-public-toilets.csv" - }, - "OrganisationURI": { - "type": [ - "string" - ], - "description": "OrganisationURI column from adur-public-toilets.csv" - }, - "OrganisationLabel": { - "type": [ - "string", - "null" - ], - "description": "OrganisationLabel column from adur-public-toilets.csv" - }, - "ServiceTypeURI": { - "type": [ - "string" - ], - "description": "ServiceTypeURI column from adur-public-toilets.csv" - }, - "ServiceTypeLabel": { - "type": [ - "string" - ], - "description": "ServiceTypeLabel column from adur-public-toilets.csv" - }, - "LocationText": { - "type": [ - "string" - ], - "description": "LocationText column from adur-public-toilets.csv" - }, - "CoordinateReferenceSystem": { - "type": [ - "string" - ], - "description": "CoordinateReferenceSystem column from adur-public-toilets.csv" - }, - "GeoX": { - "type": [ - "integer" - ], - "description": "GeoX column from adur-public-toilets.csv" - }, - "GeoY": { - "type": [ - "integer" - ], - "description": "GeoY column from adur-public-toilets.csv" - }, - "GeoPointLicensingURL": { - "type": [ - "string" - ], - "description": "GeoPointLicensingURL column from adur-public-toilets.csv" - }, - "Category": { - "type": [ - "string" - ], - "description": "Category column from adur-public-toilets.csv" - }, - "AccessibleCategory": { - "type": [ - "string" - ], - "description": "AccessibleCategory column from adur-public-toilets.csv" - }, - "RADARKeyNeeded": { - "type": [ - "string" - ], - "description": "RADARKeyNeeded column from adur-public-toilets.csv" - }, - "BabyChange": { - "type": [ - "string" - ], - "description": "BabyChange column from adur-public-toilets.csv" - }, - "FamilyToilet": { - "type": [ - "string" - ], - "description": "FamilyToilet column from adur-public-toilets.csv" - }, - "ChangingPlace": { - "type": [ - "string" - ], - "description": "ChangingPlace column from adur-public-toilets.csv" - }, - "AutomaticPublicConvenience": { - "type": [ - "string" - ], - "description": "AutomaticPublicConvenience column from adur-public-toilets.csv" - }, - "FullTimeStaffing": { - "type": [ - "string" - ], - "description": "FullTimeStaffing column from adur-public-toilets.csv" - }, - "PartOfCommunityScheme": { - "type": [ - "string" - ], - "description": "PartOfCommunityScheme column from adur-public-toilets.csv" - }, - "CommunitySchemeName": { - "type": [ - "null" - ], - "description": "CommunitySchemeName column from adur-public-toilets.csv" - }, - "ChargeAmount": { - "type": [ - "null" - ], - "description": "ChargeAmount column from adur-public-toilets.csv" - }, - "InfoURL": { - "type": [ - "string" - ], - "description": "InfoURL column from adur-public-toilets.csv" - }, - "OpeningHours": { - "type": [ - "string", - "null" - ], - "description": "OpeningHours column from adur-public-toilets.csv" - }, - "ManagedBy": { - "type": [ - "string", - "null" - ], - "description": "ManagedBy column from adur-public-toilets.csv" - }, - "ReportEmail": { - "type": [ - "string" - ], - "description": "ReportEmail column from adur-public-toilets.csv" - }, - "ReportTel": { - "type": [ - "string" - ], - "description": "ReportTel column from adur-public-toilets.csv" - }, - "Notes": { - "type": [ - "string", - "null" - ], - "description": "Notes column from adur-public-toilets.csv" - }, - "UPRN": { - "type": [ - "integer" - ], - "description": "UPRN column from adur-public-toilets.csv" - }, - "Postcode": { - "type": [ - "null" - ], - "description": "Postcode column from adur-public-toilets.csv" - }, - "StreetAddress": { - "type": [ - "string", - "null" - ], - "description": "StreetAddress column from adur-public-toilets.csv" - }, - "GeoAreaURI": { - "type": [ - "null" - ], - "description": "GeoAreaURI column from adur-public-toilets.csv" - }, - "GeoAreaLabel": { - "type": [ - "null" - ], - "description": "GeoAreaLabel column from adur-public-toilets.csv" - } - }, - "required": [ - "ExtractDate", - "OrganisationURI", - "OrganisationLabel", - "ServiceTypeURI", - "ServiceTypeLabel", - "LocationText", - "CoordinateReferenceSystem", - "GeoX", - "GeoY", - "GeoPointLicensingURL", - "Category", - "AccessibleCategory", - "RADARKeyNeeded", - "BabyChange", - "FamilyToilet", - "ChangingPlace", - "AutomaticPublicConvenience", - "FullTimeStaffing", - "PartOfCommunityScheme", - "CommunitySchemeName", - "ChargeAmount", - "InfoURL", - "OpeningHours", - "ManagedBy", - "ReportEmail", - "ReportTel", - "Notes", - "UPRN", - "Postcode", - "StreetAddress", - "GeoAreaURI", - "GeoAreaLabel" - ] -} \ No newline at end of file diff --git a/src/cmd/schema.rs b/src/cmd/schema.rs index c48edb863..3b9a6093b 100644 --- a/src/cmd/schema.rs +++ b/src/cmd/schema.rs @@ -36,6 +36,7 @@ Usage: Schema options: --enum-threshold NUM Cardinality threshold for adding enum constraints [default: 50] + --strict-dates Enforce Internet Datetime format (RFC-3339) for detected datetime columns --pattern-columns Select columns to add pattern constraints Common options: @@ -51,6 +52,7 @@ Common options: #[derive(Deserialize, Debug)] struct Args { flag_enum_threshold: usize, + flag_strict_dates: bool, flag_pattern_columns: SelectColumns, flag_no_headers: bool, flag_delimiter: Option, @@ -216,6 +218,17 @@ fn infer_schema_from_stats(args: &Args, input_filename: &str) -> CliResult { type_list.push(Value::String("string".to_string())); + + if args.flag_strict_dates { + field_map.insert("format".to_string(), Value::String("date".to_string())); + } + } + "DateTime" => { + type_list.push(Value::String("string".to_string())); + + if args.flag_strict_dates { + field_map.insert("format".to_string(), Value::String("date-time".to_string())); + } } "Integer" => { type_list.push(Value::String("integer".to_string())); diff --git a/tests/test_schema.rs b/tests/test_schema.rs index 44016b0ee..702701401 100644 --- a/tests/test_schema.rs +++ b/tests/test_schema.rs @@ -18,6 +18,7 @@ fn generate_schema_with_value_constraints_then_feed_into_validate() { cmd.arg("13"); cmd.arg("--pattern-columns"); cmd.arg("ReportEmail,OpeningHours"); + cmd.arg("--strict-dates"); wrk.output(&mut cmd); // load output schema file @@ -44,9 +45,23 @@ fn generate_schema_with_value_constraints_then_feed_into_validate() { wrk.output(&mut cmd2); // validation report - let validation_errors_expected = "row_number\tfield\terror\n\ - 1\tExtractDate\tnull is not of type \"string\"\n\ - 1\tExtractDate\tnull is not one of [\"07/07/2014 00:00\",\"2014-07-07 00:00\"]\n"; + let validation_errors_expected = r#"row_number field error +2 ExtractDate "07/07/2014 00:00" is not a "date-time" +3 ExtractDate "2014-07-07 00:00" is not a "date-time" +4 ExtractDate "07/07/2014 00:00" is not a "date-time" +5 ExtractDate "07/07/2014 00:00" is not a "date-time" +6 ExtractDate "07/07/2014 00:00" is not a "date-time" +7 ExtractDate "07/07/2014 00:00" is not a "date-time" +8 ExtractDate "07/07/2014 00:00" is not a "date-time" +9 ExtractDate "07/07/2014 00:00" is not a "date-time" +10 ExtractDate "07/07/2014 00:00" is not a "date-time" +11 ExtractDate "07/07/2014 00:00" is not a "date-time" +12 ExtractDate "07/07/2014 00:00" is not a "date-time" +13 ExtractDate "07/07/2014 00:00" is not a "date-time" +14 ExtractDate "07/07/2014 00:00" is not a "date-time" +15 ExtractDate "07/07/2014 00:00" is not a "date-time" +"#; + // check validation error output let validation_error_output: String = diff --git a/tests/test_validate.rs b/tests/test_validate.rs index 5e246dddd..548aeebde 100644 --- a/tests/test_validate.rs +++ b/tests/test_validate.rs @@ -44,6 +44,16 @@ fn adur_errors() -> &'static str { 3\tCategory\t\"Mens\" does not match \"(Female|Male|Female and Male|Unisex|Male urinal|Children only|None)\"\n" } +// invalid records with index from original csv +// row 1: missing values for ExtractDate and OrganisationLabel +// row 3: wrong value for CoordinateReferenceSystem and Category +// note: removed unnecessary quotes for string column "OpeningHours" +fn adur_invalids() -> &'static str { + "ExtractDate,OrganisationURI,OrganisationLabel,ServiceTypeURI,ServiceTypeLabel,LocationText,CoordinateReferenceSystem,GeoX,GeoY,GeoPointLicensingURL,Category,AccessibleCategory,RADARKeyNeeded,BabyChange,FamilyToilet,ChangingPlace,AutomaticPublicConvenience,FullTimeStaffing,PartOfCommunityScheme,CommunitySchemeName,ChargeAmount,InfoURL,OpeningHours,ManagedBy,ReportEmail,ReportTel,Notes,UPRN,Postcode,StreetAddress,GeoAreaURI,GeoAreaLabel\n\ + ,http://opendatacommunities.org/id/district-council/adur,,http://id.esd.org.uk/service/579,Public toilets,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,OSGB36,518072,103649,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00 ,ADC,surveyor_1@adur-worthing.gov.uk,01903 221471,,60001449,,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,,\n\ + 2014-07-07 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES SHOPSDAM ROAD LANCING,OSGB3,518915,103795,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Mens,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,ADC,surveyor_3@adur-worthing.gov.uk,01903 221471,,60007428,,,,\n" +} + #[test] fn validate_adur_public_toilets_dataset_with_json_schema() { let wrk = Workdir::new("validate").flexible(true); @@ -63,17 +73,8 @@ fn validate_adur_public_toilets_dataset_with_json_schema() { wrk.output(&mut cmd); // check invalid file output - - // invalid records with index from original csv - // row 1: missing values for ExtractDate and OrganisationLabel - // row 3: wrong value for CoordinateReferenceSystem and Category - // note: removed unnecessary quotes for string column "OpeningHours" - let invalid_expected = r#"ExtractDate,OrganisationURI,OrganisationLabel,ServiceTypeURI,ServiceTypeLabel,LocationText,CoordinateReferenceSystem,GeoX,GeoY,GeoPointLicensingURL,Category,AccessibleCategory,RADARKeyNeeded,BabyChange,FamilyToilet,ChangingPlace,AutomaticPublicConvenience,FullTimeStaffing,PartOfCommunityScheme,CommunitySchemeName,ChargeAmount,InfoURL,OpeningHours,ManagedBy,ReportEmail,ReportTel,Notes,UPRN,Postcode,StreetAddress,GeoAreaURI,GeoAreaLabel - ,http://opendatacommunities.org/id/district-council/adur,,http://id.esd.org.uk/service/579,Public toilets,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,OSGB36,518072,103649,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00 ,ADC,surveyor_1@adur-worthing.gov.uk,01903 221471,,60001449,,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,, -2014-07-07 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES SHOPSDAM ROAD LANCING,OSGB3,518915,103795,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Mens,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,ADC,surveyor_3@adur-worthing.gov.uk,01903 221471,,60007428,,,, -"#; let invalid_output: String = wrk.from_str(&wrk.path("data.csv.invalid")); - assert_eq!(invalid_expected.to_string(), invalid_output); + assert_eq!(adur_invalids().to_string(), invalid_output); // check validation error output @@ -95,12 +96,8 @@ fn validate_adur_public_toilets_dataset_with_json_schema_url() { wrk.output(&mut cmd); - let invalid_expected = r#"ExtractDate,OrganisationURI,OrganisationLabel,ServiceTypeURI,ServiceTypeLabel,LocationText,CoordinateReferenceSystem,GeoX,GeoY,GeoPointLicensingURL,Category,AccessibleCategory,RADARKeyNeeded,BabyChange,FamilyToilet,ChangingPlace,AutomaticPublicConvenience,FullTimeStaffing,PartOfCommunityScheme,CommunitySchemeName,ChargeAmount,InfoURL,OpeningHours,ManagedBy,ReportEmail,ReportTel,Notes,UPRN,Postcode,StreetAddress,GeoAreaURI,GeoAreaLabel - ,http://opendatacommunities.org/id/district-council/adur,,http://id.esd.org.uk/service/579,Public toilets,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,OSGB36,518072,103649,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Female and male,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00 ,ADC,surveyor_1@adur-worthing.gov.uk,01903 221471,,60001449,,BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING,, -2014-07-07 00:00,http://opendatacommunities.org/id/district-council/adur,Adur,http://id.esd.org.uk/service/579,Public toilets,PUBLIC CONVENIENCES SHOPSDAM ROAD LANCING,OSGB3,518915,103795,http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html,Mens,Unisex,Yes,No,No,No,No,No,No,,,http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/,S = 09:00 - 21:00 W = 09:00 - 17:00,ADC,surveyor_3@adur-worthing.gov.uk,01903 221471,,60007428,,,, -"#; let invalid_output: String = wrk.from_str(&wrk.path("data.csv.invalid")); - assert_eq!(invalid_expected.to_string(), invalid_output); + assert_eq!(adur_invalids().to_string(), invalid_output); // check validation error output From 564a9ea2eca5aa9bff787737fbb48407cdad6b55 Mon Sep 17 00:00:00 2001 From: Michael Huang Date: Thu, 3 Mar 2022 16:40:06 +0800 Subject: [PATCH 2/4] schema test covers both default and strict schemas --- ...c-toilets.csv.schema-default.expected.json | 412 ++++++++++++++++++ ...c-toilets.csv.schema-strict.expected.json} | 0 tests/test_schema.rs | 57 ++- 3 files changed, 466 insertions(+), 3 deletions(-) create mode 100644 resources/test/adur-public-toilets.csv.schema-default.expected.json rename resources/test/{adur-public-toilets.csv.schema-with-value-constraints.expected.json => adur-public-toilets.csv.schema-strict.expected.json} (100%) diff --git a/resources/test/adur-public-toilets.csv.schema-default.expected.json b/resources/test/adur-public-toilets.csv.schema-default.expected.json new file mode 100644 index 000000000..d16733953 --- /dev/null +++ b/resources/test/adur-public-toilets.csv.schema-default.expected.json @@ -0,0 +1,412 @@ +{ + "$schema": "https://json-schema.org/draft-07/schema", + "title": "JSON Schema for adur-public-toilets.csv", + "description": "Inferred JSON Schema from QSV schema command", + "type": "object", + "properties": { + "ExtractDate": { + "description": "ExtractDate column from adur-public-toilets.csv", + "type": [ + "string", + "null" + ] + }, + "OrganisationURI": { + "description": "OrganisationURI column from adur-public-toilets.csv", + "minLength": 55, + "maxLength": 55, + "type": [ + "string" + ], + "enum": [ + "http://opendatacommunities.org/id/district-council/adur" + ] + }, + "OrganisationLabel": { + "description": "OrganisationLabel column from adur-public-toilets.csv", + "minLength": 0, + "maxLength": 4, + "type": [ + "string", + "null" + ], + "enum": [ + "Adur", + null + ] + }, + "ServiceTypeURI": { + "description": "ServiceTypeURI column from adur-public-toilets.csv", + "minLength": 32, + "maxLength": 32, + "type": [ + "string" + ], + "enum": [ + "http://id.esd.org.uk/service/579" + ] + }, + "ServiceTypeLabel": { + "description": "ServiceTypeLabel column from adur-public-toilets.csv", + "minLength": 14, + "maxLength": 14, + "type": [ + "string" + ], + "enum": [ + "Public toilets" + ] + }, + "LocationText": { + "description": "LocationText column from adur-public-toilets.csv", + "minLength": 40, + "maxLength": 86, + "type": [ + "string" + ], + "enum": [ + "BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING", + "BEACH TOILETS BASIN ROAD SOUTH SOUTHWICK", + "PUBLIC CONVENIENCE SOUTHWICK STREET SOUTHWICK", + "PUBLIC CONVENIENCES ADUR RECREATION GROUND BRIGHTON ROAD SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES BEACH GREEN SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES CEMETERY MILL LANE SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES FORTHAVEN SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES MIDDLE STREET SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES MONKS RECREATION GROUND CRABTREE LANE LANCING", + "PUBLIC CONVENIENCES SHOPSDAM ROAD LANCING", + "PUBLIC CONVENIENCES SOUTH PAVILION BUCKINGHAM PARK UPPER SHOREHAM ROAD SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES YEW TREE CLOSE LANCING", + "WEST BEACH PUBLIC CONVENIENCES WEST BEACH ROAD LANCING" + ] + }, + "CoordinateReferenceSystem": { + "description": "CoordinateReferenceSystem column from adur-public-toilets.csv", + "minLength": 5, + "maxLength": 6, + "type": [ + "string" + ], + "enum": [ + "OSGB3", + "OSGB36" + ] + }, + "GeoX": { + "description": "GeoX column from adur-public-toilets.csv", + "minimum": 518072, + "maximum": 524401, + "type": [ + "integer" + ] + }, + "GeoY": { + "description": "GeoY column from adur-public-toilets.csv", + "minimum": 103649, + "maximum": 106062, + "type": [ + "integer" + ] + }, + "GeoPointLicensingURL": { + "description": "GeoPointLicensingURL column from adur-public-toilets.csv", + "minLength": 124, + "maxLength": 124, + "type": [ + "string" + ], + "enum": [ + "http://www.ordnancesurvey.co.uk/business-and-government/help-and-support/public-sector/guidance/derived-data-exemptions.html" + ] + }, + "Category": { + "description": "Category column from adur-public-toilets.csv", + "minLength": 4, + "maxLength": 15, + "type": [ + "string" + ], + "enum": [ + "Female and male", + "Mens" + ] + }, + "AccessibleCategory": { + "description": "AccessibleCategory column from adur-public-toilets.csv", + "minLength": 4, + "maxLength": 6, + "type": [ + "string" + ], + "enum": [ + "None", + "Unisex" + ] + }, + "RADARKeyNeeded": { + "description": "RADARKeyNeeded column from adur-public-toilets.csv", + "minLength": 2, + "maxLength": 3, + "type": [ + "string" + ], + "enum": [ + "No", + "Yes" + ] + }, + "BabyChange": { + "description": "BabyChange column from adur-public-toilets.csv", + "minLength": 2, + "maxLength": 2, + "type": [ + "string" + ], + "enum": [ + "No" + ] + }, + "FamilyToilet": { + "description": "FamilyToilet column from adur-public-toilets.csv", + "minLength": 2, + "maxLength": 2, + "type": [ + "string" + ], + "enum": [ + "No" + ] + }, + "ChangingPlace": { + "description": "ChangingPlace column from adur-public-toilets.csv", + "minLength": 2, + "maxLength": 2, + "type": [ + "string" + ], + "enum": [ + "No" + ] + }, + "AutomaticPublicConvenience": { + "description": "AutomaticPublicConvenience column from adur-public-toilets.csv", + "minLength": 2, + "maxLength": 2, + "type": [ + "string" + ], + "enum": [ + "No" + ] + }, + "FullTimeStaffing": { + "description": "FullTimeStaffing column from adur-public-toilets.csv", + "minLength": 2, + "maxLength": 2, + "type": [ + "string" + ], + "enum": [ + "No" + ] + }, + "PartOfCommunityScheme": { + "description": "PartOfCommunityScheme column from adur-public-toilets.csv", + "minLength": 2, + "maxLength": 2, + "type": [ + "string" + ], + "enum": [ + "No" + ] + }, + "CommunitySchemeName": { + "description": "CommunitySchemeName column from adur-public-toilets.csv", + "type": [ + "null" + ] + }, + "ChargeAmount": { + "description": "ChargeAmount column from adur-public-toilets.csv", + "type": [ + "null" + ] + }, + "InfoURL": { + "description": "InfoURL column from adur-public-toilets.csv", + "minLength": 66, + "maxLength": 66, + "type": [ + "string" + ], + "enum": [ + "http://www.adur-worthing.gov.uk/streets-and-travel/public-toilets/" + ] + }, + "OpeningHours": { + "description": "OpeningHours column from adur-public-toilets.csv", + "minLength": 0, + "maxLength": 36, + "type": [ + "string", + "null" + ], + "enum": [ + "09.00 - 17.00", + "S = 08:00 - 21:00 W = 08:00 - 17:00", + "S = 09:00 - 15:00 W = 09:00 - 15:00", + "S = 09:00 - 21:00 W = 09:00 - 17:00", + null + ] + }, + "ManagedBy": { + "description": "ManagedBy column from adur-public-toilets.csv", + "minLength": 0, + "maxLength": 3, + "type": [ + "string", + "null" + ], + "enum": [ + "ADC", + null + ] + }, + "ReportEmail": { + "description": "ReportEmail column from adur-public-toilets.csv", + "minLength": 31, + "maxLength": 32, + "type": [ + "string" + ], + "enum": [ + "surveyor_10@adur-worthing.gov.uk", + "surveyor_11@adur-worthing.gov.uk", + "surveyor_12@adur-worthing.gov.uk", + "surveyor_13@adur-worthing.gov.uk", + "surveyor_14@adur-worthing.gov.uk", + "surveyor_15@adur-worthing.gov.uk", + "surveyor_1@adur-worthing.gov.uk", + "surveyor_2@adur-worthing.gov.uk", + "surveyor_3@adur-worthing.gov.uk", + "surveyor_4@adur-worthing.gov.uk", + "surveyor_5@adur-worthing.gov.uk", + "surveyor_6@adur-worthing.gov.uk", + "surveyor_7@adur-worthing.gov.uk", + "surveyor_8@adur-worthing.gov.uk", + "surveyor_9@adur-worthing.gov.uk" + ] + }, + "ReportTel": { + "description": "ReportTel column from adur-public-toilets.csv", + "minLength": 12, + "maxLength": 12, + "type": [ + "string" + ], + "enum": [ + "01903 221471" + ] + }, + "Notes": { + "description": "Notes column from adur-public-toilets.csv", + "minLength": 0, + "maxLength": 29, + "type": [ + "string", + "null" + ], + "enum": [ + "Grounds staff only not public", + null + ] + }, + "UPRN": { + "description": "UPRN column from adur-public-toilets.csv", + "minimum": 60001449, + "maximum": 60034215, + "type": [ + "integer" + ] + }, + "Postcode": { + "description": "Postcode column from adur-public-toilets.csv", + "type": [ + "null" + ] + }, + "StreetAddress": { + "description": "StreetAddress column from adur-public-toilets.csv", + "minLength": 0, + "maxLength": 86, + "type": [ + "string", + "null" + ], + "enum": [ + "BEACH GREEN PUBLIC CONVENIENCES BRIGHTON ROAD LANCING", + "BEACH TOILETS BASIN ROAD SOUTH SOUTHWICK", + "PUBLIC CONVENIENCE NORTH PAVILION BUCKINGHAM PARK UPPER SHOREHAM ROAD SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCE SOUTHWICK STREET SOUTHWICK", + "PUBLIC CONVENIENCES ADUR RECREATION GROUND BRIGHTON ROAD SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES BEACH GREEN SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES CEMETERY MILL LANE SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES CIVIC CENTRE HAM ROAD SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES FORTHAVEN SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES MIDDLE STREET SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES MONKS RECREATION GROUND CRABTREE LANE LANCING", + "PUBLIC CONVENIENCES SOUTH PAVILION BUCKINGHAM PARK UPPER SHOREHAM ROAD SHOREHAM-BY-SEA", + "PUBLIC CONVENIENCES YEW TREE CLOSE LANCING", + "WEST BEACH PUBLIC CONVENIENCES WEST BEACH ROAD LANCING", + null + ] + }, + "GeoAreaURI": { + "description": "GeoAreaURI column from adur-public-toilets.csv", + "type": [ + "null" + ] + }, + "GeoAreaLabel": { + "description": "GeoAreaLabel column from adur-public-toilets.csv", + "type": [ + "null" + ] + } + }, + "required": [ + "ExtractDate", + "OrganisationURI", + "OrganisationLabel", + "ServiceTypeURI", + "ServiceTypeLabel", + "LocationText", + "CoordinateReferenceSystem", + "GeoX", + "GeoY", + "GeoPointLicensingURL", + "Category", + "AccessibleCategory", + "RADARKeyNeeded", + "BabyChange", + "FamilyToilet", + "ChangingPlace", + "AutomaticPublicConvenience", + "FullTimeStaffing", + "PartOfCommunityScheme", + "CommunitySchemeName", + "ChargeAmount", + "InfoURL", + "OpeningHours", + "ManagedBy", + "ReportEmail", + "ReportTel", + "Notes", + "UPRN", + "Postcode", + "StreetAddress", + "GeoAreaURI", + "GeoAreaLabel" + ] +} \ No newline at end of file diff --git a/resources/test/adur-public-toilets.csv.schema-with-value-constraints.expected.json b/resources/test/adur-public-toilets.csv.schema-strict.expected.json similarity index 100% rename from resources/test/adur-public-toilets.csv.schema-with-value-constraints.expected.json rename to resources/test/adur-public-toilets.csv.schema-strict.expected.json diff --git a/tests/test_schema.rs b/tests/test_schema.rs index 702701401..7eb7fad4c 100644 --- a/tests/test_schema.rs +++ b/tests/test_schema.rs @@ -1,9 +1,56 @@ use crate::workdir::Workdir; use assert_json_diff::assert_json_eq; use serde_json::Value; +use std::path::Path; #[test] -fn generate_schema_with_value_constraints_then_feed_into_validate() { +#[should_panic] +fn generate_schema_with_defaults_and_validate_with_no_errors() { + // create worksapce and invoke schema command with value constraints flag + let wrk = Workdir::new("schema").flexible(true); + + // copy csv file to workdir + let csv = wrk.load_test_resource("adur-public-toilets.csv"); + wrk.create_from_string("adur-public-toilets.csv", &csv); + + // run schema command with value constraints option + let mut cmd = wrk.command("schema"); + cmd.arg("adur-public-toilets.csv"); + wrk.output(&mut cmd); + + // load output schema file + let output_schema_string: String = + wrk.from_str(&wrk.path("adur-public-toilets.csv.schema.json")); + let output_schema_json = + serde_json::from_str(&output_schema_string).expect("parse schema json"); + + // make sure it's a valid JSON Schema by compiling with jsonschema library + jsonschema::JSONSchema::options() + .compile(&output_schema_json) + .expect("valid JSON Schema"); + + // diff output json with expected json + let expected_schema: String = + wrk.load_test_resource("adur-public-toilets.csv.schema-default.expected.json"); + let expected_schema_json: Value = serde_json::from_str(&expected_schema.to_string()).unwrap(); + assert_json_eq!(expected_schema_json, output_schema_json); + + // invoke validate command from schema created above + let mut cmd2 = wrk.command("validate"); + cmd2.arg("adur-public-toilets.csv"); + cmd2.arg("adur-public-toilets.csv.schema.json"); + wrk.output(&mut cmd2); + + // not expecting any invalid rows, so confirm there are NO output files generated + assert!( + Path::new(&wrk.path("adur-public-toilets.csv.validation-errors.tsv")).exists() == false + ); + assert!(Path::new(&wrk.path("adur-public-toilets.csv.valid")).exists() == false); + assert!(Path::new(&wrk.path("adur-public-toilets.csv.invalid")).exists() == false); +} + +#[test] +fn generate_schema_with_optinal_flags_and_validate_with_errors() { // create worksapce and invoke schema command with value constraints flag let wrk = Workdir::new("schema").flexible(true); @@ -33,8 +80,8 @@ fn generate_schema_with_value_constraints_then_feed_into_validate() { .expect("valid JSON Schema"); // diff output json with expected json - let expected_schema: String = wrk - .load_test_resource("adur-public-toilets.csv.schema-with-value-constraints.expected.json"); + let expected_schema: String = + wrk.load_test_resource("adur-public-toilets.csv.schema-strict.expected.json"); let expected_schema_json: Value = serde_json::from_str(&expected_schema.to_string()).unwrap(); assert_json_eq!(expected_schema_json, output_schema_json); @@ -62,6 +109,10 @@ fn generate_schema_with_value_constraints_then_feed_into_validate() { 15 ExtractDate "07/07/2014 00:00" is not a "date-time" "#; + // expecting invalid rows, so confirm there ARE output files generated + assert!(Path::new(&wrk.path("adur-public-toilets.csv.validation-errors.tsv")).exists() == true); + assert!(Path::new(&wrk.path("adur-public-toilets.csv.valid")).exists() == true); + assert!(Path::new(&wrk.path("adur-public-toilets.csv.invalid")).exists() == true); // check validation error output let validation_error_output: String = From d6a487d66c1d22187974ca1ee0fd7fc5b1baee5c Mon Sep 17 00:00:00 2001 From: Michael Huang Date: Thu, 3 Mar 2022 16:45:38 +0800 Subject: [PATCH 3/4] test should not panic --- tests/test_schema.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_schema.rs b/tests/test_schema.rs index 7eb7fad4c..74c4dc608 100644 --- a/tests/test_schema.rs +++ b/tests/test_schema.rs @@ -4,7 +4,6 @@ use serde_json::Value; use std::path::Path; #[test] -#[should_panic] fn generate_schema_with_defaults_and_validate_with_no_errors() { // create worksapce and invoke schema command with value constraints flag let wrk = Workdir::new("schema").flexible(true); From 99e9c501f602bc80dccf883942a69047e321599b Mon Sep 17 00:00:00 2001 From: Michael Huang Date: Thu, 3 Mar 2022 20:40:28 +0800 Subject: [PATCH 4/4] fix test by removing workdir contents --- tests/test_schema.rs | 15 ++++++++++----- tests/workdir.rs | 8 ++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/test_schema.rs b/tests/test_schema.rs index 74c4dc608..8a8125500 100644 --- a/tests/test_schema.rs +++ b/tests/test_schema.rs @@ -7,6 +7,7 @@ use std::path::Path; fn generate_schema_with_defaults_and_validate_with_no_errors() { // create worksapce and invoke schema command with value constraints flag let wrk = Workdir::new("schema").flexible(true); + wrk.clear_contents().unwrap(); // copy csv file to workdir let csv = wrk.load_test_resource("adur-public-toilets.csv"); @@ -41,17 +42,18 @@ fn generate_schema_with_defaults_and_validate_with_no_errors() { wrk.output(&mut cmd2); // not expecting any invalid rows, so confirm there are NO output files generated - assert!( - Path::new(&wrk.path("adur-public-toilets.csv.validation-errors.tsv")).exists() == false - ); + let validation_error_path = &wrk.path("adur-public-toilets.csv.validation-errors.tsv"); + println!("not expecting validation error file at: {validation_error_path:?}"); + assert!(Path::new(validation_error_path).exists() == false); assert!(Path::new(&wrk.path("adur-public-toilets.csv.valid")).exists() == false); assert!(Path::new(&wrk.path("adur-public-toilets.csv.invalid")).exists() == false); } #[test] -fn generate_schema_with_optinal_flags_and_validate_with_errors() { +fn generate_schema_with_optional_flags_and_validate_with_errors() { // create worksapce and invoke schema command with value constraints flag let wrk = Workdir::new("schema").flexible(true); + wrk.clear_contents().unwrap(); // copy csv file to workdir let csv = wrk.load_test_resource("adur-public-toilets.csv"); @@ -109,7 +111,10 @@ fn generate_schema_with_optinal_flags_and_validate_with_errors() { "#; // expecting invalid rows, so confirm there ARE output files generated - assert!(Path::new(&wrk.path("adur-public-toilets.csv.validation-errors.tsv")).exists() == true); + let validation_error_path = &wrk.path("adur-public-toilets.csv.validation-errors.tsv"); + println!("expecting validation error file at: {validation_error_path:?}"); + + assert!(Path::new(validation_error_path).exists() == true); assert!(Path::new(&wrk.path("adur-public-toilets.csv.valid")).exists() == true); assert!(Path::new(&wrk.path("adur-public-toilets.csv.invalid")).exists() == true); diff --git a/tests/workdir.rs b/tests/workdir.rs index 3115e12e2..e3a4e57f1 100644 --- a/tests/workdir.rs +++ b/tests/workdir.rs @@ -203,6 +203,14 @@ impl Workdir { pub fn qsv_bin(&self) -> PathBuf { self.root.join("qsv") } + + // clear all files in directory + pub fn clear_contents(&self) -> io::Result<()> { + for entry in fs::read_dir(&self.dir)? { + fs::remove_file(entry?.path())?; + } + Ok(()) + } } impl fmt::Debug for Workdir {