diff --git a/brickschema/brickify/src/handlers/Handler/RACHandler/RACHandler.py b/brickschema/brickify/src/handlers/Handler/RACHandler/RACHandler.py index b1c6d74..6d4356c 100644 --- a/brickschema/brickify/src/handlers/Handler/RACHandler/RACHandler.py +++ b/brickschema/brickify/src/handlers/Handler/RACHandler/RACHandler.py @@ -1,8 +1,7 @@ -from pathlib import Path -from typing import Optional, List - import typer +from pathlib import Path from tabulate import tabulate +from typing import Optional, List from brickschema.brickify.src.handlers.Handler.TableHandler import TableHandler from brickschema.brickify.util import ( @@ -72,6 +71,12 @@ def ingest_data(self): ) ) + replace_dict = {"headers": {}, "values": {}, "ignore_columns": {}} + if "replace_dict" in self.config: + for key in replace_dict.keys(): + if key in self.config["replace_dict"]: + replace_dict[key] = self.config["replace_dict"][key] + for index, sheet in enumerate(sheets): if index not in sheet_ids: continue @@ -81,9 +86,7 @@ def ingest_data(self): if not header_row: continue header = sheet.row_values(header_row) - header = cleaned_value( - value=header, replace_dict=self.config["replace_dict"]["headers"] - ) + header = cleaned_value(value=header, replace_dict=replace_dict["headers"]) header_dict = {idx: value for idx, value in enumerate(header)} for row_number in range(header_row + 1, sheet.nrows): row = sheet.row_values(row_number) @@ -108,8 +111,9 @@ def ingest_data(self): row_object = { key: cleaned_value( value, - replace_dict=self.config["replace_dict"]["values"], + replace_dict=replace_dict["values"], ) for key, value in row_object.items() + if key not in replace_dict["ignore_columns"] } self.data.append(row_object) diff --git a/brickschema/brickify/src/handlers/Handler/TableHandler.py b/brickschema/brickify/src/handlers/Handler/TableHandler.py index a2c3420..f516f0d 100644 --- a/brickschema/brickify/src/handlers/Handler/TableHandler.py +++ b/brickschema/brickify/src/handlers/Handler/TableHandler.py @@ -1,11 +1,10 @@ import csv import re import traceback -from pathlib import Path -from typing import Optional, List - from jinja2 import Template +from pathlib import Path from typer import progressbar +from typing import Optional, List from brickschema.brickify.src.handlers.Handler.Handler import Handler from brickschema.brickify.util import cleaned_value @@ -43,21 +42,22 @@ def ingest_data(self): Ingests tabular data into a key-value based data model where the key is the column header, and value is the cleaned cell value. """ + replace_dict = {"headers": {}, "values": {}, "ignore_columns": {}} + if "replace_dict" in self.config: + for key in replace_dict.keys(): + if key in self.config["replace_dict"]: + replace_dict[key] = self.config["replace_dict"][key] + with open(self.source, newline="") as csv_file: reader = csv.DictReader(csv_file, dialect=self.dialect) for row in reader: - replace_dict = ( - self.config["replace_dict"]["values"] - if "replace_dict" in self.config - and "values" in self.config["replace_dict"] - else {} - ) item = { key.strip(): cleaned_value( value, - replace_dict=replace_dict, + replace_dict=replace_dict["values"], ) for key, value in row.items() + if key not in replace_dict["ignore_columns"] } self.data.append(item) diff --git a/brickschema/brickify/util.py b/brickschema/brickify/util.py index 9432344..a2cb72f 100644 --- a/brickschema/brickify/util.py +++ b/brickschema/brickify/util.py @@ -2,18 +2,31 @@ The util module provides helper functions used by brickify. """ +import click_spinner import json import re -from pathlib import Path -from typing import Optional, Dict - -import click_spinner import typer +import urllib.parse import yaml +from pathlib import Path from rdflib import Namespace, OWL, RDF, RDFS, Graph +from typing import Optional, Dict, Union +from unicodedata import normalize from xlrd import open_workbook +def decode(value: Union[str, bytes]): + """ + Returns a UTF-8 string produced for the given value. + + :param value: string|bytes + :returns: UTF-8 decoded string + """ + if type(value) is bytes: + return value.decode("UTF-8") + return value + + def cleaned_value(value, replace_dict: Optional[Dict] = {}): """ Returns a cleaned value produced by doing regex replacements and elimination @@ -39,9 +52,11 @@ def cleaned_value(value, replace_dict: Optional[Dict] = {}): return True if value in ["FALSE", "false", "False", "off", "OFF"]: return False + clean_value = clean_value.strip() for replacement in replace_dict.items(): - clean_value = re.sub(*replacement, clean_value) - return clean_value.strip() + clean_value = re.sub(*replacement, clean_value).strip() + clean_value = urllib.parse.quote_plus(clean_value.encode("UTF-8")) + return clean_value return clean_value diff --git a/brickschema/web.py b/brickschema/web.py index 7353a3f..0dd63aa 100644 --- a/brickschema/web.py +++ b/brickschema/web.py @@ -48,7 +48,12 @@ def home(self): def bindings(self): return jsonify( - {prefix: namespace for prefix, namespace in self.graph.namespaces() if prefix not in self.ignore_prefixes}) + { + prefix: namespace + for prefix, namespace in self.graph.namespaces() + if prefix not in self.ignore_prefixes + } + ) def apply_reasoning(self, profile): self.graph.expand(profile)